A simple question for those who know, but a pretty hard one for me, as I suspect it is not practically possible.
After making a simple python program, it is possible to run it on the computer's command prompt.
I was wondering whether it is possible to allow someone who runs it that way to add an element to a list (list.insert) and have it still be there the next time the program is run (thus editing the predefined list into a new list and saving it that way)?
EDIT: just giving a bit more information:
All the program has to do is allow you to choose a list. From this list it returns a random item.
I was just hoping to allow to add items to this list while running the program, keeping the list updated afterwards.
The most basic way is to use the Pickle module to save and load your data to disk:
http://docs.python.org/2/library/pickle.html
http://docs.python.org/2/library/pickle.html#example
Here's how I would use it in a simple program
try:
import cPickle as pickle
except ImportError:
import pickle
class MyClass(object):
    """A list whose contents are persisted to a pickle file.

    The list is loaded from ``file_name`` on construction and written back
    to the same file every time an element is added.

    Args:
        file_name: Path of the pickle file used for persistence.
    """

    def __init__(self, file_name):
        self.array = []
        self.file_name = file_name
        self.load_data()

    def add_element(self, element):
        """Append ``element`` to the list and save the list to disk."""
        self.array.append(element)
        self.save_data()

    def load_data(self):
        """Replace ``self.array`` with the saved list, if one can be read.

        A missing or empty file leaves ``self.array`` unchanged.
        """
        try:
            # Pickle streams are bytes: binary mode is required on Python 3
            # and avoids newline translation on Windows.
            with open(self.file_name, "rb") as f:
                # NOTE(security): unpickling can execute arbitrary code, so
                # only point this at files the program wrote itself.
                self.array = pickle.load(f)
        except (IOError, EOFError):
            # IOError: no file saved yet; EOFError: file exists but is empty.
            pass

    def save_data(self):
        """Write the current list to ``self.file_name``, overwriting it."""
        with open(self.file_name, "wb") as f:
            pickle.dump(self.array, f)
def main():
    """Print the list saved by earlier runs, then append 0..4 to it."""
    FILE_NAME = "test.pkl"
    a = MyClass(FILE_NAME)
    # A single pre-formatted argument prints the same text on Python 2
    # (where print is a statement) and on Python 3 (where it is a function).
    print("elements in array are %s" % (a.array,))
    for i in range(5):
        a.add_element(i)


if __name__ == "__main__":
    main()
Related
I realize I likely need to use dynamic requirements to accomplish the following task, however I have not been able to wrap my head around what this would look like in practice.
The goal is to use Luigi to generate data and add it to a database, without knowing ahead of time what data will be generated.
Take the following example using mongodb:
import luigi
from uuid import uuid4
from luigi.contrib import mongodb
import pymongo
# Make up IDs, though in practice the IDs may be generated from an API
class MakeID(luigi.Task):
    """Luigi task that writes ten random UUIDs, comma-separated, to data.csv."""

    def output(self):
        # Write the data to file
        return luigi.LocalTarget('data.csv')

    def run(self):
        with self.output().open('w') as handle:
            fresh_ids = [str(uuid4()) for _ in range(10)]
            handle.write(','.join(fresh_ids))
class ToDataBase(luigi.Task):
    """Luigi task that loads the IDs produced by MakeID into MongoDB.

    Note that ``output`` reads the input file in order to build its target,
    so it cannot be evaluated before that file has been written.
    """

    def requires(self):
        return MakeID()

    def output(self):
        # Attempt to read non-existent file to get the IDs to check if task is complete
        with self.input().open('r') as id_file:
            valid_ids = id_file.read().split(',')
        mongo = pymongo.MongoClient('localhost', 27017, ssl=False)
        return mongodb.MongoRangeTarget(
            mongo, 'myDB', 'myData', valid_ids, 'myField')

    def run(self):
        with self.input().open('r') as id_file:
            ids = id_file.read().split(',')
        # Add some fake data to simulate generating new data:
        # each ID is mapped to its position in the file.
        count_data = dict((key, value) for value, key in enumerate(ids))
        # Add data to the database
        self.output().write(count_data)
if __name__ == '__main__':
luigi.run()
The goal is to obtain data, modify it and then add it to a database.
The above code fails when run because the output method of ToDataBase runs before the requires method, so while the function has access to the input target, the input file does not yet exist. Regardless, I still need to check that the data was added to the database.
This github issue is close to what I am looking for, though as I mentioned I have not been able to figure out dynamic requirements for this use case in practice.
The solution is to create a third task (in the example Dynamic) that yields the task that is waiting on dynamic input and making the dependency a parameter rather than a requires method.
class ToDatabase(luigi.Task):
    """Luigi task that writes the IDs listed in a file to MongoDB.

    The file path is passed in as the ``fp`` parameter instead of being
    declared through ``requires``.
    """

    fp = luigi.Parameter()

    def _read_ids(self):
        # Return the comma-separated entries of the file at ``self.fp``.
        with open(self.fp, 'r') as id_file:
            return list(map(str, id_file.read().split(',')))

    def output(self):
        valid_ids = self._read_ids()
        mongo = pymongo.MongoClient('localhost', 27017, ssl=False)
        return mongodb.MongoRangeTarget(
            mongo, 'myDB', 'myData', valid_ids, 'myField')

    def run(self):
        valid_ids = self._read_ids()
        # Every ID is stored with the same placeholder value, 5.
        self.output().write(dict.fromkeys(valid_ids, 5))
class Dynamic(luigi.Task):
    """Wrapper task that schedules ToDatabase once MakeID's file exists.

    ``requires`` makes Luigi run MakeID first; ``run`` then yields
    ToDatabase as a dynamic dependency, passing it the path of the file
    MakeID produced.
    """

    def output(self):
        return self.input()

    def requires(self):
        # The ID-producing task in this file is named MakeID (not MakeIDs).
        return MakeID()

    def run(self):
        # The database task in this file is named ToDatabase
        # (not AddToDatabase).
        yield ToDatabase(fp=self.input().path)
import praw
import time
class getPms():
    """Collect Reddit inbox messages that have not been seen before.

    Constructing an instance reads the ids of already-seen messages from
    ``file``, fetches up to 25 inbox messages, records the bodies of the
    unseen ones in ``self.inboxMessage``, saves the updated id cache and
    then sleeps for five seconds.

    The Reddit session ``r`` is created and logged in once, when the class
    body is executed, and is shared by all instances.
    """

    r = praw.Reddit(user_agent="Test Bot By /u/TheC4T")
    r.login(username='*************', password='***************')
    cache = []
    inboxMessage = []
    file = 'cache.txt'

    def __init__(self):
        # Bind fresh per-instance lists so instances do not share (and
        # mutate) the class-level defaults above.
        self.cache = self.cacheRead(self.file)
        self.inboxMessage = []
        self.bot_run()
        self.cacheSave(self.file)
        time.sleep(5)
        # __init__ must return None; read the result from self.inboxMessage.

    def bot_run(self):
        """Record every inbox message whose id is not yet in the cache."""
        inbox = self.r.get_inbox(limit=25)
        print(self.cache)
        for message in inbox:
            if message.id not in self.cache:
                print(message.body)
                self.cache.append(message.id)
                self.inboxMessage.append(message.body)

    def cacheSave(self, file):
        """Write the cached message ids to ``file``, one per line."""
        with open(file, 'w') as f:
            for s in self.cache:
                f.write(s + '\n')

    def cacheRead(self, file):
        """Return the message ids stored in ``file``, one per line.

        Returns an empty list when the file cannot be opened, which is the
        case on the very first run before any cache has been saved.
        """
        try:
            with open(file, 'r') as f:
                return [line.rstrip('\n') for line in f]
        except IOError:
            return []
# while True: #threading is needed in order to run this as a loop. Probably gonna do this in the main method though
# def getInbox(self):
# return self.inboxMessage
The exception is:
cache = self.cacheRead(self, self.file)
AttributeError: 'getPms' object has no attribute 'cacheRead'
I am new to working with classes in Python and need help figuring out what I am doing wrong here. If you need any more information, I can add some. It worked when it was all functions, but now that I have attempted to switch it to a class it has stopped working.
Your cacheRead function (as well as bot_run and cacheSave) is indented too far, so it's defined in the body of your other function getPms. Thus it is only accessible inside of getPms. But you're trying to call it from __init__.
I'm not sure what you're trying to achieve here because getPms doesn't have anything else in it but three function definitions. As far as I can tell you should just take out the def getPms line and unindent the three functions it contains so they line up with the __init__ method.
Here are few points:
Unless you're explicitly inheriting from some specific class, you can omit parenthesis:
class A(object):, class A():, class A: are equivalent.
Your class and one of its methods have the same name. I'm not sure whether this confuses Python, but it will probably confuse you. You could name your class PMS and your method get, for example, so you would call PMS.get(...)
With the present indentation, the cacheRead and cacheSave functions are simply inaccessible from __init__; why not move them into the class namespace?
When calling member functions, you don't need to specify self as the first argument since you're already calling the function from this object. So instead of cache = self.cacheRead(self, self.file) you have to do it like this: cache = self.cacheRead(self.file)
I have #1 .py script which is a GTK GUI application and I have a second script which needs a string from the first script that can be gained by gtk.Entry().get_text()
The problem is that I dont know how to use that function/command outside of #1 script
Lets say #1 script is called test.py and inside there is:
def __init__(self):
#some code
#some code
self.TextBox = gtk.Entry()
self.TextBox .connect("key-press-event", self.keyEnter)
#some code
#some code
#some code
def keyEnter(self, widget, ev):
    """Key-press handler: lock the first text box and move focus on.

    When the pressed key has keyval 65293 and ``self.TextBox`` is not
    empty, focus moves to ``self.TextBox1`` and ``self.TextBox`` is made
    read-only. Any other key press is ignored.

    Args:
        widget: The widget that emitted the event (unused).
        ev: The key event; only ``ev.keyval`` is read.
    """
    # NOTE(review): 65293 (0xFF0D) is presumably the Return/Enter keyval,
    # going by the handler's name -- confirm against the GTK key symbols.
    if ev.keyval == 65293 and self.TextBox.get_text() != "":
        self.TextBox1.grab_focus()
        self.TextBox.set_editable(False)
And #2 script is called test2.py and inside contains:
Meta = self.client.get_file_and_metadata(#here it needs to go self.Textbox.get_text())
I couldn't access the gtk.Entry() from the other script I wrote, since it runs as a separate process (I don't yet know how to communicate between processes), so I did this:
def keyEnter(self, widget, ev):
    """Key-press handler: lock the text box and save its text to file.txt.

    When the pressed key has keyval 65293 and ``self.TextBox`` is not
    empty, focus moves to ``self.TextBox1``, ``self.TextBox`` is made
    read-only and its text is written to ``file.txt`` (replacing any
    previous contents) so that another script can read it.

    Args:
        widget: The widget that emitted the event (unused).
        ev: The key event; only ``ev.keyval`` is read.
    """
    # NOTE(review): 65293 (0xFF0D) is presumably the Return/Enter keyval,
    # going by the handler's name -- confirm against the GTK key symbols.
    if ev.keyval == 65293 and self.TextBox.get_text() != "":
        self.TextBox1.grab_focus()
        self.TextBox.set_editable(False)
        # The with-block closes the file even if the write fails, and the
        # handle no longer shadows the ``file`` builtin.
        with open('file.txt', 'w+') as out:
            out.write(self.TextBox.get_text())
i changed
Meta = self.client.get_file_and_metadata(#here it needs to go self.Textbox.get_text())
that was supposed to download the file with the name written in self.Textbox
i changed it to
Meta = self.client.get_file_and_metadata(getpass.getuser())
and finally the string from self.Textbox i got through uploading
# Pop the first line of file.txt and upload what remains of the file under
# that line's text as the remote name.
with open("file.txt", "r") as chat:
    # keepends=True so the remaining lines can be written back verbatim
    data = chat.read().splitlines(True)
# Take the first line itself. Going through str() of a one-element list and
# then stripping brackets leaves the repr's quote and backslash characters
# in the name (e.g. 'abc\ instead of abc).
dat = data[0].rstrip("\r\n") if data else ""
with open("file.txt", "w") as chat1:
    chat1.writelines(data[1:])  # writes all except the first line
# Both handles above are already closed by their with-blocks, so no explicit
# close() calls are needed.
self.f = open('file.txt', 'rb')
try:
    # NOTE(review): assumes put_file has finished reading self.f by the time
    # it returns -- confirm against the client library.
    self.response = self.client.put_file(dat, self.f)
finally:
    self.f.close()
I dont know if anyone will find this helpful but this solved the problem at my side :))
I have a program depending on a large code base that prints a lot of irrelevant and annoying messages. I would like to clean them up a bit, but since their content is dynamically generated, I can't just grep for them.
Is there a way to place a hook on the print statement? (I use python 2.4, but I would be interested in results for any version). Is there another way to find from which "print" statement the output comes?
For CPython2.5 or older:
import sys
import inspect
import collections
_stdout = sys.stdout
Record = collections.namedtuple(
'Record',
'frame filename line_number function_name lines index')
class MyStream(object):
    """File-like wrapper that labels each write with its call site.

    Every non-blank chunk written is preceded by ``"<filename> <line>: "``
    describing the code that called ``write``. Whitespace-only chunks (such
    as the newline that ``print`` writes separately) are passed through
    unlabelled.

    Args:
        target: The underlying stream that receives the output.
    """

    def __init__(self, target):
        self.target = target

    def write(self, text):
        """Write ``text`` to the target, prefixed with the caller's location."""
        if text.strip():
            frame = inspect.currentframe()
            # currentframe() returns None on interpreters without frame
            # support; in that case the text is written without a prefix.
            if frame is not None:
                # Only the direct caller is inspected instead of building
                # records for the whole stack; context=0 skips reading the
                # caller's source lines, which are not needed here.
                info = inspect.getframeinfo(frame.f_back, 0)
                self.target.write(
                    '{f} {n}: '.format(f=info.filename, n=info.lineno))
        self.target.write(text)

    def flush(self):
        """Flush the underlying stream."""
        self.target.flush()

    def __getattr__(self, name):
        # Delegate everything else (encoding, isatty, ...) to the target so
        # the wrapper can stand in for sys.stdout. The guard prevents
        # infinite recursion if ``target`` itself has not been set yet.
        if name == 'target':
            raise AttributeError(name)
        return getattr(self.target, name)
sys.stdout = MyStream(sys.stdout)
def foo():
print('Hi')
foo()
yields
/home/unutbu/pybin/test.py 20: Hi
For CPython2.6+ we can import the print function with
from __future__ import print_function
and then redirect it as we wish:
from __future__ import print_function
import sys
import inspect
import collections
Record = collections.namedtuple(
'Record',
'frame filename line_number function_name lines index')
def myprint(text='', *args, **kwargs):
    """Replacement for ``print`` that labels output with its call site.

    Non-blank output is preceded by ``"<filename> <line>: "`` describing
    the code that called this function. Blank output is written without a
    label.

    Args:
        text: First value to print.
        *args: Further values, joined to ``text`` with ``sep``.
        **kwargs: ``sep``, ``end``, ``file`` and ``flush``, with the same
            meaning and defaults as for the built-in ``print`` function.

    Raises:
        TypeError: If any other keyword argument is passed.
    """
    # Keyword options are taken from **kwargs rather than declared as
    # keyword-only parameters so the signature also parses on Python 2.
    sep = kwargs.pop('sep', None)
    end = kwargs.pop('end', None)
    stream = kwargs.pop('file', None)
    flush = kwargs.pop('flush', False)
    if kwargs:
        raise TypeError(
            'unexpected keyword arguments: %s' % ', '.join(sorted(kwargs)))
    sep = ' ' if sep is None else sep
    end = '\n' if end is None else end
    if stream is None:
        # Looked up at call time so a later sys.stdout redirect is honoured.
        stream = sys.stdout

    # '%s' formatting accepts any object, so print(5) works like the
    # built-in instead of failing on a missing .strip().
    message = sep.join('%s' % (part,) for part in (text,) + args)
    if message.strip():
        frame = inspect.currentframe()
        # currentframe() returns None on interpreters without frame support.
        if frame is not None:
            info = inspect.getframeinfo(frame.f_back, 0)
            stream.write('{f} {n}: '.format(f=info.filename, n=info.lineno))
    stream.write(message + end)
    if flush:
        stream.flush()
def foo():
print('Hi')
print = myprint
foo()
Note that inspect.currentframe uses sys._getframe which is not part of all implementations of Python. So the solution above may only work for CPython.
Strictly speaking, code base that you depend on, as in libraries, shouldn't contain any print statements. So, you should really just remove all of them.
Other than that, you can monkey-patch stdout: Adding a datetime stamp to Python print
a very gross hack to make this work:
use your favorite text editor, use your search/find feature.
find all the print statements.
and manually insert a number or identifier into each of them (or automatically, if you do this with a script).
A script to do this would be simple: have it look for print with a regex and replace it with print ID. The output will then be the same as before, but each line will carry its number.
cheers.
edit
barring any strange formatting, the following code should do it for you.
note, this is just an example of a way you could do it. not really an answer.
import re
class inc():
    """Counter that hands out 1, 2, 3, ... on successive get() calls."""

    def __init__(self):
        # Last value handed out; 0 means nothing has been handed out yet.
        self.x = 0

    def get(self):
        """Advance the counter by one and return the new value."""
        following = self.x + 1
        self.x = following
        return following
def replacer(filename_in, filename_out):
    """Copy a source file, tagging every ``print`` with a numeric ID.

    Each occurrence of the word ``print`` becomes ``print <id>,`` so that
    the ID is emitted in front of whatever the statement prints. One ID is
    drawn per input line (whether or not the line contains a print), so
    the ID equals the 1-based line number.

    Args:
        filename_in: Path of the file to read.
        filename_out: Path of the file to write; overwritten if it exists.
    """
    i = inc()
    # \b keeps identifiers that merely contain "print" (sprintf,
    # fingerprint, print_function) untouched.
    pattern = re.compile(r'\bprint\b')
    with open(filename_in) as f:
        # Opening the output in a with-block guarantees it is flushed and
        # closed when the copy finishes or fails.
        with open(filename_out, 'w') as out:
            for line in f:
                # ``line`` still carries its own newline, so none is added;
                # adding one would insert a blank line after every line.
                out.write(pattern.sub('print %d,' % i.get(), line))
I used a basic incrementer class in case you want some kind of more complex ID instead of just a counter.
In harsh circumstances (output done in some weird binary libraries) you could also use strace -e write (and more options). If you do not read strace's output, the straced program waits until you do, so you can send it a signal and see where it dies.
Here is a trick that Jeeeyul came up with for Java: Replace the output stream (i.e. sys.out) with something that notices when a line feed has been written.
If this flag is true, throw an exception when the next byte is being written. Catch the exception in the same place, walk up the stack trace until you find code that doesn't belong to your "debug stream writer".
Pseudocode:
class DebugPrintln:
def __init__(self):
self.wasLF = False
def write(self, x):
if self.wasLF:
self.wasLF = False
frames = traceback.extract_stack()
... find calling code and output it ...
if x == '\n':
self.wasLF = true
super.write(x)
Consider this scenario:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
# os.walk() returns a generator; it is created once here and then iterated
# by both loops below.
walk = os.walk('/home')
# First pass: print the full path of every directory and file under /home.
for root, dirs, files in walk:
for pathname in dirs+files:
print os.path.join(root, pathname)
# Second pass over the same generator object. The first loop has already
# consumed it, so this loop has nothing left to iterate over.
for root, dirs, files in walk:
for pathname in dirs+files:
print os.path.join(root, pathname)
I know that this example is kinda redundant, but you should consider that we need to use the same walk data more than once. I've a benchmark scenario and the use of same walk data is mandatory to get helpful results.
I've tried walk2 = walk to clone and use in the second iteration, but it didn't work. The question is... How can I copy it? Is it ever possible?
Thank you in advance.
You can use itertools.tee():
walk, walk2 = itertools.tee(walk)
Note that this might "need significant extra storage", as the documentation points out.
If you know you are going to iterate through the whole generator for every usage, you will probably get the best performance by unrolling the generator to a list and using the list multiple times.
walk = list(os.walk('/home'))
Define a function
def walk_home():
    """Yield the (root, dirs, files) triples of a fresh os.walk('/home')."""
    triples = os.walk('/home')
    for triple in triples:
        yield triple
Or even this
def walk_home():
    """Return a new os.walk generator over '/home' on every call."""
    fresh_walk = os.walk('/home')
    return fresh_walk
Both are used like this:
for root, dirs, files in walk_home():
for pathname in dirs+files:
print os.path.join(root, pathname)
This is a good usecase for functools.partial()
to make a quick generator-factory:
from functools import partial
import os
walk_factory = partial(os.walk, '/home')
walk1, walk2, walk3 = walk_factory(), walk_factory(), walk_factory()
What functools.partial() does is hard to describe with human-words, but this^ is what it's for.
It partially fills out function-params without executing that function. Consequently it acts as a function/generator factory.
This answer aims to extend/elaborate on what the other answers have expressed. The solution will necessarily vary depending on what exactly you aim to achieve.
If you want to iterate over the exact same result of os.walk multiple times, you will need to initialize a list from the os.walk iterable's items (i.e. walk = list(os.walk(path))).
If you must guarantee the data remains the same, that is probably your only option. However, there are several scenarios in which this is not possible or desirable.
It will not be possible to list() an iterable if the output is of sufficient size (i.e. attempting to list() an entire filesystem may freeze your computer).
It is not desirable to list() an iterable if you wish to acquire "fresh" data prior to each use.
In the event that list() is not suitable, you will need to run your generator on demand. Note that generators are exhausted after each use, so this poses a slight problem. In order to "rerun" your generator multiple times, you can use the following pattern:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
class WalkMaker:
    """Re-iterable view of every path below a directory.

    Each iteration starts a new os.walk over ``path`` and yields, for every
    directory visited, the full paths of its sub-directories followed by
    the full paths of its files.
    """

    def __init__(self, path):
        self.path = path

    def __iter__(self):
        for top, subdirs, filenames in os.walk(self.path):
            for entry in subdirs:
                yield os.path.join(top, entry)
            for entry in filenames:
                yield os.path.join(top, entry)
walk = WalkMaker('/home')
for path in walk:
pass
# do something...
for path in walk:
pass
The aforementioned design pattern will allow you to keep your code DRY.
This "Python Generator Listeners" code allows you to have many listeners on a single generator, like os.walk, and even have someone "chime in" later.
def walkme():
    """Return a new os.walk generator over '/home'."""
    # The generator has to be returned: Muxer calls this function and
    # iterates over whatever it gets back.
    return os.walk('/home')
m1 = Muxer(walkme)
m2 = Muxer(walkme)
then m1 and m2 can run in threads even and process at their leisure.
See: https://gist.github.com/earonesty/cafa4626a2def6766acf5098331157b3
import queue
from threading import Lock
from collections import namedtuple
class Muxer():
    """Iterator that lets several listeners share one generator.

    All Muxer instances created with the same ``func`` share a single
    generator obtained from ``func()``. Whenever one listener pulls a new
    item from that generator, the item is also queued for every other
    listener currently registered for the same ``func``.

    Args:
        func: Zero-argument callable returning the generator to share.
        restart: If true, a new generator is created from ``func`` when the
            shared one is exhausted (StopIteration is still raised once).
    """

    Entry = namedtuple('Entry', 'genref listeners, lock')
    # Shared state per ``func``: the generator (in a one-element list so it
    # can be replaced), the registered listeners and the lock guarding both.
    already = {}
    # Guards ``already`` itself.
    top_lock = Lock()

    def __init__(self, func, restart=False):
        self.restart = restart
        self.func = func
        self.queue = queue.Queue()
        with self.top_lock:
            if func not in self.already:
                self.already[func] = self.Entry([func()], [], Lock())
            shared = self.already[func]
        self.genref, self.listeners, self.lock = shared
        self.listeners.append(self)

    def __iter__(self):
        return self

    def __next__(self):
        # Fast path: an item another listener already queued for us.
        try:
            return self.queue.get_nowait()
        except queue.Empty:
            pass
        with self.lock:
            # Re-check under the lock: an item may have been queued while
            # this listener was waiting to acquire it.
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                pass
            try:
                item = next(self.genref[0])
            except StopIteration:
                if self.restart:
                    self.genref[0] = self.func()
                raise
            # Hand the new item to every other listener.
            for peer in self.listeners:
                if peer is not self:
                    peer.queue.put(item)
            return item

    def __del__(self):
        with self.top_lock:
            if self in self.listeners:
                self.listeners.remove(self)
            # The last listener to go drops the shared entry.
            if not self.listeners:
                self.already.pop(self.func, None)