conditionally close file on exit from function - python

I have a (recursive) function which I would like to accept either a string or an opened file object. If the argument is a string, then the function opens a file and uses that file object. It seems best to close this opened file object explicitly when I return from the function, but only if a string was passed in. (Imagine the surprise from the user when they pass in an opened file object and find that their file object was closed somewhere). Here's what I'm currently using:
def read_file(f, param):
do_close = isinstance(f,basestring)
f = open(f, 'rb') if do_close else f
try:
info = f.read(4)
#check info here
if info == Info_I_Want(param):
return f.read(get_data(info))
else:
f.seek(goto_new_position(info))
return read_file(f,param)
except IKnowThisError:
return None
finally:
if do_close:
f.close()
You can assume that IKnowThisError will be raised at some point if I don't find the info I want.
This feels very kludgy. Is there a better way?

Why not wrapping your recursive function with a wrapper to avoid overhead ?
def read_file(f, param):
if isinstance(f, basestring):
with open(f, 'rb') as real_f:
return read_file2(real_f, param)
else:
return read_file2(real_f, param)
def read_file2(f, param):
# Now f should be a file object
...

How about calling your function recursively?
def read_file(f, param):
if isinstance(f, basestring):
with open(f, 'rb') as real_f:
return read_file(real_f, param)
else:
# normal path

The upcoming Python 3.3 offers a more general solution for this kind of problem, namely contextlib.ExitStack. This allow to conditionally add context managers to the current with-block:
def read_file(f, param):
with ExitStack() as stack:
if isinstance(f, basestring):
f = stack.enter_context(open(f, 'rb'))
# Your code here

Related

Decorator factory parameter is a static value and not a variable's value

So I have
# my decorator factory
def execute_in(directory): # <-- I want this to be a variable's value which can change
def decorator(function):
def wrapper(*args, **kwargs):
os.chdir(directory)
print(directory) # currently is printing None which is my problem
value = function(*args, **kwargs)
os.chdir(home_dir)
return value
return wrapper
return decorator
and
# a function that runs after assigning General.archive_dir a value
#execute_in(General.archive_dir)
def get_data():
print(General.archive_dir) # will print the correct directory name
with open('data.csv', 'r') as f:
rows = [row for row in csv.reader(f, delimiter=',')]
return rows
My problem is that the decorator factory is using the value of the variable General.archive_dir instantiated at program start when its value is None. I want it to use the value of General.archive_dir at the time the decorated function is called. How can I do this?
I apologize if this question is unclear. If you can, please let me know how I can clarify it if needed.
One solution is calling #execute_in with a lambda.
directory inside wrapper would become a function that, when called, returns the current value.
archive_dir = None
# decorator factory
def execute_in(directory_path_getter):
def decorator(function):
def wrapper(*args, **kwargs):
print('from wrapper:', directory_path_getter()) # Notice the function call
value = function(*args, **kwargs)
return value
return wrapper
return decorator
#execute_in(lambda: archive_dir)
def get_data():
...
archive_dir = 'some directory'
print(get_data())
Prints:
from wrapper: some directory
from get_data: some directory
['some data']
If a decorator isn't strictly required, a context manager can also fulfill the task of temporarily changing directories.
import os
from contextlib import contextmanager
#contextmanager
def execute_in(directory):
orig_dir = os.getcwd()
os.chdir(directory)
try:
yield
finally:
os.chdir(orig_dir)
Using a context manager would allow for changing directories many times in one method, and can be nested.
settings = {
'archive_dir': './dir'
}
def get_data():
print(os.getcwd())
with execute_in(settings['archive_dir']):
print(' ' + os.getcwd())
with execute_in('bin'):
print(' ' + os.getcwd())
print(' ' + os.getcwd())
print(os.getcwd())
And when we run it
>>> get_data()
/home/they4kman/.PyCharm2019.2/config/scratches
/home/they4kman/.PyCharm2019.2/config/scratches/dir
/home/they4kman/.PyCharm2019.2/config/scratches/dir/bin
/home/they4kman/.PyCharm2019.2/config/scratches/dir
/home/they4kman/.PyCharm2019.2/config/scratches

python - remain in base class when calling parent method from child

I have the following base class:
class ClientRepo(Repository):
def __init__(self) -> None:
self.__clientList = []
def hasClientWithId(self, clientId):
for client in self.__clientList:
if client.getId() == clientId:
return True
return False
def addClient(self, client):
if type(client).__name__ == 'ClientDAO':
if not self.hasClientWithId(client.getId()):
client.setClientId(self.__maximumIndexInClientList() + 1)
self.__clientList.append(client)
else:
raise ObjectAlreadyInCollectionException
else:
raise TypeError
which basically only holds a list and can add a ClientDAO to it.
And the following, which derives from it:
class ClientFileRepository(ClientRepo):
def __init__(self, fileName) -> None:
super().__init__()
self.__fileName = fileName
self.__file = None
def hasClientWithId(self, clientId):
self.__loadRepo()
hasClientWithId = super().hasClientWithId(clientId)
super().clean()
return hasClientWithId
def addClient(self, client):
self.__loadRepo()
super().addClient(client)
self.__storeRepo()
super().clean()
def __loadFileReadMode(self):
self.__file = open(self.__fileName, "r")
def __loadFileWriteMode(self):
self.__file = open(self.__fileName, "w")
def __closeFile(self):
self.__file.close()
def __loadRepo(self):
self.__loadFileReadMode()
for line in self.__file:
splitLine = line.split()
clientToAdd = ClientDAO(splitLine[1])
clientToAdd.setClientId(int(splitLine[0]))
super().addClientWithId(clientToAdd)
self.__closeFile()
def __storeRepo(self):
self.__loadFileWriteMode()
self.__file.write("")
for client in super().getList():
self.__file.write(self.clientToString(client))
self.__closeFile()
def clientToString(self, clientDAO):
return str(clientDAO.getId()) + " " + clientDAO.getName() + "\n"
a class which should load the list from a file, call addClient from parent, and store the updated list in the file. The problem is that after child class loads the file in addClient, it calls the method in the parent, which calls hasClientWithId, from the child, again. But I want it to call hasClientWithId, from the parent, that is, the context it is in. Can I achieve that?
I can think of several ways to achieve your goal. I ranked them from worst to best
1. Exactly what you asked for
You wanted that ClientRepo.addClient calls ClientRepo.hasClientWithId instead of ClientFileRepository.hasClientWithId. It is possible to enforce that:
class ClientRepo(Repository):
def addClient(self, client):
if type(client).__name__ == 'ClientDAO':
if not ClientRepo.hasClientWithId(self, client.getId()):
client.setClientId(self.__maximumIndexInClientList() + 1)
self.__clientList.append(client)
else:
raise ObjectAlreadyInCollectionException
else:
raise TypeError
This is not a good approach, because it's unintuitive and breaks the principles of OOP. Any other programmer writing a subclass of ClientRepo that overrides hasClientWithId would expect that this will have an effect for every call to hasClientWithId even inside of addClient
2. Let ClientFileRepository decide which function to use
Add a variable
self.__isFileOpen = False
in ClientFileRepository.__init__, set it to True when you open the file and to False when you close the file. Then change the hasClientWithId within ClientFileRepository to
def hasClientWithId(self, clientId):
if not self.__isFileOpen:
self.__loadRepo()
result = super().hasClientWithId(clientId)
super().clean()
return result
else:
return super().hasClientWithId(clientId)
to avoid opening the same file again. This works, but it is pretty difficult to write new functions for this class, because you always need to be aware if the function call is a call from within your class or from somewhere else. Also this seems pretty inefficient, because you read and write the entire file, even when you only add one client.
3. Read the file only once and modify the underlying ClientRepo
class ClientFileRepository(ClientRepo):
def __init__(self, fileName) -> None:
super().__init__()
self.__fileName = fileName
self.__loadRepo()
# No hasClientWithId needed
def addClient(self, client):
super().addClient(client)
self.__storeRepo()
def __loadRepo(self):
with open(self.__filename) as file:
for line in file:
splitLine = line.split()
clientToAdd = ClientDAO(splitLine[1])
clientToAdd.setClientId(int(splitLine[0]))
super().addClientWithId(clientToAdd)
def __storeRepo(self):
with open(self.__filename, "w") as file:
file.write("")
for client in super().getList():
file.write(self.clientToString(client))
This obviously assumes that the file is not changed by someone else between calls to addClient and the program still overwrites the entire file for every addClient. If this is a problem for you it is best to be explicit and make loadRepo and storeRepo public. Then the programmer using this class can decide when loading and saving are necessary and useful. You can use context managers for this.
Extra: Read and save the file for every method
You can use function decorators to use solution 2 without writing the same code for every function:
import functools
def loadAndStore(function):
#functoools.wraps(function)
def wrappedFunction(self, *args, **kwargs):
if self.__isFileOpen:
return function(self, *args, **kwargs)
else:
self.__isFileOpen = True
self.__loadRepo()
try:
return function(self, *args, **kwargs)
except Exception as e: # Only catch expected exceptions
raise
finally:
self.__storeRepo()
self.clear() # some cleanup
self.__isFileOpen = False
return wrappedFunction
class ClientFileRepository(ClientRepo):
def __init__(self, fileName) -> None:
super().__init__()
self.__fileName = fileName
self.__isFileOpen = False
#loadAndStore
def hasClientWithId(self, clientId):
return super().hasClientWithId(clientId)
#loadAndStore
def addClient(self, client):
super().addClient(client)
def __loadRepo(self):
with open(self.__filename) as file:
for line in file:
splitLine = line.split()
clientToAdd = ClientDAO(splitLine[1])
clientToAdd.setClientId(int(splitLine[0]))
super().addClientWithId(clientToAdd)
def __storeRepo(self):
with open(self.__filename, "w") as file:
file.write("")
for client in super().getList():
file.write(self.clientToString(client))
Be careful here, using this is not very intuitive. For example self.__isFileOpen is defined in __init__, but none of the methods below directly use it. Instead its use is hidden in the loadAndStore decorator.
Some quick hints at the end:
type(client).__name__ == 'ClientDAO' is bad practice. Use isinstance(client, ClientDAO) to fully adopt OOP
If this is not part of a bigger project with given naming conventions use the python style guide
Using private variables like __fileName is generally considered unnecessary, just prefix the variable with one underscore to indicate "internal use". The same is true for functions.

Custom generator for file filter

I'm writing a small wrapper class around open that will filter out particular lines from a text file and then split them into name/value pairs before passing them back to the user. Naturally, this process lends itself to being implemented using generators.
My "file" class
class special_file:
def __init__(self, fname):
self.fname = fname
def __iter__(self):
return self
def __next__(self):
return self.next()
def next(self):
with open(self.fname, 'r') as file:
for line in file:
line = line.strip()
if line == '':
continue
name,value = line.split()[0:2]
if '%' in name:
continue
yield name,value
raise StopIteration()
Userland code
for g in special_file('input.txt'):
for n,v in g:
print(n,v)
My code, sadly, has two enormous problems: 1) special_file returns a generator when it really needs to return a tuple, and 2) the StopIteration() exception is never raised so the file is read repeatedly ad infinitum. I have a sneaking suspicion that these two issues are related, but my understanding of generators and iterable sequences is fairly limited. Have I missed something painfully obvious about implementing a generator?
Edit:
I fixed my infinite reading problem by moving the first generator outside of the loop and then just looping over it.
g = special_file('input.txt')
k = next(g)
for n,v in k:
print(n,v)
However, I would like the user to be able to use it like a normal call to open:
for n,v in special_file('input.txt'):
print(n,v)
You've implemented an iterator, in terms of using a generator. Just write the generator directly.
def special_file(filename):
with open(filename, 'r') as file:
for line in file:
line = line.strip()
if line == '':
continue
name, value, *_ = line.split()
if '%' in name:
continue
yield name, value
See here for an overview of what it means to be iterable, what an iterator is, and python's protocols for using them.
Just change
def __iter__(self):
return self
to
def __iter__(self):
return next(self)
and it works as expected!
Thanks to #Leva7 for the suggestion.

Fabrica for different parsers

Problem:
Write a class Fabrica what give: way to file and/or format file,
return: data from this file in dict.
Write a abstract class Reader what have just one method "reader"
without implementation.
Write 3 classes CSVReader, XMLReader, JSONReader. They
inherited from Reader, must implementation method "reader" with
functionality for parse csv, json, xml. Must return data in
dict format to fabrica.
So i have next problem. I don`t understand how to correctly write this all classes. I wrote solution, but have error (code below).
My question is: how to correctly write this all classes?
And recommend me a some good book about OOP please)
class FactoryRader:
def __init__(self, fileName,frmt=None):
self.frmt = frmt
self.fileName = fileName
def __str__(self):
return Reader.openFile(self.fileName, self.frmt)
class Reader:
def openFile(fileName, frmt):
try:
with open(fileName, 'rU') as data:
if fileName.endswith('.csv') or frmt == 'csv':
return CSVReader.reader(data)
if fileName.endswith('.xml') or frmt == 'xml':
return XMLReader()
if fileName.endswith('.js') or (frmt == 'json' or frmt == 'js'):
return JSONReader()
else:
return 'Incorrect File!'
except IOError:
print('Cant open')
def reader(data):
pass
class CSVReader(Reader):
def reader(data):
dialect = csv.Sniffer().sniff(data.readline(), [',',';'])
data.seek(0)
reader = csv.DictReader(data, dialect=dialect)
for row in reader:
print (row)
class JSONReader(Reader):
def reader(data):
pass
class XMLReader(Reader):
def reader(data):
pass
if __name__ == '__main__':
data = FactoryRader('CsvExamples/price.csv')
print(data)
Error
Traceback (most recent call last):
File "ClassParsers.py", line 62, in <module>
print(data)
TypeError: __str__ returned non-string (type NoneType)
These is by no means a complete solution; it's just a bunch of remarks.
As I understand, the try/except block in Reader should go in FactoryReader. There is no such thing as abstract class in Python, so your class Reader could be empty. Or, if you prefer, just
class Reader:
def reader(data):
pass
(if you are using Python2, it's better to use new style classes: class Reader(object)).
You are asked for FactoryReader to return a dict, not a string, so the __str__ function is not important. The Error you get is telling you that the __str__ method in FactoryReader should return a string. It would be better to not implement __str__ and use another name for that function; say get_reader. And then, you should return the data in a dict way. So it would be
def get_reader(self):
if self.fileName.endswith('.csv') or self.frmt == 'csv':
return CSVReader(self.filename)
etc
Then,
class CSVReader(Reader):
def reader(filename):
code to open filename, read it and parse it
code to convert parsed code into dict
return dict
Similarly for JSONReader and XMLReader.

Using one class from another class

I wrote a simple program to read through a log and to parse through and obtain the lowest beginning number (the head) and to print it. I am now editing that program and combining it with a class I wrote to parse an actual logfile. Essentially, as opposed to sorting based off of the simple number from the log from my previous program, I now need to reference the parsed information from one class into another class. I was wondering what the most convenient way to do this. I am a beginner programmer in python and don't know if I can explicitly reference the class.
Here are the classes.
Parser
class LogLine:
SEVERITIES = ['EMERG','ALERT','CRIT','ERR','WARNING','NOTICE','INFO','DEBUG']
severity = 1
def __init__(self, line):
try:
m = re.match(r"^(\d{4}-\d{2}-\d{2}\s*\d{2}:\d{2}:\d{2}),?(\d{3}),?(\s+\[(?:[^\]]+)\])+\s+[A-Z]+\s+(\s?[a-zA-Z0-9\.])+\s?(\((?:\s?\w)+\))\s?(\s?.)+", line)
timestr, msstr, sevstr, self.filename, linestr, self.message = m.groups()
self.line = int(linestr)
self.sev = self.SEVERITIES.index(sevstr)
self.time = float(calendar.timegm(time.strptime(timestr, "%Y-%m-%d %H:%M:%S,%f"))) + float(msstr)/1000.0
dt = datetime.strptime(t, "%Y-%m-%d %H:%M:%S,%f")
except Exception:
print 'error',self.filename
def get_time(self):
return self.time
def get_severity(self):
return self.sev
def get_message(self):
return self.message
def get_filename(self):
return self.filename
def get_line(self):
return self.line
Sorter
class LogFile:
def __init__(self,filepath):
self.logfile = open(filepath, "r")
self.head = None
def __str__(self):
return "x=" + str(self.x) + "y="+str(self.y)
def readline(self):
if self.head != None:
h = self.head
self.head = None
return h
else:
return self.logfile.readline().rstrip(' ')
def get_line(self):
if self.head == None:
self.head = self.readline().rstrip(' ')
return self.head.get.line()
else:
return self.head.get.line()
def close (self):
self.logfile.close()
I have begun to edit my second class by adding the get_line function. Don't know if I'm on the right track.
In simpler terms, I need the head to become "LogLine"
It is okay to use one class from another class. You have one class that parses a single line from a log file and builds an object that represents the line; and you have another class that reads lines from a log file. It would be very natural for the second class to call the first class.
Here is a very simple class that reads all lines from a log file and builds a list:
class LogFile(object):
def __init__(self,filepath):
with open(filepath, "r") as f:
self.lst = [LogLine(line) for line in f]
You can see that self.lst is being set to a list of lines from the input log file, but not just the text of the line; the code is calling LogLine(line) to store instances of LogLine. If you want, you can sort the list after you build it:
self.lst.sort(key=LogLine.get_line)
If the log files are very large, it might not be practical to build the list. You have a .get_line() method function, and we can use that:
class LogFile(object):
def __init__(self,filepath):
self.logfile = open(filepath, "r")
def get_line(self):
try:
line = next(self.logfile) # get next line from open file object
return LogLine(line)
except StopIteration: # next() raises this when you reach the end of the file
return None # return
def close(self):
self.logfile.close()
An open file object (returned by the open() function) can be iterated. We can call next() on this object and it will give us the next input line. When the end of file is reached, Python will raise StopIteration to signal the end of the file.
Here the code will catch the StopIteration exception and return None when the end of the log file is reached. But I think this isn't the best way to handle this problem. Let's make the LogFile class work in for loops and such:
class LogFile(object):
def __init__(self,filepath):
self.f = open(filepath)
def __next__(self): # Python 3.x needs this to be named "__next__"
try:
line = next(self.f)
return LogLine(line)
except StopIteration:
# when we reach the end of input, close the file object
self.f.close()
# re-raise the exception
raise
next = __next__ # Python 2.x needs this to be named "next"
A for loop in Python will repeatedly call the .__next__() method function (Python 3.x) or else the .next() method function (Python 2.x) until the StopIteration exception is raised. Here we have defined both method function names so this code should work in Python 2.x or in Python 3.x.
Now you can do this:
for ll in LogFile("some_log_file"):
... # do something with ll, which will always be a LogLine instance

Categories