I have a CSV file with the contents below:
101,item_1
101,item_1
If the file is a CSV, my code below should run:
import csv
fName = input()
def read_csv(fName):
    """Print every row of the CSV file at ``fName``.

    Each row is printed as the list of fields produced by ``csv.reader``.
    If the file cannot be opened or read, a message is printed instead of
    letting the exception propagate.
    """
    try:
        # newline='' lets the csv module deal with line endings itself.
        with open(fName, 'r', newline='') as f:
            for row in csv.reader(f):
                print(row)
    except OSError:
        print("Could not read file:", fName)
read_csv(fName)
How can I move the exception handling into a decorator function and apply it on top of that function?
First decorator:
If fName does not end with .txt or .csv, it has to output "not accept".
Second decorator:
If fName is a text file (e.g. file.txt), then the operations below have to be performed:
def read_txt(fName):
    """Print the first line of the text file at ``fName``."""
    # Context manager so the handle is closed once the line has been read.
    with open(fName, "r") as f:
        print(f.readline())
If it is a CSV, the first function should execute; if it is a TXT, the second function should execute. How can I achieve this using decorators? I could use an if condition to handle this, but that is not what I am after.
My whole code without decorator is below
fName = input()
def read_csv(fName):
    """Print the contents of ``fName`` according to its extension.

    * ``.csv`` - every row is printed as a list of fields; an unreadable
      file is reported with a message instead of an exception.
    * ``.txt`` - only the first line is printed.
    * anything else - ``not accept`` is printed.
    """
    if fName.endswith('.csv'):
        try:
            with open(fName, 'r', newline='') as f:
                for row in csv.reader(f):
                    print(row)
        except OSError:
            print("Could not read file:", fName)
    elif fName.endswith('.txt'):
        # Context manager so the handle is closed once the line has been read.
        with open(fName, "r") as f:
            print(f.readline())
    else:
        print('not accept')
read_csv(fName)
You can do it like this with decorators:
import functools
def check_arguments(func):
    """Decorator that rejects file names not ending in ``.csv`` or ``.txt``.

    The file name is read from the ``fname`` keyword argument, or from the
    first positional argument when the keyword is absent. For an unsupported
    name, ``not accept`` is printed and ``func`` is not called (the wrapper
    returns ``None``); otherwise the call is forwarded unchanged.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if 'fname' in kwargs:
            fname = kwargs['fname']
        elif args:
            fname = args[0]
        else:
            # Nothing to validate; defer to func's own argument handling.
            return func(*args, **kwargs)
        if not fname.endswith(('.csv', '.txt')):
            print('not accept')
            return None
        return func(*args, **kwargs)
    return wrapper
def set_file_processor(func):
    """Decorator that runs the reader matching the file's extension, then ``func``.

    The file name is read from the ``fname`` keyword argument, or from the
    first positional argument when the keyword is absent. ``.csv`` names go
    to ``read_csv`` and ``.txt`` names to ``read_txt``; any other name runs
    no reader. ``func`` is always called afterwards and its result returned.
    """
    def read_csv(fname):
        print('read_csv', fname)

    def read_txt(fname):
        print('read_txt', fname)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if 'fname' in kwargs:
            fname = kwargs['fname']
        elif args:
            fname = args[0]
        else:
            # Nothing to dispatch on; defer to func's own argument handling.
            return func(*args, **kwargs)
        if fname.endswith('.csv'):
            read_csv(fname)
        elif fname.endswith('.txt'):
            read_txt(fname)
        return func(*args, **kwargs)
    return wrapper
@check_arguments
@set_file_processor
def process(fname):
    """Entry point for a file name; the work is done by the decorators above."""
    pass
process(fname='input.csv')
Your problem doesn't seem to come under decorator but under factory pattern i.e. process differently based on the input file.
The below code is a very simple and basic Factory pattern solution to your problem, this should be modified accordingly as per your need,
import os
from abc import ABC, abstractmethod
class FileProcessor(ABC):
    """Interface for objects that know how to process one kind of file."""

    @abstractmethod
    def process(self, file_path):
        """Process the file at ``file_path``; concrete subclasses must override."""
class TextFileProcessor(FileProcessor):
    """Processor used for text files."""

    def process(self, file_path):
        """Placeholder for text handling; currently only prints a marker."""
        marker = "Text file processing goes here"
        print(marker)
class CsvFileProcessor(FileProcessor):
    """Processor used for CSV files."""

    def process(self, file_path):
        """Placeholder for CSV handling; currently only prints a marker."""
        marker = "CSV file processing goes here"
        print(marker)
class DefaultFileProcessor(FileProcessor):
    """Fallback processor: rejects the file it is given."""

    def process(self, file_path):
        """Raise ``ValueError`` naming ``file_path``."""
        message = "File %s is not valid" % file_path
        raise ValueError(message)
class FileFactory:
    """Pick a processor class from a file's extension and run it."""

    # Extension (text after the last dot) -> processor class.
    # 'default' is used when there is no dot or the extension is unknown.
    processors = {
        'txt': TextFileProcessor,
        'csv': CsvFileProcessor,
        'default': DefaultFileProcessor
    }

    def __init__(self, file_path):
        """Remember ``file_path``; raise ``IOError`` if it does not exist."""
        path_found = os.path.exists(file_path)
        if not path_found:
            raise IOError("File not found")
        self.file_path = file_path

    def process(self):
        """Instantiate the matching processor and return its ``process`` result."""
        # rpartition yields an empty separator when the path contains no dot.
        _, dot, suffix = self.file_path.rpartition(".")
        key = suffix if dot and suffix in self.processors else "default"
        chosen = self.processors.get(key)
        return chosen().process(self.file_path)
FileFactory(file_path).process()
In later stage if you would like to add json processor then it can also be done easily by adding
processors = {
'txt': TextFileProcessor,
'csv': CsvFileProcessor,
'json': JsonFileProcessor,
'default': DefaultFileProcessor
}
and creating new Json processor class,
class JsonFileProcessor(FileProcessor):
    """Processor used for JSON files."""

    def process(self, file_path):
        """Placeholder for JSON handling; currently only prints a marker."""
        marker = "JSON file processing goes here"
        print(marker)
Based on your code and this very useful guide, here is a possible solution:
def read_file_decorator(fName):
    """Return a zero-argument reader for ``fName``, chosen by its extension.

    The returned function remembers ``fName`` and, when called, prints a
    tag line followed by the file's contents: parsed rows for ``.csv``,
    raw lines for ``.txt``. ``None`` is returned for any other extension.
    """
    def read_csv():
        print('read_csv')
        with open(fName, 'r', newline='') as f:
            for row in csv.reader(f):
                print(row)

    def read_txt():
        print('read_txt')
        # Context manager so the handle is closed after the last line.
        with open(fName, 'r') as f:
            for row in f:
                print(row)

    if fName.endswith('.csv'):
        return read_csv
    if fName.endswith('.txt'):
        return read_txt
    return None
# NOTE(review): fileName is not defined in this snippet - presumably the
# caller sets it beforehand (e.g. from input()); confirm.
reader_function = read_file_decorator(fileName)
# None is the "unsupported file type" marker, so compare by identity.
if reader_function is not None:
    reader_function()
else:
    print('not accept')
I use a stateful decorator remembering the file name inside the reader function before actually executing it (in order not to pass it twice); and I use the fixed value None for invalid file types.
Given the requirements, using a decorator here would be overkill. But if it is mandatory to implement this with a decorator, this is how it can be done:
We can create a dummy function called read_file and a decorator function called reader
The user always calls read_file with the file name as its argument; the decorator function reader checks the extension of the passed file name and calls the required function - read_csv or read_text.
def reader(fun):
    """Decorator that replaces ``fun`` with an extension-based dispatcher.

    The wrapper looks at its first positional argument: ``.csv`` names are
    passed to ``read_csv``, ``.txt`` names to ``read_text``, and anything
    else prints ``not accepted``. ``fun`` itself is never called; it only
    supplies the name and docstring of the resulting function.
    """
    # Function-scope import: this snippet has no module-level functools import.
    import functools

    @functools.wraps(fun)
    def wrapper(*args):
        fname = args[0]
        if fname.endswith('.csv'):
            read_csv(fname)
        elif fname.endswith('.txt'):
            read_text(fname)
        else:
            print('not accepted')
    return wrapper
def read_csv(fname):
    """Stand-in CSV reader: only announces that it was called."""
    banner = 'In read_csv()'
    print(banner)
def read_text(fname):
    """Stand-in text reader: only announces that it was called."""
    banner = 'In read_text()'
    print(banner)
@reader
def read_file(fname):
    """Public entry point; the ``reader`` decorator supplies the behaviour."""
    pass
read_file('a.csv')
read_file('a.txt')
read_file('filename.py')
Output
In read_csv()
In read_text()
not accepted
Related
What parameters should I pass to load a dataframe using this class?
class SafeLoadExcel:
    """Load a ``.csv`` or ``.xlsx`` file into a DataFrame.

    The pandas reader is chosen from the file extension when the object is
    created; ``load`` then performs the read.
    """

    def __init__(self, file_name):
        """Remember ``file_name`` and select the reader for its extension.

        Raises:
            ValueError: if the extension is neither ``.csv`` nor ``.xlsx``.
        """
        self.file = file_name
        extension = Path(file_name).suffix
        if extension == '.csv':
            self.load_f = pd.read_csv
        elif extension == '.xlsx':
            self.load_f = pd.read_excel
        else:
            # Fail here rather than with an AttributeError inside load().
            raise ValueError("File must be csv or xlsx")

    def load(self):
        """Return the file as a DataFrame, retrying CSVs as ISO-8859-1."""
        try:
            return self.load_f(self.file)
        except UnicodeDecodeError:
            # Only read_csv takes an ``encoding`` argument.
            if self.load_f is not pd.read_csv:
                raise
            return self.load_f(self.file, encoding='iso-8859-1')
Assuming there are only 2 methods on this class, you can get a df by
df = SafeLoadExcel("some/file/name.xlsx").load()
And that's a good hint that this should not have been a class in the first place. It would make as much sense to do
def safe_load_excel(file_name):
    """Load a ``.csv`` or ``.xlsx`` file into a DataFrame.

    CSV files that fail to decode with the default encoding are retried
    as ISO-8859-1.

    Raises:
        ValueError: if the extension is neither ``.csv`` nor ``.xlsx``.
    """
    extension = Path(file_name).suffix
    if extension == '.csv':
        load_f = pd.read_csv
    elif extension == '.xlsx':
        load_f = pd.read_excel
    else:
        raise ValueError("File must be csv or xlsx")
    try:
        # This is a plain function, so the path is ``file_name``, not ``self.file``.
        return load_f(file_name)
    except UnicodeDecodeError:
        # Only read_csv takes an ``encoding`` argument.
        if load_f is not pd.read_csv:
            raise
        return load_f(file_name, encoding='iso-8859-1')
Assuming SafeLoadExcel provides some other methods for working with the resulting data frame, I would define two separate class methods, and maybe a third that detects which of the first two to use depending on extension present in the file name. SafeLoadExcel.__init__ itself is reserved for when you already have a dataframe, regardless of its original source.
class SafeLoadExcel:
    """Wrapper around a DataFrame with file-based alternate constructors.

    ``__init__`` takes an existing DataFrame; use ``from_csv``,
    ``from_excel`` or ``from_file`` to build an instance from a file.
    """

    def __init__(self, df):
        self.df = df

    @classmethod
    def _from_file(cls, filename, reader, **reader_kwargs):
        """Load a dataframe from a file.

        filename - name of file to read
        reader - a function to parse the file and return a dataframe
        reader_kwargs - extra keyword arguments forwarded to ``reader``
        """
        return cls(reader(filename, **reader_kwargs))

    # CSV-specific wrapper around _from_file
    @classmethod
    def from_csv(cls, filename):
        """Build an instance from a CSV file decoded as ISO-8859-1."""
        return cls._from_file(filename, pd.read_csv, encoding='iso-8859-1')

    # Excel-specific wrapper around _from_file
    @classmethod
    def from_excel(cls, filename):
        """Build an instance from an Excel workbook."""
        # read_excel has no ``encoding`` parameter, so none is passed.
        return cls._from_file(filename, pd.read_excel)

    # Not to be confused with the private method with a similar name.
    # Detect which public class method to use.
    @classmethod
    def from_file(cls, filename):
        """Build an instance, choosing the loader from the file extension.

        Raises:
            ValueError: if ``filename`` ends in neither ``.csv`` nor ``.xlsx``.
        """
        if filename.endswith('.csv'):
            loader = cls.from_csv
        elif filename.endswith('.xlsx'):
            loader = cls.from_excel
        else:
            raise ValueError(f"Cannot determine type of {filename}")
        return loader(filename)
Then you can use
f1 = SafeLoadExcel.from_csv("foo.csv")
f2 = SafeLoadExcel.from_excel("foo.xlsx")
f3 = SafeLoadExcel.from_file("foo.csv")
f4 = SafeLoadExcel.from_file("foo.xlsx")
f1 and f3 should be effectively the same thing, as should be f2 and f4.
I was asked to build two decorators:
One of them reads all the files in a directory and its subdirectories, and returns the names of the files that match the patterns it gets as arguments.
It passes them as the "files" argument to the wrapped function.
The second gets a list of files and returns a dict whose keys are the file names and whose values are the files' contents.
I have done everything I was asked up to this point.
The next part of the question is to pass the files from the first decorator to the second, and to make the second decorator's parameters optional - I mean, if the function decorated with the second is also decorated with the first, the second decorator shouldn't take any parameters and should get the file list from the first.
This is where I am stuck.
that's the decorators :
The first:
from pathlib import Path
import fnmatch
import os
import glob
def find(*args):
    """Decorator factory: hand matching file names to the wrapped function.

    ``args`` are ``fnmatch``-style patterns (e.g. ``"*.py"``). Each time the
    decorated function is called, the current directory is walked
    recursively and the names of all files matching at least one pattern
    are passed to it as the ``files`` keyword argument. Only file names are
    considered, and a name is listed once per occurrence on disk even if it
    matches several patterns.
    """
    # Function-scope import: this snippet has no module-level functools import.
    import functools

    def inner(f):
        @functools.wraps(f)
        def wrapper(*wrapper_args, **kwargs):
            # Built fresh on every call so results never pile up between calls.
            files = []
            for _dirpath, _dirnames, filenames in os.walk("."):
                for name in filenames:
                    if any(fnmatch.fnmatch(name, pattern) for pattern in args):
                        files.append(name)
            return f(*wrapper_args, files=files, **kwargs)
        # Return the wrapper itself; calling it here would run ``f`` at
        # decoration time and bind the decorated name to its result.
        return wrapper
    return inner
@find("*.py", "*.txt")
def file_printer(files):
    """Print the list of file names supplied by the ``find`` decorator."""
    print(files)
The second:
def read_file(path):
    """Return the full text of the file at ``path``."""
    with open(path, "r") as handle:
        text = handle.read()
    return text
def use(*decorated, **use_kwargs):
    """Decorator: pass the contents of named files to the wrapped function.

    Works in two forms:

    * ``@use(files=["cat.py", "dog.py"])`` - the file names are fixed when
      the decorator is applied.
    * bare ``@use`` - the file names arrive at call time through a ``files``
      keyword argument, for example from an outer ``@find(...)``.

    A ``files`` argument given at call time takes precedence over the one
    given to the decorator. The current directory is walked recursively;
    every file whose name is in the list is read with ``read_file`` and the
    wrapped function receives ``content``, a dict mapping
    ``"<folder>/<name>"`` to the file's text.
    """
    # Function-scope import: this snippet has no module-level functools import.
    import functools

    def inner(f):
        @functools.wraps(f)
        def wrapper(*wrapper_args, **kwargs):
            # ``files`` is consumed here; the wrapped function only gets ``content``.
            files = kwargs.pop("files", None)
            if files is None:
                files = use_kwargs.get("files") or ()
            wanted = set(files)
            # Built fresh on every call so results never pile up between calls.
            content = {}
            for dirpath, _dirnames, filenames in os.walk("."):
                folder = Path(dirpath)
                for name in filenames:
                    if name in wanted:
                        file_path = f"{folder}/{name}"
                        content[file_path] = read_file(file_path)
            return f(*wrapper_args, content=content, **kwargs)
        # Return the wrapper itself; calling it here would run ``f`` at
        # decoration time and bind the decorated name to its result.
        return wrapper

    # Bare ``@use``: the function to decorate arrives as the only positional.
    if decorated and callable(decorated[0]):
        return inner(decorated[0])
    return inner
@use(files=["cat.py", "dog.py"])
def file_printer(content):
    """Print the file-contents mapping supplied by the ``use`` decorator."""
    print(content)
And it should work in this case too, and pass the "files" argument from the first to the second. Here I need your help.
# Desired stacking, left disabled: find(...) outermost with a bare `use`
# beneath it, so that `use` takes its "files" list from `find` rather than
# from its own arguments.
#find("*.py", "*.txt")
#use
def file_printer(content):
    """Print whatever is passed in as ``content``."""
    payload = content
    print(payload)
I am trying to mock open and want to check if close gets called at least once
class MyObject():
    """Holds the whitespace-stripped lines of a file as ``file_list``."""

    def __init__(self, path):
        """Open ``path``, collect its stripped lines, then close the handle."""
        fp = open(path)
        try:
            self.file_list = [line.strip() for line in fp]
        finally:
            # In a finally so the handle is also closed if reading raises.
            fp.close()
def testsimpleFile():
    """Check that MyObject opens the given path once and reads its lines."""
    fake_file = io.StringIO("data.csv\ndata2.csv")
    with patch("builtins.open", return_value=fake_file, create=True) as mock_file:
        # Same path as in the assertion below; they previously differed by a
        # leading slash, which makes assert_called_once_with fail.
        f = MyObject("/path/to/open/test.f")
        mock_file.assert_called_once_with("/path/to/open/test.f")
        golden_list = ["data.csv", "data2.csv"]
        assert f.file_list == golden_list
This is my working test code so far. Now I additionally want to check whether the close method was called. I tried to add
mock_file.close.assert_called_once()
and
mock_file.fake_file.close.assert_called_once()
but neither of them detects the method call.
The short of it is: You can't track that the function is being called with assert_called_once if the return value of open isn't a mock object. So, instead of making the return value a StringIO we can make it a MagicMock that will act like a file handle.
import io
from unittest.mock import patch, MagicMock
class MyObject():
    """Holds the whitespace-stripped lines of a file as ``file_list``."""

    def __init__(self, path):
        """Open ``path``, collect its stripped lines, then close the handle."""
        fp = open(path)
        try:
            self.file_list = [line.strip() for line in fp]
        finally:
            # In a finally so the handle is also closed if reading raises.
            fp.close()
def testsimpleFile():
    """Check that MyObject opens the path once, reads the lines and closes the handle."""
    target = "/path/to/open/test.f"
    # A MagicMock stands in for the file handle so calls on it are recorded.
    handle = MagicMock()
    handle.__iter__.return_value = ["data.csv", "data2.csv"]
    with patch("builtins.open", return_value=handle, create=True) as open_mock:
        obj = MyObject(target)
        open_mock.assert_called_once_with(target)
        expected_lines = ["data.csv", "data2.csv"]
        assert obj.file_list == expected_lines
        handle.close.assert_called_once()
I'm getting the following error when trying to read the row and column count of a CSV:
> coercing to Unicode: need string or buffer, S3BotoStorageFile found
import csv
class CSV:
    """Row and column access to a CSV file identified by its path.

    The file is re-read on every method call.
    """

    def __init__(self, file=None):
        self.file = file

    def read_file(self):
        """Parse the file and return its rows as a list of lists of strings."""
        # The open file goes straight to csv.reader (with newline='') so
        # quoted fields that span lines keep their embedded newlines.
        with open(self.file, 'r', newline='') as f:
            return list(csv.reader(f))

    def get_row_count(self):
        """Return the number of rows in the file."""
        return len(self.read_file())

    def get_column_count(self):
        """Return the number of fields in the first row."""
        return len(self.read_file()[0])

    def get_data(self, rows=1):
        """Return the first ``rows`` rows."""
        return self.read_file()[:rows]
How do I resolve?
Well, after reading your code my first reaction was: OMG! How many times does it open that poor file?
Here's a new version of your class
class CSV:
    """Row and column access to a CSV file that is read once, at construction."""

    def __init__(self, file=None):
        """Read every row of the CSV at path ``file`` into ``self.data``."""
        self.file = file
        with open(self.file, 'r', newline='') as f:
            self.data = list(csv.reader(f))

    def get_row_count(self):
        """Return the number of rows read."""
        return len(self.data)

    def get_column_count(self):
        """Return the number of fields in the first row."""
        return len(self.data[0])

    def get_data(self, rows=1):
        """Return the first ``rows`` rows."""
        return self.data[:rows]
I also fixed your csv.reader() handling. It accepts a file object, no need to .read() or .read().splitlines(), it can only lead to errors. Which may be the reason why it failed.
Ok, given from what you say, you're working on AWS, and your file is not a string path to a file, but already a file object. So you don't need the open() part as is. You may want to modify your code so it is as follows:
class CSV:
    """Row and column access to CSV data given as a path or an open file object.

    The data is read once, at construction, into ``self.data``.
    """

    def __init__(self, f=None):
        """Read all rows from ``f``.

        ``f`` may be a string path, which is opened and closed here, or any
        object with a ``read`` method, which is handed to ``csv.reader``
        as-is and left open.

        Raises:
            TypeError: if ``f`` is neither of those.
        """
        self.file = f
        if isinstance(self.file, str):
            # A string is a path that has to be opened.
            with open(self.file, 'r', newline='') as handle:
                self.data = list(csv.reader(handle))
        elif hasattr(self.file, 'read'):
            # Duck-typed check: Python 3 has no ``file`` builtin, and wrapper
            # classes do not share a base class with built-in file objects.
            # NOTE(review): csv.reader needs text lines - confirm the object
            # is opened in text mode.
            self.data = list(csv.reader(self.file))
        else:
            raise TypeError(
                "File object type unknown: %s %s" % (type(self.file), self.file)
            )

    def get_row_count(self):
        """Return the number of rows read."""
        return len(self.data)

    def get_column_count(self):
        """Return the number of fields in the first row."""
        return len(self.data[0])

    def get_data(self, rows=1):
        """Return the first ``rows`` rows."""
        return self.data[:rows]
Reading the S3BotoStorage.py, the S3BotoStorage class inherits from django.core.files.base.File, which inherits from django.core.files.utils.FileProxyMixin, which is a composition of attributes of the global python file class.
So a File object is not an instance of file, but it has a compatible interface. Therefore, in the previous code I have tested whether the self.file is a str, then it shall be a path that we open() so we get a file() and parse it. Otherwise, self.file is a File object or a file() object, and we just need to parse it. If it's neither of those, then it's an error, and we shall except.
In python, is there an easy way to set up a file-like object for writing that is actually backed by multiple output streams? For instance, I want something like this:
file1 = open("file1.txt", "w")
file2 = open("file2.txt", "w")
ostream = OStreamWrapper(file1, file2, sys.stdout)
#Write to both files and stdout at once:
ostream.write("ECHO!")
So what I'm looking for is OStreamWrapper. I know it'd be pretty easy to write my own, but if there's an existing one, I'd rather use that and not have to worry about finding and covering edge cases.
class OStreamWrapper(object):
    """Write-only file-like object that repeats each call on several streams."""

    def __init__(self, *streams):
        self.streams = [*streams]

    def _each(self, method_name, *call_args):
        """Call ``method_name`` with ``call_args`` on every stream, in order."""
        for target in self.streams:
            getattr(target, method_name)(*call_args)

    def write(self, string):
        """Write ``string`` to every stream."""
        self._each('write', string)

    def writelines(self, lines):
        """Write each item of ``lines`` to every stream.

        Items are written one at a time, so ``lines`` is iterated only once
        and may be a generator.
        """
        for text in lines:
            self._each('write', text)

    def flush(self):
        """Flush every stream."""
        self._each('flush')
Way to wrap all public file functions:
import sys
def _call_for_all_streams(func_name):
def wrapper(self, *args, **kwargs):
result = []
for stream in self._streams:
func = getattr(stream, func_name)
result.append(func(*args, **kwargs))
return result
return wrapper
import io


class OStreamWrapper(object):
    """File-like object whose public file methods fan out to several streams.

    The methods are attached by the loop below this class; each one returns
    the list of per-stream results.
    """

    def __init__(self, *streams):
        self._streams = streams


# Python 3 has no ``file`` builtin, so the method names are taken from
# io.TextIOBase instead. Only callables are wrapped: data attributes such as
# ``closed`` or ``encoding`` would make no sense as fan-out methods.
for method in dir(io.TextIOBase):
    if not method.startswith('_') and callable(getattr(io.TextIOBase, method)):
        setattr(OStreamWrapper, method, _call_for_all_streams(method))
if __name__ == '__main__':
file1 = open("file1.txt", "w")
file2 = open("file2.txt", "w")
ostream = OStreamWrapper(file1, file2, sys.stdout)
ostream.write("ECHO!")
ostream.close()
But it's kinda dirty.
Logbook is another option although it is more than that. Its handlers are more powerful and you can combine whatever you like.