Python: Use local variable globally does not work - python

I am trying to use my code to open a file after searching for it on either operating system. However, when I assign the variable inside the function, I can't use it outside of the function. And when I keep the second function outside of the first function, it doesn't recognize the function.
I tried to declare df_location as global, but this doesn't work.
When I use df = pd.read_csv(df_location[0], index_col=0) inside the function, I am not able to use df anywhere else in my code.
# Question code: search the disk for AB_NYC_2019.csv and load it into a DataFrame.
# NOTE(review): indentation was lost in this listing, so block nesting must be inferred.
# NOTE(review): an answer below reports platform.system() returning "Windows" (capital W) — verify this lowercase comparison.
if platform.system() == 'windows':
def find_file(root_folder, rex):
# Walk everything below root_folder; rex.search() is applied to each bare file name.
for root, dirs, files in os.walk(root_folder):
for f in files:
result = rex.search(f)
if result:
file_path = os.path.join(root, f)
return file_path
def find_file_in_all_drives(file_name):
# Builds a list of the paths that find_file reports for the individual drives.
matching_files = list()
# create a regular expression for the file
rex = re.compile(file_name)
for drive in win32api.GetLogicalDriveStrings().split('\000')[:-1]:
file_path = find_file(drive, rex)
if file_path:
matching_files.append(file_path)
return matching_files
global df_location
df_location = find_file_in_all_drives("AB_NYC_2019.csv")
# NOTE(review): here find_file receives a plain string, but it calls rex.search() on that argument.
if platform.system() == 'mac':
df_location = find_file("/", "AB_NYC_2019.csv")
# NOTE(review): [0] fits the list built by find_file_in_all_drives; find_file itself returns a single joined path.
df = pd.read_csv(df_location[0], index_col=0)
I would like to be able to use the file that is retrieved through the functions.
Thank you!
ideally it should be like this
# Second variant of the question code (no `global` statement); the text below reports a NameError for it.
# NOTE(review): indentation was lost in this listing, so block nesting must be inferred.
if platform.system() == 'windows':
def find_file(root_folder, rex):
# Walk everything below root_folder; rex.search() is applied to each bare file name.
for root, dirs, files in os.walk(root_folder):
for f in files:
result = rex.search(f)
if result:
file_path = os.path.join(root, f)
return file_path
def find_file_in_all_drives(file_name):
# Builds a list of the paths that find_file reports for the individual drives.
matching_files = list()
# create a regular expression for the file
rex = re.compile(file_name)
for drive in win32api.GetLogicalDriveStrings().split('\000')[:-1]:
file_path = find_file(drive, rex)
if file_path:
matching_files.append(file_path)
return matching_files
# NOTE(review): presumably both functions are only defined when the platform check above is true — confirm against the original indentation.
df_location = find_file_in_all_drives("AB_NYC_2019.csv")
if platform.system() == 'mac':
df_location = find_file("/", "AB_NYC_2019.csv")
df = pd.read_csv(df_location[0], index_col=0)
but this gives the error message:
"NameError: name 'find_file_in_all_drives' is not defined"

You define find_file_in_all_drives only for Windows, but you should also define it for the other systems. Each system will have different code inside find_file_in_all_drives, and then you can call find_file_in_all_drives on every system.
# all systems use it so it should be defined for all
def find_file(root_folder, rex):
    """Return the path of the first file below *root_folder* whose name matches *rex*.

    Args:
        root_folder: Directory at which the recursive search starts.
        rex: Compiled regular expression, or a pattern string that is
            compiled here, searched against each bare file name.

    Returns:
        The joined path of the first match, or ``None`` when nothing matches.
    """
    import re

    # Callers on this page pass both compiled patterns and plain strings.
    pattern = re.compile(rex) if isinstance(rex, str) else rex
    for root, _dirs, files in os.walk(root_folder):
        for name in files:
            if pattern.search(name):
                return os.path.join(root, name)
    return None
# define different `find_file_in_all_drives` for different systems
if platform.system() == 'Windows':
    def find_file_in_all_drives(file_name):
        """Search every logical drive and return a list of matching file paths.

        Args:
            file_name: Regular-expression pattern matched against file names.

        Returns:
            A list with at most one path per drive; empty when nothing matches.
        """
        # create a regular expression for the file
        rex = re.compile(file_name)
        drives = win32api.GetLogicalDriveStrings().split('\000')[:-1]
        hits = (find_file(drive, rex) for drive in drives)
        return [path for path in hits if path]
else:
    def find_file_in_all_drives(file_name):
        """Search from the filesystem root and return a list of matching file paths.

        Args:
            file_name: Regular-expression pattern matched against file names.

        Returns:
            A one-element list with the first match, or an empty list.
        """
        # Return a list, like the Windows variant, so callers can index [0].
        hit = find_file("/", re.compile(file_name))
        return [hit] if hit else []
# now you can use `find_file_in_all_drives` on every system
df_location = find_file_in_all_drives("AB_NYC_2019.csv")
# NOTE(review): the [0] index assumes df_location is a non-empty list of paths — confirm for every platform branch.
df = pd.read_csv(df_location[0], index_col=0)

You didn't show all your code. Presumably, you have find_file and find_file_in_all_drives function implementations for mac as well, yes? At least that's what I would expect just from looking at the code you've posted.
If that really is ALL the code you have, then the way it's written now, you're only defining find_file and find_file_in_all_drives if platform.system() returns "windows" (side note: just tried this, on my Windows 7 system it returns "Windows" with a capital 'W'.) If that condition is not satisfied these function definitions are not visible anywhere else in your code, because you've put them inside the body of the if-statement.
It looks like you are trying to get different behavior depending on the contents of a string (platform.system()). Since you can't avoid having to implement the varying behavior for both operating systems, you can use polymorphism for this:
import abc
class DataFrameFinder(abc.ABC):
    """Interface for locating a data file on one operating system."""

    def __init__(self):
        pass

    @abc.abstractmethod
    def find_file(self, root_folder, rex):
        """Search below *root_folder* for a file matching *rex*."""
        raise NotImplementedError

    @abc.abstractmethod
    def find_file_in_all_drives(self, file_name):
        """Search all of the system's storage for *file_name*."""
        raise NotImplementedError


class DataFrameFinderWindows(DataFrameFinder):
    """Windows implementation of the finder interface (placeholder bodies)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def find_file(self, root_folder, rex):
        # Do windows things...
        pass

    def find_file_in_all_drives(self, file_name):
        # Do windows things...
        pass


class DataFrameFinderMac(DataFrameFinder):
    """macOS implementation of the finder interface (placeholder bodies)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def find_file(self, root_folder, rex):
        # Do mac things...
        pass

    def find_file_in_all_drives(self, file_name):
        # Do mac things...
        pass
def main():
    """Pick the finder for the current platform and run it.

    Returns:
        0 on success, 1 when no finder exists for the running platform.
    """
    import platform
    import sys

    finder_factory = {
        "Windows": DataFrameFinderWindows,
        # platform.system() reports macOS as "Darwin"; "Mac" is kept for compatibility.
        "Darwin": DataFrameFinderMac,
        "Mac": DataFrameFinderMac
    }
    system = platform.system()
    finder_class = finder_factory.get(system)
    if finder_class is None:
        print("No DataFrameFinder available for platform %r" % system, file=sys.stderr)
        return 1
    finder = finder_class()
    # Placeholder call from the original answer: supply the real arguments here.
    finder.find_file(...)
    return 0
if __name__ == "__main__":
import sys
sys.exit(main())

Related

How can i execute a class with a separate load method?

What parameters should I pass to load a dataframe using this class?
class SafeLoadExcel:
    """Load a ``.csv`` or ``.xlsx`` file into a DataFrame with an encoding fallback."""

    def __init__(self, file_name):
        """Remember *file_name* and select the pandas reader from its extension.

        Raises:
            ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
        """
        self.file = file_name
        extension = Path(file_name).suffix
        if extension == '.csv':
            self.load_f = pd.read_csv
        elif extension == '.xlsx':
            self.load_f = pd.read_excel
        else:
            # Fail here instead of leaving load_f unset and crashing in load().
            raise ValueError(f"File must be csv or xlsx: {file_name}")

    def load(self):
        """Return the file's DataFrame, retrying as ISO-8859-1 if decoding fails."""
        try:
            return self.load_f(self.file)
        except UnicodeDecodeError:
            return self.load_f(self.file, encoding='iso-8859-1')
Assuming there are only 2 methods on this class, you can get a df by
df = SafeLoadExcel("some/file/name.xlsx").load()
And that's a good hint that this should not have been a class in the first place. It would make as much sense to do
def safe_load_excel(file_name):
    """Load a ``.csv`` or ``.xlsx`` file into a DataFrame.

    Args:
        file_name: Path of the file; its extension selects the pandas reader.

    Returns:
        The loaded DataFrame. If the first read raises UnicodeDecodeError,
        the read is retried with ``encoding='iso-8859-1'``.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    extension = Path(file_name).suffix
    if extension == '.csv':
        load_f = pd.read_csv
    elif extension == '.xlsx':
        load_f = pd.read_excel
    else:
        raise ValueError("File must be csv or xlsx")
    try:
        # A plain function has no ``self``; read the parameter directly.
        return load_f(file_name)
    except UnicodeDecodeError:
        return load_f(file_name, encoding='iso-8859-1')
Assuming SafeLoadExcel provides some other methods for working with the resulting data frame, I would define two separate class methods, and maybe a third that detects which of the first two to use depending on extension present in the file name. SafeLoadExcel.__init__ itself is reserved for when you already have a dataframe, regardless of its original source.
class SafeLoadExcel:
    """Wrapper around a DataFrame with alternate constructors for files."""

    def __init__(self, df):
        self.df = df

    @classmethod
    def _from_file(cls, filename, reader, **reader_kwargs):
        """Load a dataframe from a file.

        filename - name of file to read
        reader - a function to parse the file and return a dataframe
        reader_kwargs - extra keyword arguments forwarded to reader
        """
        return cls(reader(filename, **reader_kwargs))

    # CSV-specific wrapper around _from_file
    @classmethod
    def from_csv(cls, filename):
        """Build an instance from a CSV file decoded as ISO-8859-1."""
        return cls._from_file(filename, pd.read_csv, encoding='iso-8859-1')

    # Excel-specific wrapper around _from_file
    @classmethod
    def from_excel(cls, filename):
        """Build an instance from an Excel workbook."""
        # No encoding is passed: current pandas read_excel does not take one.
        return cls._from_file(filename, pd.read_excel)

    # Not to be confused with the private method with a similar name.
    # Detect which public class method to use.
    @classmethod
    def from_file(cls, filename):
        """Build an instance, choosing the reader from the file extension.

        Raises:
            ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
        """
        if filename.endswith('.csv'):
            f = cls.from_csv
        elif filename.endswith('.xlsx'):
            f = cls.from_excel
        else:
            raise ValueError(f"Cannot determine type of {filename}")
        return f(filename)
Then you can use
f1 = SafeLoadExcel.from_csv("foo.csv")
f2 = SafeLoadExcel.from_excel("foo.xlsx")
f3 = SafeLoadExcel.from_file("foo.csv")
f4 = SafeLoadExcel.from_file("foo.xlsx")
f1 and f3 should be effectively the same thing, as should be f2 and f4.

How to pass kwargs from decorator to decorator?

I was asked to build two decorators:
The first one reads all the files in a directory and its subdirectories, and returns the names of the files that match the patterns it receives as arguments.
It returns them as "files" argument to the wrapped function.
The second gets a list of files and returns a dict which the keys are the files names, and the values are the files content.
I did all I have been asked until here.
The next part of the question is to pass the files from the first decorator to the second, and to make the second decorator's parameters optional - I mean, if the function is decorated with both, the second decorator shouldn't take any parameters and should get the files list from the first.
This is where I am stuck.
that's the decorators :
The first:
from pathlib import Path
import fnmatch
import os
import glob
def find(*args):
    """Decorator factory that injects matching file names as ``files``.

    Args:
        *args: ``fnmatch``-style patterns (for example ``"*.py"``).

    Returns:
        A decorator. On each call the decorated function receives a fresh
        ``files`` keyword argument: the names of all files below the current
        directory that match at least one pattern.
    """
    import functools

    def inner(f):
        @functools.wraps(f)
        def wrapper(*wrapper_args, **kwargs):
            # Built per call so repeated calls do not accumulate duplicates.
            files = []
            for _folder, _dirs, names in os.walk("."):
                for name in names:
                    if any(fnmatch.fnmatch(name, pattern) for pattern in args):
                        files.append(name)
            return f(*wrapper_args, files=files, **kwargs)
        # Return the wrapper itself; calling it here would run f at decoration time.
        return wrapper
    return inner
@find("*.py", "*.txt")
def file_printer(files):
    """Print the file names supplied through the ``files`` argument."""
    print(files)
The second:
def read_file(path):
    """Return the full text content of the file at *path*."""
    # Keep the handle and its contents under separate names.
    with open(path, "r") as handle:
        return handle.read()
def use(_func=None, **use_kwargs):
    """Decorator that injects a ``content`` dict mapping file paths to their text.

    Usable as ``@use(files=[...])`` or as bare ``@use``. In both forms a
    ``files`` keyword passed at call time (for example by an outer decorator)
    takes precedence over the ``files`` given to ``use`` itself.

    Args:
        _func: The decorated function when applied as bare ``@use``.
        **use_kwargs: May contain ``files``, a list of file names to look for.

    Returns:
        The wrapped function (bare form) or a decorator (called form).
    """
    import functools

    def inner(f):
        @functools.wraps(f)
        def wrapper(*wrapper_args, **kwargs):
            files = kwargs.pop("files", None)
            if files is None:
                files = use_kwargs.get("files") or []
            wanted = set(files)
            # Built per call so results never leak between invocations.
            content = {}
            for folder, _dirs, names in os.walk("."):
                for name in names:
                    if name in wanted:
                        # Path() drops the leading "./" of sub-folders.
                        file_path = f"{Path(folder)}/{name}"
                        content[file_path] = read_file(file_path)
            return f(*wrapper_args, content=content, **kwargs)
        # Return the wrapper itself; calling it here would run f at decoration time.
        return wrapper

    if callable(_func):
        return inner(_func)
    return inner
@use(files=["cat.py", "dog.py"])
def file_printer(content):
    """Print the path-to-text mapping supplied through ``content``."""
    print(content)
And it should work in this case too, and pass the "files" argument from the first to the second. Here I need your help.
@find("*.py", "*.txt")
@use
def file_printer(content):
    """Print the path-to-text mapping supplied through ``content``."""
    print(content)

How to pass a variable from a function to a class?

How can I print path outside function:
class FirstClas:
# Class attribute; num() below assigns a local variable that is also named path.
path = ''
def num(self):
path = "C:\\Users\\JOHN\\Desktop\\test.txt"
return path
# NOTE(review): nothing in this snippet calls num(); which `path` this print sees depends on the indentation lost in this listing.
print(path)
This code doesn't print anything.
This result:
C:\Python\python.exe C:/Users/JOHN/Desktop/test/tt.py
Process finished with exit code 0
You need to create an instance from the class that you created.
I would suggest doing this:
test = FirstClas()
print(test.num())
Hope this helps
Your method never gets called, and the class variable path is pointless here. Do:
class FirstClas:
    """Holder for the example file path."""

    def num(self):
        """Return the hard-coded path of the example text file."""
        return "C:\\Users\\JOHN\\Desktop\\test.txt"
print(FirstClas().num()) # note that this is outside the class!
I don't think you quite understand the purpose of classes, but here's how to make what you have "work" (in the sense that there are no fatal errors):
File global_variable.py
def init_global_variable():
    """Create (or reset) the module-level store to an empty dict."""
    global GLOBALS_DICT
    GLOBALS_DICT = {}


def set_variable(name, value):
    """Store *value* under *name*.

    Returns:
        True when stored, False when *name* cannot be used as a dict key.
    """
    try:
        GLOBALS_DICT[name] = value
    except (KeyError, TypeError):
        # Assignment never raises KeyError; an unhashable name raises TypeError.
        return False
    return True


def get_variable(name):
    """Return the value stored under *name*, or the string "Not Found"."""
    try:
        return GLOBALS_DICT[name]
    except KeyError:
        return "Not Found"


init_global_variable()  # ADDED.
File tt.py
import os
#import lib.global_variable as glv
import global_variable as glv # Since I don't have your whole package.
class FirstClas:
    """Provides the example file path and the icon path built from stored settings."""

    def num(self):
        """Return the hard-coded path of the example text file."""
        return "C:\\Users\\JOHN\\Desktop\\test.txt"

    def imag(self):
        """Join the stored APP_PATH and DATA_DIR values with "paths" and "PathExcel"."""
        parts = (
            glv.get_variable("APP_PATH"),
            glv.get_variable("DATA_DIR"),
            "paths",
            "PathExcel",
        )
        return os.path.join(*parts)
class Second:
    """Demo driver that prints both values produced by FirstClas."""

    # Put statements in a method so they don't run when the class is defined.
    def run(self):
        """Print the results of FirstClas.num() and FirstClas.imag()."""
        first_obj = FirstClas()
        first_line = 'first: ' + first_obj.num()
        print(first_line)
        second_line = 'second: ' + first_obj.imag()
        print(second_line)
second = Second()
second.run()
Output:
first: C:\Users\JOHN\Desktop\test.txt
second: Not Found\Not Found\paths\PathExcel
The path does not change (it stays path = '') because you never call the function num.

How to write two decorator in file operations

I have csv file having contents below
101,item_1
101,item_1
if it is csv my below code will execute
import csv
fName = input()
def read_csv(fName):
# Prints every row of the CSV file fName as parsed by csv.reader.
# NOTE(review): this `try` has no `except`/`finally` clause in the listing; the complete version later in the question handles IOError.
try:
with open(fName, 'r') as f:
reader = csv.reader(f)
for row in reader:
print (row)
read_csv(fName)
Here how to write the exception in decorator function and call on the top of that.
first decorator
if fName not endswith .txt or .csv then it has to generate output not accept
Second decorator
if fName = file.txt text file then below operations has to taken care
def read_txt(fName):
    """Print the first line of the text file *fName*."""
    # The with-block closes the file, which the bare open() never did.
    with open(fName, "r") as f:
        print(f.readline())
If the file is a csv, the first function should execute; if it is a txt, the second function should execute. How can I achieve this using decorators? I could use an if condition to achieve this, but that is not what I am asked to do.
My whole code without decorator is below
fName = input()
def read_csv(fName):
    """Print the content of *fName* according to its extension.

    ``.csv`` files are printed row by row (an IOError is reported, not raised),
    for ``.txt`` files only the first line is printed, and any other
    extension prints ``not accept``.
    """
    if fName.endswith('.csv'):
        #print ('hi')
        try:
            with open(fName, 'r') as f:
                for row in csv.reader(f):
                    print (row)
        except IOError:
            print ("Could not read file:", fName)
    #SECOND DECORATOR
    elif fName.endswith('.txt'):
        # The with-block closes the file, which the bare open() never did.
        with open(fName, "r") as f:
            print(f.readline())
    #FIRST DECORATOR
    else:
        print ('not accept')
read_csv(fName)
You can do it like this with decorators:
import functools
def check_arguments(func):
    """Decorator that only lets ``.csv`` and ``.txt`` file names through.

    The file name is read from the ``fname`` keyword argument or, failing
    that, from the first positional argument. For any other extension the
    wrapper prints ``not accept`` and returns ``None`` without calling *func*.

    Raises:
        KeyError: If no file name was supplied at all.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if 'fname' in kwargs:
            fname = kwargs['fname']
        elif args:
            fname = args[0]
        else:
            raise KeyError('fname')
        if not fname.endswith(('.csv', '.txt')):
            print('not accept')
            # Rejected input must not reach the wrapped function.
            return None
        return func(*args, **kwargs)
    return wrapper
def set_file_processor(func):
    """Decorator that runs the reader matching the file extension before *func*.

    The file name is read from the ``fname`` keyword argument or, failing
    that, from the first positional argument.

    Raises:
        KeyError: If no file name was supplied at all.
    """
    def read_csv(fname):
        print('read_csv', fname)

    def read_txt(fname):
        print('read_txt', fname)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if 'fname' in kwargs:
            fname = kwargs['fname']
        elif args:
            fname = args[0]
        else:
            raise KeyError('fname')
        if fname.endswith('.csv'):
            read_csv(fname)
        elif fname.endswith('.txt'):
            read_txt(fname)
        return func(*args, **kwargs)
    return wrapper
@check_arguments
@set_file_processor
def process(fname):
    """Entry point; the decorators above do the validation and dispatch."""
    pass
process(fname='input.csv')
Your problem doesn't seem to come under decorator but under factory pattern i.e. process differently based on the input file.
The below code is a very simple and basic Factory pattern solution to your problem, this should be modified accordingly as per your need,
import os
from abc import ABC, abstractmethod
class FileProcessor(ABC):
    """Interface every file processor implements."""

    @abstractmethod
    def process(self, file_path):
        """Process the file at *file_path*."""


class TextFileProcessor(FileProcessor):
    """Processor for ``.txt`` files."""

    def process(self, file_path):
        print("Text file processing goes here")


class CsvFileProcessor(FileProcessor):
    """Processor for ``.csv`` files."""

    def process(self, file_path):
        print("CSV file processing goes here")


class DefaultFileProcessor(FileProcessor):
    """Fallback that rejects every file it is given."""

    def process(self, file_path):
        raise ValueError("File %s is not valid" % file_path)


class FileFactory:
    """Select and run the processor that matches a file's extension."""

    # Maps a lower-case extension (without the dot) to its processor class.
    processors = {
        'txt': TextFileProcessor,
        'csv': CsvFileProcessor,
        'default': DefaultFileProcessor
    }

    def __init__(self, file_path):
        """Remember *file_path*.

        Raises:
            IOError: If *file_path* does not exist.
        """
        if not os.path.exists(file_path):
            raise IOError("File not found")
        self.file_path = file_path

    def process(self):
        """Run the matching processor and return its result.

        Raises:
            ValueError: From the default processor, for unknown extensions.
        """
        # splitext only inspects the final path component, so dots in
        # directory names cannot be mistaken for an extension.
        ext = os.path.splitext(self.file_path)[1].lstrip(".").lower()
        processor_class = self.processors.get(ext, self.processors["default"])
        return processor_class().process(self.file_path)
FileFactory(file_path).process()
In later stage if you would like to add json processor then it can also be done easily by adding
processors = {
'txt': TextFileProcessor,
'csv': CsvFileProcessor,
'json': JsonFileProcessor,
'default': DefaultFileProcessor
}
and creating new Json processor class,
class JsonFileProcessor(FileProcessor):
    """Processor for ``.json`` files."""

    def process(self, file_path):
        """Handle the JSON file at *file_path* (placeholder implementation)."""
        print("JSON file processing goes here")
Based on your code and this very useful guide, here is a possible solution:
def read_file_decorator(fName):
    """Return a zero-argument reader for *fName*, chosen by its extension.

    Returns:
        A function that prints the CSV rows (``.csv``) or the raw lines
        (``.txt``) of *fName* when called, or ``None`` for any other extension.
    """
    def read_csv():
        print('read_csv')
        with open(fName, 'r') as f:
            for row in csv.reader(f):
                print(row)

    def read_txt():
        print('read_txt')
        # The with-block closes the file, which the bare open() never did.
        with open(fName, 'r') as f:
            for row in f:
                print(row)

    if fName.endswith('.csv'):
        return read_csv
    if fName.endswith('.txt'):
        return read_txt
    return None
# read_file_decorator yields None for file types it does not support.
reader_function = read_file_decorator(fileName)
if reader_function is not None:
    reader_function()
else:
    print('not accept')
I use a stateful decorator remembering the file name inside the reader function before actually executing it (in order not to pass it twice); and I use the fixed value None for invalid file types.
Based on the requirements, using a decorator here would be overkill. But if it is mandatory to implement this with a decorator, this is how we can do it:
We can create a dummy function called read_file and a decorator function called reader
User will always call read_file with filename as argument and decorator function reader will check passed filename extension and call the required function - read_csv or read_text
def reader(fun):
    """Decorator that replaces *fun* with an extension-based dispatcher.

    The wrapper inspects its first positional argument and calls
    ``read_csv`` for ``.csv`` names, ``read_text`` for ``.txt`` names, and
    prints ``not accepted`` otherwise. *fun* itself is never called.
    """
    import functools

    @functools.wraps(fun)
    def wrapper(*args):
        fname = args[0]
        if fname.endswith('.csv'):
            read_csv(fname)
        elif fname.endswith('.txt'):
            read_text(fname)
        else:
            print('not accepted')
    return wrapper


def read_csv(fname):
    """Handle a CSV file (demo: only announces itself)."""
    print('In read_csv()')


def read_text(fname):
    """Handle a text file (demo: only announces itself)."""
    print('In read_text()')


@reader
def read_file(fname):
    """Dummy entry point; ``reader`` performs the actual dispatch."""
    pass
read_file('a.csv')
read_file('a.txt')
read_file('filename.py')
Output
In read_csv()
In read_text()
not accepted

mocking a variable using mox

I want to test this method, however I would need to mock the variable dirContent
def imageFilePaths(paths):
    """Return the supported image files found directly inside *paths*.

    Args:
        paths: Iterable of directory paths to list (not searched recursively).

    Returns:
        A list of unique file paths in discovery order, keeping only entries
        that are files and for which isExtensionSupported() is true.

    Raises:
        OSError: If one of the directories cannot be listed.
    """
    imagesWithPath = []
    # Tracks what was already collected so the result keeps a stable order,
    # unlike list(set(...)).
    seen = set()
    for _path in paths:
        try:
            dirContent = os.listdir(_path)
        except OSError:
            raise OSError("Provided path '%s' doesn't exists." % _path)
        for each in dirContent:
            selFile = os.path.join(_path, each)
            if selFile in seen:
                continue
            if os.path.isfile(selFile) and isExtensionSupported(selFile):
                seen.add(selFile)
                imagesWithPath.append(selFile)
    return imagesWithPath
how do I just mock a variable using mox ?
This is how I have however tried to mock os.listdir
def setUp(self):
# Fixture: one directory path to feed into the function under test, plus the Mox controller used for stubbing.
self._filePaths = ["/test/file/path"]
self.mox = mox.Mox()
def test_imageFilePaths(self):
# First attempt: stub os.listdir, record one expected call, replay, run the code, verify.
filePaths = self._filePaths[0]
self.mox.StubOutWithMock(os,'listdir')
dirContent = os.listdir(filePaths).AndReturn(['file1.jpg','file2.PNG','file3.png'])
self.mox.ReplayAll()
# NOTE(review): filePaths is a single string here (element [0]), while imageFilePaths loops over its argument — confirm a list was not intended.
utils.imageFilePaths(filePaths)
self.mox.VerifyAll()
also tried this way
def test_imageFilePaths(self):
# Second attempt: build mock objects by hand instead of stubbing the real os module.
filePaths = self._filePaths
# NOTE(review): this rebinds only the local name `os`; presumably the os module used inside utils is unaffected — verify.
os = self.mox.CreateMock('os')
os.listdir = self.mox.CreateMock(os)
dirContent = os.listdir(filePaths).AndReturn(['file1.jpg','file2.PNG','file3.png'])
self.mox.ReplayAll()
lst = utils.imageFilePaths(filePaths)
# self.assertEquals('/test/file/path/file1.jpg', lst[0])
self.mox.VerifyAll()
but the call to the method being tested doesn't recognize the mocked dirContent
Typically you would not mock a variable, but instead mock the function call used to set that variable's value. In your example, for instance, you'd mock out os.listdir and have it return a mock value.
# Your test file
import os
class YourTest(...):
# Template test case; `...` stands in for the real base class.
def setUp(self):
# A fresh Mox controller for each test.
self.mox = mox.Mox()
def tearDown(self):
# Presumably restores whatever StubOutWithMock replaced during the test — confirm against the mox documentation.
self.mox.UnsetStubs()
# Your test
def testFoo(self):
self.mox.StubOutWithMock(os, 'listdir')
# the calls you expect to listdir, and what they should return
os.listdir("some path").AndReturn([...])
self.mox.ReplayAll()
# ... the rest of your test

Categories