read a dict into self - python

I have a class that is doing a lot of stuff. In the end, it saves everything into a pickle. When I rerun this class, I want to read the pickle instead of doing everything again. Unfortunately, the variable is always empty if I unpickle. Why is that?
import os
import pandas as pd

class Test:
    def __init__(self, path, value):
        # path points to a .txt file but it's in the same folder as the pickle
        data_path, data = os.path.split(path)
        pickle_path = os.path.join(data_path, name.split('.')[1] + '.pickle')
        if os.path.isfile(pickle_path):
            self = pd.read_pickle(path)
        else:
            # do a ton of stuff and save it as a pickle afterwards
            ...

variable = Test(path, value)
In this case variable is empty if I read from the pickle, but correct if I do all the stuff...

If I want to cache some calculation results, I load/dump the object outside the class, something like:
import os
import pickle

pickle_path = os.path.join(data_path, name.split('.')[1] + '.pickle')
if os.path.isfile(pickle_path):
    with open(pickle_path, 'rb') as f:
        variable = pickle.load(f)  # use cached results
else:
    variable = Test()  # do all the calculations
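The reason the original approach stays empty is that assigning to self inside __init__ only rebinds the local name; the instance being constructed is left untouched. If the loading really has to live inside the class, a minimal sketch (assuming the pickle sits next to the .txt file and holds an instance of the same class; the pickle-name derivation and attribute names here are illustrative) is to copy the unpickled object's attributes onto self:

import os
import pickle

class Test:
    def __init__(self, path, value):
        data_path, data = os.path.split(path)
        pickle_path = os.path.join(data_path, data.split('.')[0] + '.pickle')
        if os.path.isfile(pickle_path):
            with open(pickle_path, 'rb') as f:
                cached = pickle.load(f)
            # copy the cached instance's attributes onto this instance
            self.__dict__.update(cached.__dict__)
        else:
            self.value = value  # ... do the expensive work here ...
            with open(pickle_path, 'wb') as f:
                pickle.dump(self, f)  # cache for the next run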

Related

How do I efficiently store a class in a text file with Python?

I have a file, memory.txt, and I want to store an instance of the class Weapon() in a dictionary, on the second line.
with open(memorypath(), "r") as f:
    lines = f.readlines()

inv = inventory()
if "MAINWEAPON" not in inv or inv["MAINWEAPON"] == "":
    inv["MAINWEAPON"] = f"""Weapon(sw, 0, Ability(0, "0"), ["{name}'s first weapon."], dmg=30, cc=20, str=15)"""
lines[1] = str(inv) + "\n"

with open(memorypath(), "w") as f:
    f.writelines(lines)
(inventory and memorypath are from another file I have for utility functions)
Though, with what I have, if I get inv["MAINWEAPON"] I'll just get the string, not the class. And I have to store it as a string, or else I'll get something like <__main__.Weapon object at (hexadecimal address)>.
How do I get the class itself upon getting inv["MAINWEAPON"]?
Another thing: I feel like I'm making a mess with newlines, because memory.txt has 6 lines but gets shortened to 5. Please tell me if I'm doing anything wrong.
If you have a class, you can represent it as a dict and save it in JSON format.
class Cat:
    name: str

    def __init__(self, name: str):
        self.name = name

    def dict(self):
        return {'name': self.name}

    @classmethod
    def from_dict(cls, d):
        return cls(name=d['name'])
Now you can save the class as a json to a file like this:
import json

cat = Cat('simon')
with open('cat.json', 'w') as f:
    json.dump(cat.dict(), f)
And you can load the json again like this:
with open('cat.json', 'r') as f:
    d = json.load(f)
cat = Cat.from_dict(d)
Update
Since Python 3.7 it has been possible to use dataclasses, and here is an example of how you can use them to save the class in JSON format.
If you want to use the JSON file as a database and be able to append new entities to it, you will have to load the file into memory, append the new data, and finally overwrite the old JSON file. The code below does exactly that.
from dataclasses import dataclass, asdict
import json

@dataclass
class Cat:
    name: str

def load_cats() -> list[Cat]:
    try:
        with open('cats.json', 'r') as fd:
            return [Cat(**x) for x in json.load(fd)]
    except FileNotFoundError:
        return []

def save_cat(c):
    data = [asdict(x) for x in load_cats() + [c]]
    with open('cats.json', 'w') as fd:
        json.dump(data, fd)

c = Cat(name='simon')
save_cat(c)
cats = load_cats()
print(cats)
The simplest approach I can suggest would be dataclasses.asdict as mentioned above; otherwise, use a serialization library that supports dataclasses. There are a lot of good ones out there, but for this purpose I might suggest dataclass-wizard. Further, if you want to transform an arbitrary JSON object into a dataclass structure, you can use the included CLI tool. When serializing, it will automatically apply a key transform (snake_case to camelCase), but this is easily customizable as well.
Disclaimer: I am the creator (and maintainer) of this library.
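For illustration, a minimal sketch of what that could look like, assuming the library's JSONWizard mixin and its from_dict/to_dict helpers (check the dataclass-wizard documentation for the exact, current API):

from dataclasses import dataclass

from dataclass_wizard import JSONWizard  # pip install dataclass-wizard

@dataclass
class Cat(JSONWizard):
    name: str

cat = Cat.from_dict({'name': 'simon'})  # dict -> dataclass instance
print(cat.to_dict())                    # dataclass instance -> dict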

How to modify file content using python?

The following code reads a file and uses the syntax tree to append a full stop to the docstrings of that file. How can I save the changes back to the file being processed? I understand the present code doesn't change the content of the original file, only the local variables accessing it. Can you suggest changes, and if possible provide a learning resource as well?
astcode.py
import ast
import sys
import os

filename = sys.argv[1]

# """Getting all the functions"""
ast_filename = os.path.splitext(ast.__file__)[0] + '.py'

with open(filename) as fd:
    file_contents = fd.read()
module = ast.parse(file_contents)

# with open(filename, 'w') as file:

# module level
if(isinstance(module.body[0], ast.Expr)):
    docstr = module.body[0].value.s
    if(module.body[0].value.s not in '.'):
        docstr += '.'
    # ast.dump(module, include_attributes=True)
    print(docstr)

# function level
function_definitions = [node for node in module.body if isinstance(node, ast.FunctionDef)]

for function in function_definitions:
    # next_node = function_definitions[idx].body
    next_node = function.body
    for new_node in next_node:
        if(isinstance(new_node, ast.Expr)):
            if(isinstance(new_node.value, ast.Str)):
                # Docstring stored in docstr variable.
                docstr = new_node.value.s
                if(docstr[-1] not in '.'):
                    new_node.value.s += '.'
                # astString = ast.dump(new_node, annotate_fields=True, include_attributes=True)
                # print(astString)
                # compile(astString, filename, 'eval')
                # print(exec(astString))
                print(new_node.value.s)

# for line in module:
#     file.write(line)
Example
testfile.py
def readDictionaryFile(dictionary_filename):
    """readDictionaryfile doc string"""
    return []

def readTextFile(text_filename):
    """readTextfile doc string"""
    return []
$ python3 astcode.py testfile.py
Expected
testfile.py
def readDictionaryFile(dictionary_filename):
    """readDictionaryfile doc string."""
    return []

def readTextFile(text_filename):
    """readTextfile doc string."""
    return []
Note: full stop (.) appended.
Looking at the documentation link, I notice there's a NodeVisitor and NodeTransformer with a code example. I looked at how they unparse a function def, and it's basically the same as you've done in your original question, so I used that.
# https://docs.python.org/3/library/ast.html#ast.NodeTransformer
class MyDocstringTransformer(ast.NodeTransformer):
    def visit_FunctionDef(self, node):
        if len(node.body):
            if isinstance(node.body[0], ast.Expr):
                if isinstance(node.body[0].value, ast.Constant):
                    if isinstance(node.body[0].value.value, str):
                        docstring = node.body[0].value.value
                        node.body[0].value.value = docstring + '.'
        return node
Using Python 3.9's ast module gets us ast.unparse (https://docs.python.org/3/library/ast.html#ast.unparse), which is about as close as we can get to changing the AST node and then rewriting the original file.
tree = ast.parse(file_contents)
new_tree = MyDocstringTransformer().visit(tree)
print(ast.unparse(new_tree))
Instead of just overwriting the same filename, you may want to write to a temp file, then delete the old file and rename the temp file to the old name, letting the OS perform the replacement.
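A minimal sketch of that write-back step, reusing the names from the snippets above (the .tmp suffix is just an illustrative choice):

import os

new_source = ast.unparse(new_tree)

tmp_name = filename + '.tmp'
with open(tmp_name, 'w') as out:
    out.write(new_source + '\n')

os.replace(tmp_name, filename)  # swap the temp file in place of the original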

Create a Class that creates a dictionary from reading in Multiple CSV Files - Python

I have 24 CSV files that currently reside in a folder. The goal is to read all the CSV files in and store them as individual pandas DataFrames. My client wishes all of our code to be written in an object-oriented style. I am new to OOP and would appreciate any help.
I am currently trying to create a class that will read in my files and store them in a dictionary via a for loop, with the key being the name of the file and the value being the pandas DataFrame.
I already have a list of file paths stored in a variable called fns.
This is what I have for the code so far; I am trying to figure out the loop logic so I don't have to create a new class instance every time.
fns = glob.glob(path + "*.csv")
enc = 'ISO-8859-1'

# create class
class MyFile:
    def __init__(self, file_path):
        self.file = file_path

    def ParseName(self):
        self.name_me = self.file.split('\\')[-1].strip('.csv')

    def Read_CSV(self):
        self.data_csv = pd.read_csv(self.file, delimiter='\t',
                                    low_memory=False, encoding=enc)
My goal is to get a dictionary like this:
{'filename1': DataFrame, 'filename2': DataFrame, .... 'filename24': DataFrame}
I appreciate all the help!
Sample Object-oriented CsvStorage:
import glob
from os.path import basename

import pandas as pd

class CsvStorage:
    def __init__(self, path):
        self._dfs = {}  # per-instance store: {filename: DataFrame}
        for f in glob.glob(path):
            self._dfs[basename(f)] = pd.read_csv(f, encoding='ISO-8859-1')

    def get_dataframes(self):
        if not self._dfs:
            raise ValueError('No dataframes. Load data first')
        return self._dfs

files_path = '*/FILE_*.csv'  # adjust to your actual path pattern
csv_store = CsvStorage(files_path)
dfs = csv_store.get_dataframes()
print(dfs)

Python - Use class or modules when you share some data dictionary across functions

I'm suffering from too-many-classes syndrome, struggling to identify when the use of a class in Python is properly justified.
I have a function whose code quickly grew to around 350 lines, and in order to have comprehensive tests and more control over the code itself, refactoring came to mind.
Just after starting the refactoring, I split the code into several small functions, but they depend on some variables and a data dictionary that now must be passed as parameters.
The original code looks like the following:
def process_rooms_json(path, country, json_dict):
    # Create Dict from json_dict
    rooms_dict = dict()
    # Create another
    employee_dict = dict()
    list_meetings = []
    # Iterate over the folder
    for file_name in [f for f in os.listdir(path)]:
        with open(folder + '/' + file_name, 'r', encoding='utf-8') as f:
            # Extract content
            set_apps = function_which_creates_list(f.read())
            for app in set_apps:
                # Block of code which extracts certain info
                # Block of code which builds a dictionary
                list_meetings.append(result_of_big_code)
    return list_meetings
and now it's something like
def process_rooms_json(path, country, json_dict):
    # Create Dict from json_dict
    rooms_dict = dict()
    # Create another
    employee_dict = dict()
    list_meetings = []
    # Iterate over the folder
    for file_name in [f for f in os.listdir(path)]:
        with open(folder + '/' + file_name, 'r', encoding='utf-8') as f:
            # Extract content
            set_apps = function_which_creates_list(f.read())
            for app in set_apps:
                # Block of code which extracts certain info
                new_dict = do_a_lot_of_stuff_1(employee_dict, country, file_name)
                new_dict2 = do_a_lot_of_stuff_2(new_dict['some_list'])
                new_dict3 = do_a_lot_of_stuff_3(employee_dict, country)
                new_dict4 = do_a_lot_of_stuff_4(rooms_dict, country, file_name)
                new_dict5 = do_a_lot_of_stuff_5(rooms_dict, employee_dict)
                # Create result
                return_dict = dict()
                return_dict['some_field'] = new_dict['another field']
                ...
                list_meetings.append(return_dict)
    return list_meetings
I'm really tempted to create a class, say RoomProcessor, and encapsulate the path, JSON dictionaries, and country code in self. I don't know to what extent this is correct or whether I'm totally losing the point.
In terms of maintainability, code readability, and idiomatic Python, is sharing the same data dictionary across functions proper justification for creating a class?
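For illustration only, a minimal sketch of the RoomProcessor idea described above, holding the shared state (path, country, the dicts) on the instance so the helper methods no longer need it passed around; every name here is a placeholder taken from the pseudocode, not a claim that a class is the right call:

import os

class RoomProcessor:
    def __init__(self, path, country, json_dict):
        # shared state that every processing step needs
        self.path = path
        self.country = country
        self.rooms_dict = dict()      # built from json_dict
        self.employee_dict = dict()

    def process(self):
        list_meetings = []
        for file_name in os.listdir(self.path):
            with open(os.path.join(self.path, file_name), 'r', encoding='utf-8') as f:
                set_apps = function_which_creates_list(f.read())  # placeholder helper
            for app in set_apps:
                list_meetings.append(self._build_meeting(app, file_name))
        return list_meetings

    def _build_meeting(self, app, file_name):
        # each do_a_lot_of_stuff_N step becomes a small method that reads
        # self.rooms_dict / self.employee_dict instead of taking them as parameters
        ...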

LRU cache on hard drive python

I want to be able to decorate a function as you would with functools.lru_cache; however, I want the results to be cached on the hard drive and not in memory. Looking around, I get the feeling this is a solved problem, and I was wondering if anyone could point me in the right direction (or at least give me a few more keywords to try googling).
I don't know if this will help or if it matters, but the function is computing images from unique filenames.
Here's some code to get you started:
from pathlib import Path
import pickle
import hashlib
import os


class LRU_Cache:

    def __init__(self, directory, original_function, maxsize=10):
        self.directory = directory
        self.original_function = original_function
        self.maxsize = maxsize
        try:
            os.mkdir(directory)
        except OSError:
            pass

    def __call__(self, *args):
        filename = hashlib.sha1(pickle.dumps(args)).hexdigest()
        fullname = os.path.join(self.directory, filename)
        try:
            with open(fullname, 'rb') as f:
                value = pickle.load(f)
            Path(fullname).touch()
            return value
        except FileNotFoundError:
            pass
        value = self.original_function(*args)
        with open(fullname, 'wb') as f:
            pickle.dump(value, f)
        filenames = os.listdir(self.directory)
        if len(filenames) <= self.maxsize:
            return value
        # evict the least recently used entry once the cache grows past maxsize
        fullnames = [os.path.join(self.directory, filename)
                     for filename in filenames]
        oldest = min(fullnames, key=lambda fn: os.stat(fn).st_mtime)
        os.remove(oldest)
        return value
It hashes the arguments to create a unique filename for each function call. The function's return value is pickled to that filename.
Cache hits unpickle the stored result and update the file modification time.
If the cache directory exceeds a target size, the oldest cache file is removed.
Use it like this:
def square(x):
    print('!')
    return x ** 2

sqr = LRU_Cache('square_cache', square, 10)
Now call sqr normally and results will be cached to disk.
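Since the question asks for decorator-style usage like functools.lru_cache, a small factory can turn the class above into a decorator; this is a sketch built on the answer's LRU_Cache, and disk_lru_cache is a made-up name:

def disk_lru_cache(directory, maxsize=10):
    """Decorator factory that wraps a function in the LRU_Cache above."""
    def decorator(fn):
        return LRU_Cache(directory, fn, maxsize)
    return decorator

@disk_lru_cache('square_cache', maxsize=10)
def square(x):
    print('!')
    return x ** 2

print(square(4))  # computed and written to the cache directory
print(square(4))  # served from the on-disk cache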
