Invoking a parent class's method from a child class with no hardcoded arguments - Python

I'm fairly new to Python and I've been struggling with inheritance concepts on a work project. The base class gets directory paths, table names, etc. from a config file (I'm keeping most of this out because it's irrelevant to the question below) and passes them to a child class via super().__init__().
The base class also has a method to export a dataframe to MS SQL Server. The problem is, I can't pass the pandas dataframe from my Child class to my Parent class, only hardcoded values. The reason I'm doing this is that the dataframes are fairly different from one another (each demanding unique data manipulation), but they all come from the same place and will be placed in the same database.
So, in short, I need to run SQLExport from Parent with the dataframe defined in the Child as input. I tried placing SQLExport in my __init__, but I get
AttributeError: 'function' object has no attribute 'to_sql'
What I have:
from configparser import ConfigParser
import pandas as pd

config_object = ConfigParser()
config_object.read('config.ini')

class Parent:
    def __init__(self, report, df, date=today):
        self.report = report
        self.df = df
        self.name = config_object[self.report]['name']
        self.table_name = config_object[self.report]['table_name']

    def SQLExport(self):
        return self.df.to_sql(self.table_name, con="engine")

class Child(Parent):
    def __init__(self):
        super().__init__('REPORTNAME', self.load_dataframe)
        super().SQLexport(self)

    def load_dataframe(self):
        self.df = pd.read_json(self.name + ".json")

if __name__ == '__main__':
    x = Child()
Thanks!!

There are multiple issues with the code:
1. super().SQLexport(self) — the method name is wrong in your child class (it should be self.SQLExport()), and the method does not accept any parameters, so self should not be passed explicitly.
2. pd.read_json(self.name + ".json") — the Child instance has no attribute name at the point load_dataframe runs, so self.name will fail.
3. self.load_dataframe (the function object itself) is passed to super().__init__ instead of a dataframe, and load_dataframe has no return statement (return self.df).
With the above issues corrected, you can try the code below, which writes the dataframe to an in-memory SQLite database:
from configparser import ConfigParser
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///:memory:')
config_object = ConfigParser()
config_object.read('config.ini')

class Parent:
    def __init__(self, report, df):
        self.report = report
        self.df = df
        self.name = config_object[self.report]['name']
        self.table_name = config_object[self.report]['table_name']

    def SQLExport(self):
        print(self.table_name)
        return self.df.to_sql(self.table_name, con=engine)

class Child(Parent):
    def __init__(self):
        self.name = "test_123"  # set before load_dataframe uses it
        super().__init__('REPORTNAME', self.load_dataframe())  # call the method and pass the dataframe
        super().SQLExport()

    def load_dataframe(self):
        self.df = pd.read_json(self.name + ".json")
        return self.df

    def read_data(self):
        # assumes the table_name configured for REPORTNAME is 'sample_table'
        print(engine.execute("SELECT * FROM sample_table").fetchall())

if __name__ == '__main__':
    x = Child()
    x.read_data()
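Beyond the direct fixes, here is a minimal alternative sketch (my suggestion, not part of the answer above; names and config are stubbed out) that inverts the flow so the dataframe never has to be passed through super().__init__: Parent treats load_dataframe as a hook that each child overrides.

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///:memory:')

class Parent:
    table_name = 'sample_table'  # would normally come from config.ini

    def __init__(self, report):
        self.report = report
        self.df = self.load_dataframe()  # hook implemented by each child

    def load_dataframe(self):
        raise NotImplementedError

    def SQLExport(self):
        return self.df.to_sql(self.table_name, con=engine)

class Child(Parent):
    def load_dataframe(self):
        # each child does its own unique data manipulation here
        return pd.DataFrame({'col_1': [1, 2, 3]})

x = Child('REPORTNAME')
x.SQLExport()

This is the template-method pattern: the shared plumbing lives in the parent, and each report class only supplies its own data manipulation.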

Related

How to index through dynamically generated nested objects in Python?

import tkinter as tk
import tksheet
import json
import pickle

class Treino(object):
    class config(object):
        _list = []
        def __init__(self, name, value):
            self.name = name
            self.value = value
            self._list.append(self)

    class CategoriaTreino(object):
        _list = []
        def __init__(self, name):
            self.name = name
            self._list.append(self)
            self.config = {'RNG': 0}

        class SubCategoriaTreino(object):
            _list = []
            def __init__(self, name, repet):
                self.name = name
                self.repetição = repet
                self._list.append(self)

def returnCat():
    for cat in TreinoMain.CategoriaTreino._list:
        yield cat

def returnSubCat():
    for subcat in TreinoMain.CategoriaTreino.SubCategoriaTreino._list:
        yield subcat

def savefile():
    with open('treinos.pkl', 'wb') as file:
        for cat in returnCat():
            pickle.dump(cat, file, pickle.HIGHEST_PROTOCOL)

def loadfile():
    data = []
    with open('treinos.pkl', 'rb') as file:
        while True:
            try:
                data.append(pickle.load(file))
            except EOFError:
                break
    for d in data:
        # iterate through objs
        pass

if __name__ == "__main__":
    TreinoMain = Treino()
    ca1 = TreinoMain.CategoriaTreino("Braço")
    subca1 = ca1.SubCategoriaTreino('Biceps', '2x10')
So, the code is obviously far from done. My issue is that I was hoping creating an instance of the parent object, and then an instance of the child object, would let me easily return attributes from the child object through its parent object (and save/load them with pickle), but apparently that's not the case (I can't refer to the child object as parentobj.childobj). Any ideas on how to do this in a simple way, or a better way to dynamically generate an object inside another object?
Also, I'm fairly sure the _list attributes will be useless after I load the objects with pickle and iterate through them...
The idea: make a GUI so that the user can add/remove categories and subcategories of a data structure on the fly.
Why didn't I go with nested data types? I figured it would be easier to implement with objects, since I'm going to use them for my actual goal (randomizing exercise schedules) and for writing the add/remove functions.
So far what I've tried is to return attributes with:
ca1.subca1.name
That doesn't work.
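No answer is attached to this question, but the usual fix is composition rather than nested classes: the parent instance stores its children, so they can be reached through the parent and pickled together. A minimal sketch with hypothetical names (Categoria, SubCategoria, add_sub):

import pickle

class SubCategoria:
    def __init__(self, name, repet):
        self.name = name
        self.repet = repet

class Categoria:
    def __init__(self, name):
        self.name = name
        self.subcategorias = {}  # children live on the parent instance

    def add_sub(self, name, repet):
        sub = SubCategoria(name, repet)
        self.subcategorias[name] = sub
        return sub

ca1 = Categoria("Braço")
ca1.add_sub('Biceps', '2x10')
print(ca1.subcategorias['Biceps'].name)  # -> Biceps

# Pickling the parent automatically pickles the children it holds.
with open('treinos.pkl', 'wb') as f:
    pickle.dump(ca1, f, pickle.HIGHEST_PROTOCOL)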

Python: Inner Class

I am trying to create a JSON string from a class, which I defined as follows:
import json
import ast
from datetime import datetime
import pytz
import time

class OuterClass:
    def __init__(self):
        self.Header = None
        self.Body = None

    class Header:
        def __init__(self, ID=None, Name=None):
            self.ID = ID
            self.Name = Name

    class Body:
        def __init__(self, DateTime=None, Display=None):
            self.DateTime = DateTime
            self.Display = Display

def current_time_by_timezone(timezone_input):
    return datetime.now(pytz.timezone(timezone_input))

if __name__ == '__main__':
    response = OuterClass()
    header = response.Header('123', 'Some Name')
    body = response.Body(current_time_by_timezone('US/Central'), 'NOT VALID')
    print(json.dumps(response.__dict__))
I'm getting an error 'TypeError: 'NoneType' object is not callable'. Is it because I'm setting Header and Body to None myself in the OuterClass definition?
The problem with your code is these lines:
self.Header = None
self.Body = None
These create instance variables named Header and Body on every instance of OuterClass, so you can never access the class variables (the nested classes) via an instance, only via OuterClass itself.
It's not very clear what your intention is with this data structure. Defining a class inside another class doesn't do anything special in Python by default (you could create special behavior with deliberate effort, e.g. a metaclass that turns the inner classes into descriptors). Generally, though, there's no implied relationship between the classes.
If you want your OuterClass to create instances of the other two classes, you can do that without nesting their definitions. Just put the class definitions at top level and write a method that creates an instance at an appropriate time and does something useful with it (like binding it to an instance variable).
You might want something like:
class Header:
    ...

class Response:
    def __init__(self):
        self.header = None

    def make_header(self, *args):
        self.header = Header(*args)
        return self.header
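Usage could then look like this (a sketch assuming the Response class above, with Header filled in with the ID/Name fields from the question):

import json

response = Response()
response.make_header('123', 'Some Name')     # binds a Header instance to response.header
print(json.dumps(response.header.__dict__))  # e.g. {"ID": "123", "Name": "Some Name"}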
You could keep the classes nested as long as you don't expect that to mean anything special, just be sure that you don't use the class name as an instance variable, or you'll shadow the name of the nested class (a capitalization difference, like self.header vs self.Header could be enough).

use singleton logic within a classmethod

I am currently using this piece of code:
class FileSystem(metaclass=Singleton):
    """File System manager based on Spark"""

    def __init__(self, spark):
        self._path = spark._jvm.org.apache.hadoop.fs.Path
        self._fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(
            spark._jsc.hadoopConfiguration()
        )

    @classmethod
    def without_spark(cls):
        with Spark() as spark:
            return cls(spark)
My object obviously depends on the Spark object (another object that I created; I can add its code if needed, but I do not think it is required for my current issue).
It can be used in 2 different ways, resulting in the same behavior:

fs = FileSystem.without_spark()
# OR
with Spark() as spark:
    fs = FileSystem(spark)

My problem is that, even though FileSystem is a singleton, using the class method without_spark makes me enter (__enter__) the context manager of Spark, which leads to a connection to the Spark cluster, which takes a lot of time. How can I make the first execution of without_spark do the connection, but have subsequent ones only return the already created instance?
The expected behavior would be something like this :
@classmethod
def without_spark(cls):
    if not cls.exists:  # I do not know how to persist this information in the class
        with Spark() as spark:
            return cls(spark)
    else:
        return cls()
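(For reference, the Singleton metaclass is not shown in the question; a typical implementation, assumed throughout the answers below, caches the first instance per class and returns it on later calls:)

class Singleton(type):
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # create the instance once, then always hand back the cached one
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]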
I think you are looking for something like
import contextlib

class FileSystem(metaclass=Singleton):
    """File System manager based on Spark"""
    spark = None

    def __init__(self, spark):
        self._path = spark._jvm.org.apache.hadoop.fs.Path
        self._fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(
            spark._jsc.hadoopConfiguration()
        )

    @classmethod
    def without_spark(cls):
        if cls.spark is None:
            cm = cls.spark = Spark()
        else:
            cm = contextlib.nullcontext(cls.spark)
        with cm as s:
            return cls(s)
The first time without_spark is called, a new instance of Spark is created and used as a context manager. Subsequent calls reuse the same Spark instance and use a null context manager.
I believe your approach will work as well; you just need to initialize exists to be False, then set it to True the first (and every, really) time you call the class method.
class FileSystem(metaclass=Singleton):
    """File System manager based on Spark"""
    exists = False

    def __init__(self, spark):
        self._path = spark._jvm.org.apache.hadoop.fs.Path
        self._fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(
            spark._jsc.hadoopConfiguration()
        )

    @classmethod
    def without_spark(cls):
        if not cls.exists:
            cls.exists = True
            with Spark() as spark:
                return cls(spark)
        else:
            return cls()
Can't you make the constructor argument optional and initialize the Spark instance lazily, e.g. in a property (or functools.cached_property)?
from functools import cached_property

class FileSystem(metaclass=Singleton):
    def __init__(self, spark=None):
        self._spark = spark

    @cached_property
    def spark(self):
        if self._spark is None:
            self._spark = Spark()  # created lazily on first access
        return self._spark

    @cached_property
    def path(self):
        return self.spark._jvm.org.apache.hadoop.fs.Path

    @cached_property
    def fs(self):
        with self.spark:
            return self.spark._jvm.org.apache.hadoop.fs.FileSystem.get(
                self.spark._jsc.hadoopConfiguration()
            )
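Both call sites from the question then still work (a usage sketch under the same Spark and Singleton assumptions as above):

fs = FileSystem()            # no Spark yet; one is created lazily on first access to fs.spark
# or reuse an already-open session:
with Spark() as spark:
    fs = FileSystem(spark)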

TypeError: __init_subclass__() takes no keyword arguments, related to subclass and abstract class design

I implemented the following design using an abstract class and a subclass, as follows:
from abc import ABC, abstractmethod
import pandas as pd

class Pipeline(ABC):
    @abstractmethod
    def read_data(self):
        pass

    def __init__(self, **kwargs):
        self.raw_data = self.read_data()
        self.process_data = self.raw_data[self.used_cols]

class case1(Pipeline):
    def read_data(self):
        return pd.read_csv("file location")  # just hard coding the file location

    @property
    def used_cols(self):
        return ['col_1', 'col_2', 'col_3', 'col_4']
I can invoke the class case1 as follows; it will read the CSV file into a pandas dataframe:

data = case1()

This existing design returns four hardcoded columns ('col_1', 'col_2', 'col_3' and 'col_4') and works fine. Now I would like to control which columns are returned by modifying the subclass, specifically the used_cols function. I modified class case1 as follows, but it causes an error:
class case1(Pipeline):
    def read_data(self):
        return pd.read_csv("file location")  # just hard coding the file location

    @property
    def used_cols(self, selected_cols):
        return selectd_cols
It was called as follows:

selected_cols = ['col_2','col_3']
data = case1(selected_cols)

It turns out that this modification is not right and generates an error message:

TypeError: __init_subclass__() takes no keyword arguments

So my question is how to modify the subclass to get the desired control.
Reference: Python: How to pass more than one argument to the property getter?
I think you did not fully understand the purpose of properties.
If you create a property used_cols, you access it using obj.used_cols instead of obj.used_cols(). After creating the property, it's not easily possible to call the underlying function directly.
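As a tiny illustration (my sketch, not from the answer): a getter wrapped in property receives only self, and attribute access returns the value itself, so there is nothing to pass an argument to:

class Demo:
    @property
    def value(self):  # property getters take no arguments besides self
        return 42

d = Demo()
print(d.value)        # 42 -- accessed without parentheses
# d.value(10) would raise TypeError: 'int' object is not callable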
csv file:
col_0,col_1,col_2,col_3
1,1,1,2
2,3,3,4
3,3,3,6
code:
from abc import ABC, abstractmethod
import pandas as pd

class Pipeline(ABC):
    @abstractmethod
    def read_data(self):
        pass

    def __init__(self, **kwargs):
        self.raw_data = self.read_data()
        self.used_cols = kwargs["selected_cols"]
        self.process_data = self.raw_data[self.used_cols]

class case1(Pipeline):
    def read_data(self):
        return pd.read_csv("file_location.csv")  # just hard coding the file location

    @property
    def used_cols(self):
        return self._used_cols

    @used_cols.setter
    def used_cols(self, selected_cols):
        self._used_cols = selected_cols

selected_cols = ['col_2', 'col_3']
data = case1(selected_cols=selected_cols)
print(data.process_data)
result:
   col_2  col_3
0      1      2
1      3      4
2      3      6
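An alternative sketch (my suggestion, not part of the answer above): drop the property machinery and accept the column list as a plain keyword argument with a sensible default:

from abc import ABC, abstractmethod
import pandas as pd

class Pipeline(ABC):
    @abstractmethod
    def read_data(self):
        pass

    def __init__(self, selected_cols=None):
        self.raw_data = self.read_data()
        # fall back to all columns when no selection is given
        self.used_cols = selected_cols or list(self.raw_data.columns)
        self.process_data = self.raw_data[self.used_cols]

class case1(Pipeline):
    def read_data(self):
        return pd.read_csv("file_location.csv")  # hypothetical file path

data = case1(selected_cols=['col_2', 'col_3'])
print(data.process_data)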

How to add named variables to class init (or any function)

I have this class signature (the init signature):
class TensorDataset(Dataset):
    def __init__(self, *tensors)
This class is initialized in this line:

dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels)

Now I would like to wrap this class, so I created CustomDataset:
class CustomDataset(Dataset):
    def __init__(self, *tensors, **keywords):
        self.tensor_dataset = TensorDataset(*tensors)
        self.all_text = keywords["all_text"]
and I also tried:
class CustomDataset(Dataset):
    def __init__(self, *tensors, all_text=None):
        self.tensor_dataset = TensorDataset(*tensors)
        self.all_text = all_text
But when I try to initialize this class like this:
dataset = CustomDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels, all_text: all_text)
I get an error:
End of statement expected
Statement expected, found Py:RPAR
What am I doing wrong?
I'm not sure what you are trying to do with all_text: all_text, but named arguments should be passed like

all_text=all_text

so try that.
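So the full call, using the names from the question, would presumably be:

dataset = CustomDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels, all_text=all_text)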
