Python class can't find member - python

I tried to put some basic preprocessing operations on a pandas dataframe into a separate class:
import pandas as pd
import numpy as np
from numba import jit
class MyClass:
def _init_(self):
pass
#jit
def preprocess_dataframe(self, path):
self.df = pd.read_csv(path, index_col=False, delimiter=' ' , names=['Time', 'Downloads', 'ServerID', 'Server', 'Date'], usecols=['Time', 'Downloads', 'Server', 'Date'])
print(self.df.head(5))
self.df['Date'] = self.df['Date'].astype(str)
self.df['Timestamp'] = pd.to_datetime(self.df['Time'] +' '+ self.df['Date'], format='%H:%M:%S %Y%m%d')
self.df[['Server_alone', 'Instance']] = self.df['Server'].str.split('-' ,expand=True)
self.df.drop(columns=['Time'], inplace=True)
self.df['Date'] = pd.to_datetime(self.df['Date'], format='%Y-%m-%d')
self.df.set_index(self.df['Date'])
return self.df
When I call this function in my main script (see below) I receive the error:
AttributeError: module 'MyClass' has no attribute 'preprocess_dataframe'
This is the relevant part of my main script:
import MyClass as mc
path = 'Data.txt'
df = mc.preprocess_dataframe(path)
>>>AttributeError: module 'MyClass' has no attribute 'preprocess_dataframe'
I looked up several other questions, including this one. However, nothing solved my issue, although I think the fix is quite easy. Thank you for your help!

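You haven't created an instance of MyClass. In your script, import MyClass as mc binds the module named MyClass to mc, not the class inside it, which is why the error says the module has no attribute 'preprocess_dataframe'.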
You could rectify it by:
df = mc().preprocess_dataframe(path)
Also change the import statement to: from filename import MyClass as mc
You could also make preprocess_dataframe a staticmethod as mentioned in comments.

You should make the method static
import pandas as pd
import numpy as np
from numba import jit
class MyClass:
    """Namespace for the CSV preprocessing helper; no instance state is used."""

    # NOTE(review): the original snippet also listed numba's ``jit`` above the
    # method. It is left off here because the body consists only of pandas
    # calls -- confirm whether numba was ever meant to be applied.
    @staticmethod
    def preprocess_dataframe(path):
        """Read a space-delimited log file and derive timestamp/server columns.

        Parameters
        ----------
        path : str or path-like
            Location of a header-less, space-delimited file whose fields are
            read as ``Time, Downloads, ServerID, Server, Date`` (``ServerID``
            is skipped via ``usecols``).

        Returns
        -------
        pandas.DataFrame
            Columns ``Downloads``, ``Server``, ``Date`` (datetime),
            ``Timestamp`` (datetime built from ``Time`` + ``Date``),
            ``Server_alone`` and ``Instance`` (the two halves of ``Server``
            split on ``'-'``). The raw ``Time`` column is dropped.
        """
        df = pd.read_csv(
            path,
            index_col=False,
            delimiter=' ',
            names=['Time', 'Downloads', 'ServerID', 'Server', 'Date'],
            usecols=['Time', 'Downloads', 'Server', 'Date'],
        )
        # A static method has no ``self``; preview the local frame instead.
        print(df.head(5))

        # ``Date`` is cast to text so it can be concatenated with ``Time``.
        df['Date'] = df['Date'].astype(str)
        df['Timestamp'] = pd.to_datetime(
            df['Time'] + ' ' + df['Date'], format='%H:%M:%S %Y%m%d'
        )
        df[['Server_alone', 'Instance']] = df['Server'].str.split('-', expand=True)
        df.drop(columns=['Time'], inplace=True)

        # Same '%Y%m%d' layout as used for ``Timestamp`` above; the earlier
        # '%Y-%m-%d' did not describe these strings.
        df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d')

        # NOTE(review): set_index returns a new frame and the result is
        # discarded, so the returned frame keeps its default index. Assign the
        # result if indexing by ``Date`` is actually wanted.
        df.set_index(df['Date'])
        return df
and call it the following way
from filename import MyClass
path = 'Data.txt'
df = MyClass.preprocess_dataframe(path)

Related

python: AttributeError: 'list' object has no attribute 'groupby'

I am following a Youtube tutorial on a streamlit application, however the error
"AttributeError: 'list' object has no attribute 'groupby'"
occurred when I was trying to group the table that I scraped from Wikipedia. The instructor had exactly the same code as me but didn't face a problem. What exactly am I missing?
import streamlit as st
import pandas as pd
#st.cache
def load_data():
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
html = pd.read_html(url, header = 0)
df = html[0]
return df
df = load_data()
df = df.groupby('GICS Sector')
I fixed it: I just had to reassign the df variable to its first element.
import streamlit as st
import pandas as pd
#st.cache
def load_data():
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
html = pd.read_html(url, header=0)
df = html[0]
return df
df = load_data()
df = df[0]
df = df.groupby("GICS Sector")

Hand over settings in a class to a pd.read_csv() function

Hi, I am pretty new to Python. I developed the following class:
import pandas as pd
import os
class Class1:
def __init__(self, path, cols = None, settings = {"sep" : ";", "encoding" : "unicode_escape", "header" : "infer", "decimal" :"."
, "skiprows" : None, "names" : None, "skipfooter" : 0, "engine" : "python"} ):
self.raw = self._load_raw(path = path, s = settings, cols = cols)
def _load_raw(self, path, s, cols = None):
df = pd.read_csv(path, sep = s["sep"], encoding = s["encoding"], decimal = s["decimal"], skiprows = s["skiprows"], skipfooter = s["skipfooter"]
, engine = s["engine"], header = s["header"], names = s["names"], usecols = cols)
return df
Inside the class is a function which reads a CSV file into a pd.DataFrame. I am wondering if there is a smart way of designing the class without handing over such a settings dictionary (used to read the dataframe later on) when creating an object. Let's suppose the CSV file is much simpler and needs just one argument, e.g. "sep", and none of the other arguments, but the class also needs to be able to read CSV files which require more arguments. Is there a Pythonic way to hand over only as many as are required?
For example, one object just needs "sep", and another object needs all of the settings parameters defined in the docs of pd.read_csv, but both can be created with the same class.

Class that returns a transformed dataframe

I'm trying to create a class that takes the path and name of the CSV file, converts it to a dataframe, deletes some columns, converts another one to datetime, as in the code
import os
from pathlib import Path
import pandas as pd
import datetime
class Plans:
def __init__(self, file , path):
self.file = file
self.path = path
self.df = pd.Dataframe()
def get_dataframe(self):
os.chdir(self.path)
self.df = pd.read_csv(self.file, encoding="latin-1", low_memory=False, sep=';')
if 'data' in df.columns:
self.tipo = 'sales'
self.df['data'] = pd.to_datetime(df['data'])
return clean_unused_data()
def clean_unused_data(self):
columns = ['id', 'docs', 'sequence','data_in','received', 'banc', 'return', 'status', 'return_cod',
'bank_account_return', 'id_transcript', 'id_tx','type_order']
for item in columns:
del self.df[item]
del columns[:]
return self.df
When I use an object of the class, it gives an error with the clean_unused_data function
and returns the following error:
__getattr__ raise AttributeError(f"module 'pandas' has no attribute '{name}'")
Also, I would like to do more dataframe transformations in the Plans class. but since this first one failed, I was a little lost.
Thanks for the help, and I apologize for my lack of familiarity with Python.
I think the error refers to calling an attribute that does not exist in Pandas. From what I can see you wrote pd.DataFrame as pd.Dataframe. Notice the capitalization.
Try the following:
def __init__(self, file , path):
self.file = file
self.path = path
self.df = pd.DataFrame()
Probably one of the columns you are trying to delete is not actually in your file. You can handle the exception or remove this column label from your array.

how to fix error with quandl function get (Status 404) (Quandl Error QECx02)?

Hi everyone, Python is throwing this error every time I try to run this code.
I have tried both methods shown in the code below, and it does not run with either of them:
api_key = open('apikey.txt', 'r').read()
for x in friddy_states[0][1]:
query ='CMHC/HPPU50_BC'+str(x)
df= quandl.get(query, authtoken=api_key)
and also tried this way:
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
for x in friddy_states[0][1]:
query ='CMHC/HPPU50_BC'+str(x)
df= quandl.get(query)
Both methods are shown in the Quandl documentation, and neither one is working!
THIS IS THE ACTUAL CODE:
import quandl
import pandas as pd
import pickle
api_key = open('apikey.txt', 'r').read()
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
df = quandl.get('CMHC/HPPU50_BC', authoken= api_key)
friddy_states =
pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
main_df = pd.DataFrame()
for x in friddy_states[0][1]:
query ='CMHC/HPPU50_BC'+str(x)
df= quandl.get(query, authtoken=api_key)
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df)
print(main_df.head())
I got super stuck; help is appreciated.
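I am noticing a missing "t" in the authtoken keyword of your get request (it is spelled authoken):
df = quandl.get('CMHC/HPPU50_BC', authoken= api_key)
The keyword should be authtoken, as in your later call quandl.get(query, authtoken=api_key).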

Return DataFrame using ipywidgets Button

I'm currently creating a class that inherits from the pandas DataFrame. I'm interested in developing a method called 'new_filter' that is a fancier way of executing a DataFrame filtering command:
import pandas as pd
from ipywidgets import widgets
from IPython.display import display
import numpy as np
class Result(pd.DataFrame):
#property
def _constructor(self):
return Result
def _filter_done(self, c):
self._column_name = self._filter_dd.value
self._expression = self._filter_txt.value
return self[eval('self.'+ self._column_name +' '+self._expression)]
def new_filter(self):
self._filter_dd = widgets.Dropdown(options=list(self.columns),
description='Column:')
self._filter_txt = widgets.Text(description='Expr:')
self._filter_button = widgets.Button(description = 'Done')
self._filter_box = widgets.VBox([self._filter_dd, self._filter_txt, self._filter_button])
display(self._filter_box)
self._filter_button.on_click(self._filter_done)
After creating an object like:
test = Result(np.random.randn(3,4), columns=['A','B','C','D']) #just an example
test_2 = test.new_filter()
Then, for example:
Widget Output
What I want is that 'test_2' be an object from 'Result' class. Is there any solution to this?
First, you will have to return something in the function new_filter. Second, if you want the same object to be modified, it is a bit hard I think. One thing you can do is to have an object which has a trait which can be updated in _filter_done.
Here is a small example of how you can do it:
import pandas as pd
from ipywidgets import widgets
from IPython.display import display
import numpy as np
class Result(pd.DataFrame):
    """DataFrame subclass that can build an interactive column filter."""

    # pandas consults ``_constructor`` as an attribute when it builds the
    # result of an operation, so it must be a property for slices and
    # filters of a Result to come back as Result.
    @property
    def _constructor(self):
        return Result

    def _filter_done(self, obj, c):
        """Apply the filter currently entered in the widgets and store it on *obj*.

        Parameters
        ----------
        obj : object
            Holder whose ``data`` attribute receives the filtered frame.
        c : object
            Argument supplied by the button's click callback; unused.
        """
        # obj is the object to be modified.
        # Updating its data attribute to have the filtered data.
        self._column_name = self._filter_dd.value
        self._expression = self._filter_txt.value
        # SECURITY NOTE(review): eval executes whatever text was typed into
        # the expression box. Only use this with trusted, interactive input.
        obj.data = self[eval('self.' + self._column_name + ' ' + self._expression)]

    def new_filter(self):
        """Display the filter widgets and return the holder for the result.

        Returns
        -------
        FilterResult
            Object whose ``data`` attribute is updated each time the
            'Done' button is clicked.
        """
        self._filter_dd = widgets.Dropdown(options=list(self.columns),
                                           description='Column:')
        self._filter_txt = widgets.Text(description='Expr:')
        self._filter_button = widgets.Button(description='Done')
        self._filter_box = widgets.VBox(
            [self._filter_dd, self._filter_txt, self._filter_button]
        )
        display(self._filter_box)
        result_obj = FilterResult()
        # The click callback receives one argument; forward it together with
        # the holder so _filter_done knows where to store the filtered frame.
        self._filter_button.on_click(lambda arg: self._filter_done(result_obj, arg))
        return result_obj
from traitlets import HasTraits
from traittypes import DataFrame
class FilterResult(HasTraits):
data = DataFrame()
With the same example code as in your question, i.e.,
test = Result(np.random.randn(3,4), columns=['A', 'B', 'C','D']) #just an example
test_2 = test.new_filter()
You can see that whenever you click on done, the updated dataframe is in test_2.data.

Categories