Return DataFrame using ipywidgets Button - python

I'm currently creating a Class that inherits a DataFrame from pandas. I'm interested in developing a method called 'new_filter' that is a fancier execution of a DataFrame command:
import pandas as pd
from ipywidgets import widgets
from IPython.display import display
import numpy as np
class Result(pd.DataFrame):
    """DataFrame subclass that shows widgets for filtering its own rows."""

    #property
    # NOTE(review): the marker above is presumably a mangled `@property`
    # decorator; as written it is only a comment -- confirm.
    def _constructor(self):
        # Hands back the subclass itself (the class, not an instance).
        return Result

    def _filter_done(self, c):
        # Click handler registered in new_filter(); `c` is not used here.
        self._column_name = self._filter_dd.value
        self._expression = self._filter_txt.value
        # Builds the text "self.<column> <expression>", evaluates it with
        # eval(), and indexes self with the result.  The filtered frame is
        # the return value of this handler only.
        return self[eval('self.'+ self._column_name +' '+self._expression)]

    def new_filter(self):
        """Display a column dropdown, an expression box and a 'Done' button."""
        self._filter_dd = widgets.Dropdown(options=list(self.columns),
                                           description='Column:')
        self._filter_txt = widgets.Text(description='Expr:')
        self._filter_button = widgets.Button(description = 'Done')
        self._filter_box = widgets.VBox([self._filter_dd, self._filter_txt, self._filter_button])
        display(self._filter_box)
        # The handler runs on a later click; this method itself has no
        # return statement, so calling it evaluates to None.
        self._filter_button.on_click(self._filter_done)
After creating an object like:
test = Result(np.random.randn(3,4), columns=['A','B','C','D']) #just an example
test_2 = test.new_filter()
Then, for example:
Widget Output
What I want is that 'test_2' be an object from 'Result' class. Is there any solution to this?

First, you will have to return something in the function new_filter. Second, if you want the same object to be modified, it is a bit hard I think. One thing you can do is to have an object which has a trait which can be updated in _filter_done.
Here is a small example of how you can do it:
import pandas as pd
from ipywidgets import widgets
from IPython.display import display
import numpy as np
class Result(pd.DataFrame):
    """DataFrame subclass with an interactive, widget-driven row filter."""

    @property
    def _constructor(self):
        # pandas reads this attribute to pick the class used for frames
        # produced by operations such as boolean indexing, so it has to be a
        # property that yields the class, not a plain method.
        return Result

    def _filter_done(self, obj, c):
        """Apply the filter chosen in the widgets and store it on ``obj``.

        Parameters
        ----------
        obj
            Object to be modified; its ``data`` attribute receives the
            filtered frame.
        c
            Argument supplied by the button click; unused.
        """
        self._column_name = self._filter_dd.value
        self._expression = self._filter_txt.value
        # Look the column up by label instead of splicing its name into the
        # evaluated text, so labels that are not valid identifiers (or that
        # collide with DataFrame attributes) still work.
        column = self[self._column_name]
        # SECURITY: eval() executes whatever was typed into the text box.
        # Only use this where the person typing is trusted.
        mask = eval('column ' + self._expression)
        obj.data = self[mask]

    def new_filter(self):
        """Display the filter widgets and return a holder for the result.

        Returns
        -------
        FilterResult
            Holder whose ``data`` attribute is updated every time the
            'Done' button is clicked.
        """
        self._filter_dd = widgets.Dropdown(options=list(self.columns),
                                           description='Column:')
        self._filter_txt = widgets.Text(description='Expr:')
        self._filter_button = widgets.Button(description='Done')
        self._filter_box = widgets.VBox(
            [self._filter_dd, self._filter_txt, self._filter_button]
        )
        display(self._filter_box)
        result_obj = FilterResult()
        # The click callback receives one argument; forward it together with
        # the holder that should receive the filtered data.
        self._filter_button.on_click(
            lambda arg: self._filter_done(result_obj, arg)
        )
        return result_obj
from traitlets import HasTraits
from traittypes import DataFrame
class FilterResult(HasTraits):
    """Holder object exposing a single ``data`` trait."""
    # Declared with the DataFrame trait type imported from traittypes.
    data = DataFrame()
With the same example code as in your question, i.e.,
test = Result(np.random.randn(3,4), columns=['A', 'B', 'C','D']) #just an example
test_2 = test.new_filter()
You can see that whenever you click on done, the updated dataframe is in test_2.data.

Related

Initialize class more efficiently in Python

I have this code in which I initialize a class (Adapter) by the name I get from the request.
It seems to be a bit clumsy, and I'm sure there's a better/cleaner way of doing it.
from adapters.gofirst_adapter import GoFirstAdapter
from adapters.spicejet_adapter import SpiceJetAdapter
from adapters.airasia_adapter import AirAsiaAdapter
class Adapter():
    """Holds one concrete adapter instance selected by name."""

    def __init__(self, adapter_name):
        # Three independent `if` tests (not elif): each compares the name
        # with one literal and, on a match, instantiates that adapter class.
        # When no literal matches, self.adapter is never assigned.
        if(adapter_name=='goFirst'):
            gofirst_adapter = GoFirstAdapter()
            self.adapter = gofirst_adapter
        if(adapter_name=='spiceJet'):
            spicejet_adapter = SpiceJetAdapter()
            self.adapter = spicejet_adapter
        if(adapter_name=='airAsia'):
            airasia_adapter = AirAsiaAdapter()
            self.adapter = airasia_adapter
What I'm aiming at is to have a list of the adapter names such as:
adapters = ['goFirst', 'spiceJet', 'airAsia']
and create the classes by the list.
Thank you.
You can use a dict to map the parameter to the specific class:
from adapters.gofirst_adapter import GoFirstAdapter
from adapters.spicejet_adapter import SpiceJetAdapter
from adapters.airasia_adapter import AirAsiaAdapter
class Adapter():
    """Wrapper that instantiates the adapter registered under a name."""

    # Lookup table: request name -> adapter class.
    adapters = {
        'goFirst': GoFirstAdapter,
        'spiceJet': SpiceJetAdapter,
        'airAsia': AirAsiaAdapter,
    }

    def __init__(self, adapter_name):
        # An unknown name surfaces as KeyError from the lookup.
        adapter_cls = self.adapters[adapter_name]
        self.adapter = adapter_cls()
something like this could work:
from adapters.gofirst_adapter import GoFirstAdapter
from adapters.spicejet_adapter import SpiceJetAdapter
from adapters.airasia_adapter import AirAsiaAdapter
class Adapter():
    """Build and hold the concrete adapter selected by ``adapter_name``."""

    # Maps each supported name to the class that builds its adapter.
    _adapter_builders = {
        "goFirst": GoFirstAdapter,
        "spiceJet": SpiceJetAdapter,
        "airAsia": AirAsiaAdapter,
    }

    def __init__(self, adapter_name):
        # Indexing (rather than .get) makes an unknown name raise KeyError.
        build = self._adapter_builders[adapter_name]
        self.adapter = build()
Note that this raises a KeyError when adapter_name is not a key in the dictionary.

Python Protocol for Building a `pandas.DataFrame`

Hello SO and community!
I guess my question somewhat resembles this one.
However, I believe the task below is a little different from the one referenced above: I want to extract, transform, and load data using pandas.DataFrame, and I am stuck implementing a Protocol for that purpose.
The code is below:
import io
import pandas as pd
import re
import requests
from functools import cache
from typing import Protocol
from zipfile import ZipFile
from pandas import DataFrame
@cache
def extract_can_from_url(url: str, **kwargs) -> DataFrame:
    '''
    Returns DataFrame from the CSV inside the zip file named by url

    The archive is read from the current working directory when a file
    with the same name already exists there; otherwise it is downloaded.
    The CSV member name is the archive name with '-eng.zip' replaced by
    '.csv'.

    Parameters
    ----------
    url : str
        url to download from; its last path segment is the archive name.
    **kwargs
        additional arguments to pass to pd.read_csv(). Results are
        memoised with functools.cache, so every value must be hashable
        and repeated calls return the same DataFrame object.

    Returns
    -------
    DataFrame
    '''
    import os  # local import: os is not among the file's top-level imports

    name = url.split('/')[-1]
    member = name.replace('-eng.zip', '.csv')
    if os.path.exists(name):
        source = name
    else:
        source = io.BytesIO(requests.get(url).content)
    # Both the archive and the member are context-managed so neither file
    # handle is left open after the CSV has been parsed.
    with ZipFile(source) as archive, archive.open(member) as f:
        return pd.read_csv(f, **kwargs)
class ETL(Protocol):
    """Structural interface listing the steps an ETL class provides.

    The method bodies are stubs (``...``): a Protocol only declares
    signatures, and returning the ``DataFrame`` / ``list[str]`` class
    objects would contradict the annotated return types.
    """
    # =============================================================================
    # Maybe Using these items for dataclass:
    # url: str
    # meta: kwargs(default_factory=dict)
    # =============================================================================

    def __init__(self, url: str, **kwargs) -> None:
        ...

    def download(self) -> DataFrame:
        ...

    def retrieve_series_ids(self) -> list[str]:
        ...

    def transform(self) -> DataFrame:
        ...

    def sum_up_series_ids(self) -> DataFrame:
        ...
class ETLCanadaFixedAssets(ETL):
    """ETL steps for the table downloaded from ``url``.

    The methods build on each other's state and are meant to be called in
    this order: download(), retrieve_series_ids(), transform(),
    sum_up_series_ids(). Each returns a plain value (DataFrame or list),
    not ``self``.
    """

    def __init__(self, url: str, **kwargs) -> None:
        self.url = url
        self.kwargs = kwargs

    # NOTE: not memoised with functools.cache -- on a method the cache
    # would key on ``self`` and keep every instance alive.
    def download(self) -> DataFrame:
        """Load the source table into ``self.df`` and return it."""
        # Use the instance's own url instead of a module-level URL name.
        # The previously hard-coded read_csv options stay as defaults and
        # are overridden by any keyword arguments given to __init__.
        read_csv_kwargs = {'index_col': 0, 'usecols': range(14), **self.kwargs}
        self.df = extract_can_from_url(self.url, **read_csv_kwargs)
        return self.df

    def retrieve_series_ids(self) -> list[str]:
        """Collect the sorted, de-duplicated VECTOR ids of the wanted rows.

        Requires download() to have populated ``self.df``.
        """
        # =========================================================================
        # Columns Specific to URL below, might be altered
        # =========================================================================
        # Only the keys are used (tuple(dict) yields keys) to select columns.
        self._columns = {
            "Prices": 0,
            "Industry": 1,
            "Flows and stocks": 2,
            "VECTOR": 3,
        }
        self.df_cut = self.df.loc[:, tuple(self._columns)]
        # Row filter on the first three selected columns, by position.
        _q = (
            self.df_cut.iloc[:, 0].str.contains('2012 constant prices')
            & self.df_cut.iloc[:, 1].str.contains('manufacturing', flags=re.IGNORECASE)
            & (self.df_cut.iloc[:, 2] == 'Linear end-year net stock')
        )
        self.df_cut = self.df_cut[_q]
        self.series_ids = sorted(set(self.df_cut.iloc[:, -1]))
        return self.series_ids

    def transform(self) -> DataFrame:
        """Reduce ``self.df`` to VECTOR/VALUE rows of the retrieved ids.

        Requires retrieve_series_ids() to have set ``self.series_ids``.
        """
        # =========================================================================
        # Columns Specific to URL below, might be altered
        # =========================================================================
        self._columns = {
            "VECTOR": 0,
            "VALUE": 1,
        }
        self.df = self.df.loc[:, tuple(self._columns)]
        self.df = self.df[self.df.iloc[:, 0].isin(self.series_ids)]
        return self.df

    def sum_up_series_ids(self) -> DataFrame:
        """Put each id's values side by side and return their row-wise sum.

        Replaces ``self.df`` with the wide frame (one column per series id
        plus 'sum') and returns only the 'sum' column as a DataFrame.
        """
        self.df = pd.concat(
            [
                self.df[self.df.iloc[:, 0] == series_id].iloc[:, [1]]
                for series_id in self.series_ids
            ],
            axis=1,
        )
        self.df.columns = self.series_ids
        self.df['sum'] = self.df.sum(axis=1)
        return self.df.iloc[:, [-1]]
UPD
Instantiating the class ETLCanadaFixedAssets
df = ETLCanadaFixedAssets(URL, index_col=0, usecols=range(14)).download().retrieve_series_ids().transform().sum_up_series_ids()
raises the following error, which I admittedly expected:
AttributeError: 'DataFrame' object has no attribute 'retrieve_series_ids'
Please can anyone provide a guidance for how to put these things together (namely how to retrieve the DataFrame which might have been retrieved otherwise using the procedural approach by calling the functions within the last class as they appear within the latter) and point at those mistakes which were made above?
Probably, there is another way to do this elegantly using injection.
Thank you very much in advance!
All the methods of the ETLCanadaFixedAssets and ETL classes should return self. This lets you call the next method on the return value of the previous one, so the calls can be chained. You could add one more member that retrieves the encapsulated DataFrame, but it must always come last, because once you call it you can no longer chain further methods. What you are trying to build is called a fluent API; you may read more about it here.
For example:
class ETL(Protocol):
    """Fluent variant of the interface: each step returns the ETL object.

    The ``-> ETL`` annotations name the class while it is still being
    defined, so the module must start with
    ``from __future__ import annotations``.
    """

    def download(self) -> ETL:
        ...

    def retrieve_series_ids(self) -> ETL:
        ...

    def transform(self) -> ETL:
        ...

    def sum_up_series_ids(self) -> ETL:
        ...

    # Read-only accessor for the wrapped frame; it ends a call chain
    # because it returns a DataFrame rather than the ETL object.
    @property
    def dataframe(self) -> DataFrame:
        ...
Note you will need the following import line to be able to use the class annotation inside the class definition
from __future__ import annotations

Class that returns a transformed dataframe

I'm trying to create a class that takes the path and name of the CSV file, converts it to a dataframe, deletes some columns, converts another one to datetime, as in the code
import os
from pathlib import Path
import pandas as pd
import datetime
class Plans:
    """Reads a ';'-separated CSV file and drops a fixed list of columns."""

    def __init__(self, file , path):
        self.file = file
        self.path = path
        # NOTE(review): pandas spells the class `DataFrame`; `pd.Dataframe`
        # is what triggers the "module 'pandas' has no attribute" error.
        self.df = pd.Dataframe()

    def get_dataframe(self):
        # Changes the process working directory so that self.file is
        # resolved relative to self.path.
        os.chdir(self.path)
        self.df = pd.read_csv(self.file, encoding="latin-1", low_memory=False, sep=';')
        # NOTE(review): bare `df` is not defined in this method --
        # presumably `self.df` was meant here and two lines below.
        if 'data' in df.columns:
            self.tipo = 'sales'
            self.df['data'] = pd.to_datetime(df['data'])
        # NOTE(review): called as a bare name, not as self.clean_unused_data().
        return clean_unused_data()

    def clean_unused_data(self):
        columns = ['id', 'docs', 'sequence','data_in','received', 'banc', 'return', 'status', 'return_cod',
                   'bank_account_return', 'id_transcript', 'id_tx','type_order']
        # Removes each listed column from self.df in place.
        # NOTE(review): presumably fails if a label is absent from the file.
        for item in columns:
            del self.df[item]
        # Empties the local list; self.df is not affected by this line.
        del columns[:]
        return self.df
When I call a method on an object of the class, it fails in connection with the clean_unused_data function
and returns the following error:
__getattr__ raise AttributeError(f"module 'pandas' has no attribute '{name}'")
Also, I would like to do more dataframe transformations in the Plans class. but since this first one failed, I was a little lost.
Thanks for the help, and I apologize for my lack of familiarity with Python.
I think the error refers to calling an attribute that does not exist in Pandas. From what I can see you wrote pd.DataFrame as pd.Dataframe. Notice the capitalization.
Try the following:
def __init__(self, file , path):
    """Store the file name and directory and start with an empty frame."""
    self.file = file
    self.path = path
    # Correct capitalisation: the pandas class is `DataFrame`.
    self.df = pd.DataFrame()
Probably one of the columns you are trying to delete is not actually in your file. You can handle the exception or remove this column label from your array.

How to pass a range in a def function

Want to pass a range for the web-scraping function, not sure how it's done. This is to make my code more reusable so that I can scrape different ranges with different dates, say 2016... 2017... 2018... Code looks like this:
import numpy as np
import pandas as pd
import requests
def game_id2017(game_id):
    # Fetches the boxscore for each game id and records one dict per side
    # (home/away) in games_played_2017.  Nothing is returned.
    games_played_2017 = []
    games_played_2018 = []  # never used below
    print('Getting data...')
    # NOTE(review): range() is called without arguments, and the loop
    # variable reuses the name of the `game_id` parameter.
    for game_id in range():
        url = 'https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(game_id)
        r_2017 = requests.get(url)
        game_data_2017 = r_2017.json()
        for homeaway in ['home','away']:
            game_dict_2017 = dict()
            game_dict_2017['team'] = game_data_2017.get('teams').get(homeaway).get('team').get('name')
            game_dict_2017['teamID'] = game_data_2017.get('teams').get(homeaway).get('team').get('id')
            game_dict_2017['homeaway'] = homeaway
            game_dict_2017['game_id'] = game_id
            games_played_2017.append(game_dict_2017)
game_id2017(20170201, 20170210, 1)
TypeError: game_id2017() takes 1 positional argument but 3 were given
game_id2017(*game_id)
for id in game_id:
then use game_id like a list
Pass a list:
import numpy as np
import pandas as pd
import requests
def game_id2017(game_id):
    """Visit every game id in the range described by ``game_id``.

    Parameters
    ----------
    game_id : sequence of int
        Arguments for ``range``, e.g. ``[start, stop, step]``.

    Returns
    -------
    list of int
        The game ids that were visited, in order.
    """
    print('Getting data...')
    visited = []
    # Unpack the list into range() so the loop walks the requested ids,
    # not the positions 0..len-1 of the argument list.
    for a_game_id in range(*game_id):
        # use a_game_id
        visited.append(a_game_id)
    return visited
game_id2017([20170201, 20170210, 1])

ValueError: expected a dict or pandas.DataFrame

I am trying to create a categorical vBar that will show the total number of migrants picked up for various operations, however, when I pass the 'groupby' pandas object into the column data source, I keep getting an error and I'm not quite sure what I'm doing wrong.
I have looked in a few places for similar problems, but I can't seem to find any answers.
Can anyone point me in the right direction?
#Imports
import pandas as pd
from bokeh.io import curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool
from bokeh.models import Button
from bokeh.layouts import row
#Global variables
# Column used both for grouping and as the x-axis field.
viewFilter='Operation'
#Data
df = pd.read_csv('data.csv')
# Grouped selection of one column; no aggregation has been applied yet.
grouped = df.groupby(viewFilter)['Total Migrants']
# The groupby object is handed to ColumnDataSource as-is; this is the call
# the question reports as raising the error.
source = ColumnDataSource(grouped)
#grouped = df.groupby(viewFilter)['Total Migrants'].sum()
#source = ColumnDataSource(pd.DataFrame(grouped))
# Category labels, later used as the figure's x_range.
operations = source.data[viewFilter].tolist()
# Fig Creation Function
def create_figure():
    """Build the bar chart of 'Total Migrants' per ``viewFilter`` category.

    Reads the module-level ``viewFilter``, ``operations`` and ``source``;
    none of them is reassigned here, so no ``global`` statement is needed.

    Returns
    -------
    The configured figure, with a hover tool attached.
    """
    p = figure(x_range=operations)
    p.vbar(x=viewFilter, top='Total Migrants',
           source=source, width=0.70)
    p.title.text = 'Demo Chart'
    p.xaxis.axis_label = viewFilter
    p.yaxis.axis_label = 'Total Migrants'
    # Hover tool
    hover = HoverTool()
    # Tooltip fields reference data-source columns with '@'; a column name
    # containing a space has to be wrapped in braces.
    hover.tooltips = [
        ("Total Migrants Rescued", "@{Total Migrants}")]
    hover.mode = 'vline'
    p.add_tools(hover)
    return p
#Update Data with Ship-level aggregation
def shipUpdate():
    """Button callback: report on stdout that the ship button was clicked."""
    notice = "Ship Button was Pushed"
    print(notice)
#Widgets
shipButton = Button(label='Ship Level')
# Register shipUpdate as the button's click callback.
shipButton.on_click(shipUpdate)
#Implement Layout
# create_figure() is called once, here, while the row is being built.
layout = row(shipButton, create_figure())
#Add Layout to Document
curdoc().add_root(layout)
It seems that if I explicitly pass in a pandas dataframe object, it solves this error:
source = ColumnDataSource(pd.DataFrame(grouped))
Your version of Bokeh is too old. Support for passing Pandas GroupBy objects was added in version 0.12.7. If you want to be able to pass GroupBy objects directly to initialize a CDS (e.g. to access all the automatic summary statistics that creates), you will need to upgrade to a newer release.
It looks like a wrong parameter value was passed to the groupby() method or to ColumnDataSource().
Syntax:
DataFrame.groupby(by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, **kwargs)
Parameter by-> list, str, dict
The constructor of ColumnDataSource is shown below:
def __init__(self, *args, **kw):
    ''' If called with a single argument that is a dict or
    pandas.DataFrame, treat that implicitly as the "data" attribute.
    '''
    # A lone positional argument becomes the "data" keyword unless the
    # caller already supplied "data" explicitly.
    if len(args) == 1 and "data" not in kw:
        kw["data"] = args[0]
    # TODO (bev) invalid to pass args and "data", check and raise exception
    raw_data = kw.pop("data", {})
    # Non-dict input is converted: DataFrame and GroupBy each go through
    # their own helper; any other type is rejected with ValueError.
    if not isinstance(raw_data, dict):
        if pd and isinstance(raw_data, pd.DataFrame):
            raw_data = self._data_from_df(raw_data)
        elif pd and isinstance(raw_data, pd.core.groupby.GroupBy):
            raw_data = self._data_from_groupby(raw_data)
        else:
            raise ValueError("expected a dict or pandas.DataFrame, got %s" % raw_data)
    # "data" was popped above, so it is not forwarded to the parent
    # initializer; the converted data is applied afterwards via update().
    super(ColumnDataSource, self).__init__(**kw)
    self.data.update(raw_data)

Categories