I am following a YouTube tutorial on a Streamlit application, but the error
"AttributeError: 'list' object has no attribute 'groupby'"
occurred when I was trying to group the list I scraped from Wikipedia. The instructor had the exact same code as me but didn't face this problem. What am I missing exactly?
import streamlit as st
import pandas as pd

@st.cache
def load_data():
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    html = pd.read_html(url, header=0)
    df = html[0]
    return df

df = load_data()
df = df.groupby('GICS Sector')
I fixed it; I just had to reassign the df variable to its first index:
import streamlit as st
import pandas as pd

@st.cache
def load_data():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    html = pd.read_html(url, header=0)
    df = html[0]
    return df

df = load_data()
df = df[0]
df = df.groupby("GICS Sector")
I'm currently working on an error in my code's output: AttributeError: 'NoneType' object has no attribute 'values'
import json, requests

link = "https://some.com.br/api/v1/integration/customers.json"
headers = {'iliot-company-token': '3r5s$ddfdassss'}

def get_data(page):
    parameters = {"page": page}
    clients2 = requests.get(link, headers=headers, json=parameters)
    # Return the parsed JSON; only printing it makes the function return None,
    # which is what later surfaces as "'NoneType' object has no attribute 'values'".
    return clients2.json()

# Get it all at once
all_page_data = list(map(get_data, range(1, 51)))

# If you want to make a dataframe
import pandas as pd
df = pd.DataFrame(all_page_data)

# You can also split out json-formatted data, if it's in a single column
full_df = pd.json_normalize(df[0])
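As a side note, here is a small illustration of what pd.json_normalize does; the nested records below are made up purely to show the flattening behaviour:

import pandas as pd

# Hypothetical nested records, only to demonstrate the flattening
records = [
    {"id": 1, "address": {"city": "Recife"}},
    {"id": 2, "address": {"city": "Olinda"}},
]
flat = pd.json_normalize(records)
print(flat.columns.tolist())  # ['id', 'address.city']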
I am trying to create a Python web application that shows a time series graph of gold price distribution. I have added the app layout, the app callback, and the update_graph function. After that, I tried to take a copy of my data frame and save it to a new data frame called 'dff', but it throws an error. The 'return fig' at the end of the code throws an error as well. I am fairly new to Python and need help figuring out what's wrong with my code. Below is the entire code.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from urllib.request import urlopen, Request

url = "http://goldpricez.com/gold/history/lkr/years-3"
req = Request(url=url)
html = urlopen(req).read()  # note: this rebinds 'html', shadowing the dash_html_components import

df = pd.read_html(url)  # this will give you a list of dataframes from html
df1 = df[3]
first_val = df1.iloc[0][0]
date = df1[0]
price = df1[1]

data = [df1[0], df1[1]]
headers = ["Date", "Price"]
df3 = pd.concat(data, axis=1, keys=headers)

from datetime import datetime
df3['Date'] = df3['Date'].apply(lambda x: datetime.strptime(x, '%d-%m-%Y'))
df3['Year'] = pd.DatetimeIndex(df3['Date']).year
df3['Month'] = pd.DatetimeIndex(df3['Date']).month
df3['Day'] = pd.DatetimeIndex(df3['Date']).day
df3['WDay'] = df3['Date'].dt.dayofweek
df3['WeekDayName'] = pd.DatetimeIndex(df3['Date']).day_name()
print(df3['WDay'])

writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df3.to_excel(writer, sheet_name='Sheet1')
# Close the Pandas Excel writer and output the Excel file.
writer.save()

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Gold Price Analyst", style={'text-align': 'center'}),
    dcc.Graph(id='this_year_graph', figure={})
])

@app.callback(
    [Output(component_id='this_year_graph', component_property='figure')]
)
def update_graph():
    dff = df3.copy()
    dff = [df4['Date'], df4['Price']]
    fig = px.line(dff, x=dff['Date'], y=dff['Price'])
    return fig

if __name__ == '__main__':
    app.run_server(debug=True)
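Two things stand out in that callback: df4 is never defined (the copy is named dff), and a Dash callback needs at least one Input. A minimal sketch of a repaired version, assuming a hypothetical dropdown with id 'year-picker' were added to the layout, could look like this:

@app.callback(
    Output('this_year_graph', 'figure'),
    [Input('year-picker', 'value')]  # hypothetical control; a callback needs an Input
)
def update_graph(selected_year):
    dff = df3.copy()  # use dff consistently; df4 never existed
    if selected_year is not None:
        dff = dff[dff['Year'] == selected_year]
    fig = px.line(dff, x='Date', y='Price')  # pass column names, not a list of Series
    return fig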
I am working with this Google Sheets sheet here
(https://docs.google.com/spreadsheets/d/1I2VIGfJOyod-13Fke8Prn8IkhpgZWbirPBbosm8EFCc/edit?usp=sharing)
and I want to create a similar dataframe that ONLY consists of the cells that contain "OOO" at the end (I have highlighted them in yellow for clarity). As an example, here's a small snippet of what I want to get out of it:
(https://docs.google.com/spreadsheets/d/1rRWgESE7kPTvchOL0RxEcqjEnY9oUsiMnov-qagHg7I/edit?usp=sharing)
Basically I want to create my own 'schedule' here.
import os
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from googleapiclient import discovery

DATA_DIR = '/path/here/'
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive',
         'https://www.googleapis.com/auth/spreadsheets']
path = os.path.join(DATA_DIR, 'client_secret.json')
credentials = ServiceAccountCredentials.from_json_keyfile_name(path, scope)
client = gspread.authorize(credentials)
service = discovery.build('sheets', 'v4', credentials=credentials)
spreadsheet_id = 'Dcon19'
debug = False

spreadsheet = client.open(spreadsheet_id).sheet1
data = spreadsheet.get_all_values()
index = str(data[0][0])
headers = data.pop(0)
df_index = []

def conv_pd_df():
    df = pd.DataFrame(data, columns=headers, index=None)
    df = df.set_index(index)
    df_index.append(df.index.values)
    mask = df.applymap(lambda x: key in str(x))  # key is presumably the "OOO" marker, defined elsewhere
    df1 = df[mask.any(axis=1)]
    return df1

def highlight(df1):
    df2 = pd.DataFrame(columns=headers[1:], index=df_index)  # blank dataframe
    df2 = df2.fillna('none', inplace=True)
    for col in df1:
        update_row = df1[df1[col].str.contains("OOO")]
        if not update_row.empty:
            try:
                df2.update(update_row, overwrite=True)
            except AttributeError as e:
                print(f'Error {e}')
    df2.to_csv('/path/dcon.csv', header=True)

if __name__ == '__main__':
    if not debug:
        df1 = conv_pd_df()
        highlight(df1)
Right now the only thing I'm getting back for df2 is nothing at all, because I hit AttributeError: 'NoneType' object has no attribute 'to_csv' when I try to save the resulting df2.
Does anyone know how to get this working, or a more efficient way to accomplish this?
This is my first real personal project, so any help would be appreciated!
The error you reference is caused by the way you used fillna: df2.fillna('none', inplace=True) returns None, which is what you are seeing as the error when you then try to call df2.to_csv(...).
Try something like this for your highlight function.
def highlight(df1):
    df2 = pd.DataFrame(columns=headers[1:], index=df_index)  # blank dataframe
    df2.fillna('none', inplace=True)  # in-place: do not reassign the None return value
    for col in df1:
        update_row = df1[df1[col].str.contains("OOO")]
        if not update_row.empty:
            try:
                df2.update(update_row, overwrite=True)
            except AttributeError as e:
                print(f'Error {e}')
    df2.to_csv('/path/dcon.csv', header=True)
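As for a more efficient route, one possible sketch: since the goal is a frame where each cell either keeps its "OOO" value or becomes 'none', DataFrame.where can do the masking and filling in one step (this assumes df is the full sheet as loaded in conv_pd_df):

mask = df.applymap(lambda x: 'OOO' in str(x))  # True wherever a cell mentions OOO
schedule = df.where(mask, other='none')        # keep OOO cells, fill the rest
schedule.to_csv('/path/dcon.csv', header=True)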
I tried to put some basic preprocessing operations on a pandas dataframe into a separate class:
import pandas as pd
import numpy as np
from numba import jit

class MyClass:
    def __init__(self):
        pass

    @jit
    def preprocess_dataframe(self, path):
        self.df = pd.read_csv(path, index_col=False, delimiter=' ',
                              names=['Time', 'Downloads', 'ServerID', 'Server', 'Date'],
                              usecols=['Time', 'Downloads', 'Server', 'Date'])
        print(self.df.head(5))
        self.df['Date'] = self.df['Date'].astype(str)
        self.df['Timestamp'] = pd.to_datetime(self.df['Time'] + ' ' + self.df['Date'], format='%H:%M:%S %Y%m%d')
        self.df[['Server_alone', 'Instance']] = self.df['Server'].str.split('-', expand=True)
        self.df.drop(columns=['Time'], inplace=True)
        self.df['Date'] = pd.to_datetime(self.df['Date'], format='%Y-%m-%d')
        self.df.set_index(self.df['Date'])
        return self.df
When I call this function in my main script (see below) I receive the error:
AttributeError: module 'MyClass' has no attribute 'preprocess_dataframe'
This is the relevant part of my main script:
import MyClass as mc
path = 'Data.txt'
df = mc.preprocess_dataframe(path)
>>>AttributeError: module 'MyClass' has no attribute 'preprocess_dataframe'
I looked up several other questions, including this one, but nothing solved my issue, even though I think the fix should be quite easy. Thank you for your help!
You haven't created an instance of MyClass.
You could rectify it by:
df = mc().preprocess_dataframe(path)
Also change the import statement to: from filename import MyClass as mc
You could also make preprocess_dataframe a staticmethod, as mentioned in the comments.
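In other words, assuming the class lives in a file named MyClass.py (as the import in the main script suggests), the end-to-end usage would be:

from MyClass import MyClass  # import the class itself, not just the module

path = 'Data.txt'
processor = MyClass()                      # create an instance first
df = processor.preprocess_dataframe(path)  # the method is now found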
You should make the method static
import pandas as pd
import numpy as np
from numba import jit

class MyClass:
    @staticmethod  # staticmethod must be the outer decorator so jit wraps the plain function
    @jit
    def preprocess_dataframe(path):
        df = pd.read_csv(path, index_col=False, delimiter=' ',
                         names=['Time', 'Downloads', 'ServerID', 'Server', 'Date'],
                         usecols=['Time', 'Downloads', 'Server', 'Date'])
        print(df.head(5))  # no self here: a static method gets no instance
        df['Date'] = df['Date'].astype(str)
        df['Timestamp'] = pd.to_datetime(df['Time'] + ' ' + df['Date'], format='%H:%M:%S %Y%m%d')
        df[['Server_alone', 'Instance']] = df['Server'].str.split('-', expand=True)
        df.drop(columns=['Time'], inplace=True)
        df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
        df = df.set_index(df['Date'])  # set_index is not in-place; assign the result
        return df
and call it the following way
from filename import MyClass
path = 'Data.txt'
df = MyClass.preprocess_dataframe(path)
Hi everyone, Python is throwing an error every time I try to run this code.
I have tried both of the methods used in the code below, and it isn't running with either of them:
api_key = open('apikey.txt', 'r').read()
for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query, authtoken=api_key)
and also tried this way:
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query)
Both methods are shown in the Quandl documentation, and neither of them is working!
THIS IS THE ACTUAL CODE:
import quandl
import pandas as pd
import pickle

api_key = open('apikey.txt', 'r').read()
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
df = quandl.get('CMHC/HPPU50_BC', authoken=api_key)

friddy_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')

main_df = pd.DataFrame()

for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query, authtoken=api_key)
    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df)

print(main_df.head())
I got super stuck; any help is appreciated.
I notice a missing 't' in your get request for authtoken:
df = quandl.get('CMHC/HPPU50_BC', authoken=api_key)
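With the keyword spelled correctly, that line becomes:

df = quandl.get('CMHC/HPPU50_BC', authtoken=api_key)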