Merging empty pandas DF with rows from separate DF on Google Sheets - python

I am working with this Google Sheet here
(https://docs.google.com/spreadsheets/d/1I2VIGfJOyod-13Fke8Prn8IkhpgZWbirPBbosm8EFCc/edit?usp=sharing)
and I want to create a similar dataframe that ONLY consists of the cells that contain "OOO" at the end (I have highlighted them in yellow for clarity). As an example, here's a small snippet of what I want to get out of it:
(https://docs.google.com/spreadsheets/d/1rRWgESE7kPTvchOL0RxEcqjEnY9oUsiMnov-qagHg7I/edit?usp=sharing)
Basically I want to create my own 'schedule' here.
import os
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from googleapiclient import discovery

DATA_DIR = '/path/here/'
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive',
         'https://www.googleapis.com/auth/spreadsheets']
path = os.path.join(DATA_DIR, 'client_secret.json')
credentials = ServiceAccountCredentials.from_json_keyfile_name(path, scope)
client = gspread.authorize(credentials)
service = discovery.build('sheets', 'v4', credentials=credentials)
spreadsheet_id = 'Dcon19'
debug = False
key = 'OOO'  # marker searched for (implied by the question; missing from the original snippet)

spreadsheet = client.open(spreadsheet_id).sheet1
data = spreadsheet.get_all_values()
index = str(data[0][0])
headers = data.pop(0)
df_index = []

def conv_pd_df():
    df = pd.DataFrame(data, columns=headers, index=None)
    df = df.set_index(index)
    df_index.append(df.index.values)
    mask = df.applymap(lambda x: key in str(x))
    df1 = df[mask.any(axis=1)]
    return df1

def highlight(df1):
    df2 = pd.DataFrame(columns=headers[1:], index=df_index)  # blank dataframe
    df2 = df2.fillna('none', inplace=True)
    for col in df1:
        update_row = df1[df1[col].str.contains("OOO")]
        if not update_row.empty:
            try:
                df2.update(update_row, overwrite=True)
            except AttributeError as e:
                print(f'Error {e}')
    df2.to_csv('/path/dcon.csv', header=True)

if __name__ == '__main__':
    if not debug:
        df1 = conv_pd_df()
        highlight(df1)
Now the only thing I'm getting back for df2 is a blank dataframe, because I hit the error AttributeError: 'NoneType' object has no attribute 'to_csv' when I try to save the resulting df2.
Does anyone know how to get this working, or a more efficient way to accomplish this?
This is my first real personal project so any help would be appreciated!

The error you reference is because of the way you used fillna. With inplace=True, fillna modifies the DataFrame in place and returns None, so df2 = df2.fillna('none', inplace=True) rebinds df2 to None; that is what you are seeing when you then try df2.to_csv(...).
Try something like this for your highlight function.
def highlight(df1):
    df2 = pd.DataFrame(columns=headers[1:], index=df_index)  # blank dataframe
    df2.fillna('none', inplace=True)
    for col in df1:
        update_row = df1[df1[col].str.contains("OOO")]
        if not update_row.empty:
            try:
                df2.update(update_row, overwrite=True)
            except AttributeError as e:
                print(f'Error {e}')
    df2.to_csv('/path/dcon.csv', header=True)
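The general rule in pandas: either mutate in place or reassign the returned copy, never both. A minimal sketch of the two equivalent patterns:

# Option 1: mutate in place; the call returns None, so don't assign it.
df2.fillna('none', inplace=True)

# Option 2: reassign the new DataFrame returned by the copying version.
df2 = df2.fillna('none')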

Related

python: AttributeError: 'list' object has no attribute 'groupby'

I am following a YouTube tutorial on a Streamlit application; however, the error
"AttributeError: 'list' object has no attribute 'groupby'"
occurred when I was trying to group the list I scraped from Wikipedia. The instructor had exactly the same code as me but didn't face a problem. Where am I going wrong?
import streamlit as st
import pandas as pd

@st.cache
def load_data():
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    html = pd.read_html(url, header=0)
    return html

df = load_data()
df = df.groupby('GICS Sector')
I fixed it; I just had to reassign the df variable to its first index:
import streamlit as st
import pandas as pd

@st.cache
def load_data():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    html = pd.read_html(url, header=0)
    return html

df = load_data()
df = df[0]  # take the first table from the list
df = df.groupby("GICS Sector")
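The underlying point is that pd.read_html always returns a list of DataFrames, one per <table> element found on the page, so you have to index into that list before calling DataFrame methods. A quick check:

import pandas as pd

tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', header=0)
print(type(tables))  # <class 'list'>
sp500 = tables[0]    # the first table on the page holds the constituents
print(sp500.groupby('GICS Sector').size().head())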

How to skip to the next loop iteration in Python if no data is returned by the API?

I have Python code that loops through multiple locations and pulls data from a third-party API. Below is the code; sublocation_ids are the location ids, coming from a directory.
As you can see from the code, the data gets converted to a dataframe and then saved to an Excel file. The current issue I am facing is that if the API does not return data for publication_timestamp for a certain location, the loop stops and does not proceed, and I get the error shown below the code.
How do I avoid this and skip to the next iteration if no data is returned by the API?
for sub in sublocation_ids:
    city_num_int = sub['id']
    city_num_str = str(city_num_int)
    city_name = sub['name']
    filter_text_new = filter_text.format(city_num_str)
    data = json.dumps({"filters": [filter_text_new], "sort_by": "created_at", "size": 2})
    r = requests.post(url=api_endpoint, data=data).json()
    articles_list = r["articles"]
    articles_list_normalized = json_normalize(articles_list)
    df = articles_list_normalized
    df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
    df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
    df.to_excel(writer, sheet_name=city_name)
writer.save()
KeyError: 'publication_timestamp'
Change this bit of code:
df = articles_list_normalized
if 'publication_timestamp' in df.columns:
    df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
    df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
    df.to_excel(writer, sheet_name=city_name)
else:
    continue
If the API literally returns no data, i.e. {}, then you might even do the check before normalizing it:
if articles_list:
    df = json_normalize(articles_list)
    # ... rest of code ...
else:
    continue
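Putting both guards together, the loop body might look like the sketch below. It assumes api_endpoint, filter_text, writer, and sublocation_ids from the question; note also that x.now() returns the current time, so formatting the timestamp itself is probably what was meant:

for sub in sublocation_ids:
    city_name = sub['name']
    filter_text_new = filter_text.format(str(sub['id']))
    data = json.dumps({"filters": [filter_text_new], "sort_by": "created_at", "size": 2})
    r = requests.post(url=api_endpoint, data=data).json()

    articles_list = r.get("articles")
    if not articles_list:  # API returned nothing for this location
        continue

    df = json_normalize(articles_list)
    if 'publication_timestamp' not in df.columns:
        continue

    # assumption: format the timestamp column itself, not the current time
    df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp']).dt.strftime('%Y-%m-%d')
    df.to_excel(writer, sheet_name=city_name)

writer.save()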

Loop and add function component as index

I would like to change the index of the following code. Instead of having 'close' as the index, I want the corresponding x from the function. Sometimes, as in this example, even if I provide 4 currencies only 3 are available, which means I cannot simply attach the list as the index after looping because the sizes no longer match. Thank you for your help. I should add that even with set_index(x) the index remains 'close'.
The function daily_price_historical retrieves prices from a public API. There are exactly 7 columns, from which I select the first one (close).
The function:
def daily_price_historical(symbol, comparison_symbol, all_data=False, limit=1, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    return df
The code:
curr = ['1WO', 'ABX', 'ADH', 'ALX']
d_price = []
for x in curr:
    try:
        close = daily_price_historical(x, 'JPY', exchange='CCCAGG').close
        d_price.append(close).set_index(x)
    except:
        pass
d_price = pd.concat(d_price, axis=1)
d_price = d_price.transpose()
print(d_price)
The output:
            0
close  2.6100
close  0.3360
close  0.4843
The function daily_price_historical returns a dataframe, so daily_price_historical(x, 'JPY', exchange='CCCAGG').close is a pandas Series. The title of a Series is its name, but you can change it with rename. So you want:
...
close = daily_price_historical(x, 'JPY', exchange='CCCAGG').close
d_price.append(close.rename(x))
...
In your original code, d_price.append(close).set_index(x) raised an AttributeError: 'NoneType' object has no attribute 'set_index' exception, because append on a list returns None. The exception was raised after the append had already happened, and it was then silently swallowed by the catch-all except: pass.
What to remember from this: never use the very dangerous
try:
    ...
except:
    pass
which hides any error.
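A narrower handler keeps the skip-on-failure behaviour without hiding everything else. A sketch, assuming the likely failure modes here are HTTP errors from requests, a missing 'Data' key, or a missing close column for unknown symbols:

for x in curr:
    try:
        close = daily_price_historical(x, 'JPY', exchange='CCCAGG').close
        d_price.append(close.rename(x))
    except (KeyError, AttributeError, requests.RequestException) as e:
        print(f'skipping {x}: {e}')  # skip this symbol but report why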
Try this small piece of code:
import pandas as pd
import requests

curr = ['1WO', 'ABX', 'ADH', 'ALX']

def daily_price_historical(symbol, comparison_symbol, all_data=False, limit=1, aggregate=1, exchange=''):
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate)
    if exchange:
        url += '&e={}'.format(exchange)
    if all_data:
        url += '&allData=true'
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df.drop(df.index[-1], inplace=True)
    return df

d_price = []
labels_ind = []
for x in curr:
    try:
        close = daily_price_historical(x, 'JPY', exchange='CCCAGG').close
        d_price.append(close[0])
        labels_ind.append(x)
    except (KeyError, AttributeError, requests.RequestException):
        pass  # skip symbols the API has no data for (narrow except, per the warning above)

d_price = pd.DataFrame(d_price, columns=["0"])
d_price.index = labels_ind
print(d_price)
Output:
          0
1WO  2.6100
ADH  0.3360
ALX  0.4843

How to fix an error with the Quandl get function (Status 404) (Quandl Error QECx02)?

Hi everyone, Python is throwing this error every time I try to run this code.
I have tried both of the methods used in the code below and it's not running with either of them:
api_key = open('apikey.txt', 'r').read()
for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query, authtoken=api_key)
and also tried this way:
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query)
Both methods are shown in the Quandl documentation and neither of them is working!
THIS IS THE ACTUAL CODE:
import quandl
import pandas as pd
import pickle

api_key = open('apikey.txt', 'r').read()
quandl.ApiConfig.api_key = 'MY API FROM QUANDL'
df = quandl.get('CMHC/HPPU50_BC', authoken=api_key)
friddy_states = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
main_df = pd.DataFrame()
for x in friddy_states[0][1]:
    query = 'CMHC/HPPU50_BC' + str(x)
    df = quandl.get(query, authtoken=api_key)
    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df)
print(main_df.head())
I got super stuck; any help is appreciated.
I notice a missing 't' in the authtoken keyword of your get request:
df = quandl.get('CMHC/HPPU50_BC', authoken= api_key)
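With the keyword spelled correctly, the call becomes:

df = quandl.get('CMHC/HPPU50_BC', authtoken=api_key)  # 'authtoken', not 'authoken'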

How do I save data from websocket?

I have successfully subscribed to a websocket and am receiving data. I want to save that data so I can use it in a dataframe for further analysis.
My code so far only returns empty lists and dataframes.
Code:
RETURNS EMPTY LIST
wsClient = GDAX.WebsocketClient(url="wss://ws-feed.gdax.com", products="LTC-USD")
df1 = []
for i in wsClient.start():
    df1 = df1.append(wsClient.start())
Code:
RETURNS EMPTY LIST AND DATAFRAME
wsClient = GDAX.WebsocketClient(url="wss://ws-feed.gdax.com", products="LTC-USD")
dfs = []
for i in wsClient.start():
    dfs.append(wsClient.start())
df1 = pd.concat(dfs)
You need to implement your own custom on_message method to be able to get the websocket information:
import time
import gdax
import pandas as pd

results = []

class myWebsocketClient(gdax.WebsocketClient):
    def on_open(self):
        self.url = "wss://ws-feed.gdax.com/"
        self.products = ["LTC-USD"]
    def on_message(self, msg):
        if 'price' in msg and 'type' in msg:
            results.append(msg['price'])

wsClient = myWebsocketClient()
wsClient.start()
time.sleep(5)
df = pd.DataFrame(results, columns=["Price"])
print(df.head())
wsClient.close()
This will run for 5 seconds, and outputs:
          Price
0  153.13000000
1  151.14000000
2  140.52000000
3  140.52000000
4  152.62000000
-- Socket Closed --
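If you want more than the price, a variation of on_message (a sketch, assuming the same GDAX feed message shape as above) can keep several fields per message and build a richer dataframe:

def on_message(self, msg):
    # keep time, price, and size for each 'match' (trade) message; field
    # names assumed from the GDAX feed, so adjust to what you actually receive
    if msg.get('type') == 'match':
        results.append({'time': msg.get('time'),
                        'price': float(msg['price']),
                        'size': float(msg.get('size', 0))})

pd.DataFrame(results) then yields one column per field instead of a single Price column.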
