Generating Dataframe from JSON URL in a column in another DataFrame - python

I am trying to generate one DataFrame from the JSON URLs stored in a column of another DataFrame called Data.
import requests
import pandas as pd
import numpy as np
# Download the list of symbols and load it into a DataFrame.
resp = requests.get('https://financialmodelingprep.com/api/v3/company/stock/list')
txt = resp.json()
Data = pd.DataFrame(txt['symbolsList'])

# Add one column per endpoint, each holding the request URL for that row's symbol.
Data = Data.assign(
    keymetric='https://financialmodelingprep.com/api/v3/company-key-metrics/' + Data.symbol + '?period=quarter',
    profile='https://financialmodelingprep.com/api/v3/company/profile/' + Data.symbol,
    financials='https://financialmodelingprep.com/api/v3/financial-statement-growth/' + Data.symbol + '?period=quarter',
)
I have 3 problems:
1) When I download the JSON from one of the URLs stored in the DataFrame ('Data'), the output does not contain the symbol
('AAPL' in the code below).
# Request the key metrics of one hard-coded symbol. Only the 'metrics' entry
# of the payload is loaded into the frame; no other top-level field is copied.
metrics_url = 'https://financialmodelingprep.com/api/v3/company-key-metrics/AAPL?period=quarter'
resp = requests.get(metrics_url)
txt = resp.json()
key = pd.DataFrame(txt['metrics'])
2) I don't know how to automate the code above, using the column 'keymetric' of the DataFrame 'Data' as input.
3) Once the process is done, I would like to end up with a single DataFrame instead of one per symbol.
Expected output for keymetrics. Each column should be divided not all aggregated under one column called 'keymetric'

This code can work.
import pandas as pd
import requests
# Fetch the list of symbols; every later lookup iterates over Data['symbol'].
stock_list_url = 'https://financialmodelingprep.com/api/v3/company/stock/list'
resp = requests.get(stock_list_url)
txt = resp.json()
Data = pd.DataFrame(txt['symbolsList'])
def get_value(symbol):
    """Return a one-row DataFrame combining three API responses for ``symbol``.

    The key-metrics, profile and financial-statement-growth endpoints are
    queried for the given symbol. The first entry of the ``metrics`` list,
    the ``profile`` mapping and the first entry of the ``growth`` list are
    each turned into a single-row frame, and the three frames are joined
    side by side.

    Args:
        symbol: Ticker symbol interpolated into the three request URLs.

    Returns:
        A one-row ``pandas.DataFrame``, or ``None`` when a response is not
        valid JSON or lacks the expected keys/entries.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    base = 'https://financialmodelingprep.com/api/v3'
    # A timeout keeps one unresponsive endpoint from hanging the whole run.
    resp_keymetric = requests.get(f'{base}/company-key-metrics/{symbol}?period=quarter', timeout=30)
    resp_profile = requests.get(f'{base}/company/profile/{symbol}?period=quarter', timeout=30)
    resp_financials = requests.get(f'{base}/financial-statement-growth/{symbol}?period=quarter', timeout=30)

    try:
        rows = [
            resp_keymetric.json()['metrics'][0],
            resp_profile.json()['profile'],
            resp_financials.json()['growth'][0],
        ]
    except (KeyError, IndexError, TypeError, ValueError):
        # KeyError/IndexError/TypeError: the payload does not have the expected
        # shape (e.g. unknown symbol). ValueError: the body was not valid JSON.
        return None

    return pd.concat([pd.DataFrame([row]) for row in rows], axis=1)
# Collect one frame per symbol; only the first five symbols are queried here.
result = []
for symbol in Data['symbol'].head(5).tolist():
    try:
        df = get_value(symbol)
    except requests.RequestException as exc:
        # Best effort: a failed download skips this symbol instead of
        # aborting the whole run, but the failure is reported.
        print(f'Skipping {symbol}: {exc}')
        continue
    # get_value returns None when a response lacks the expected data.
    if df is not None:
        result.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
result_df = pd.concat(result, axis=0) if result else pd.DataFrame()
print(result_df)

Expected output for keymetrics. Each column should be divided not all aggregated under one column called 'keymetric'
current output

Related

Python: How to set up bounds for index

I'm trying to convert json file to excel and modify it.
After normalizing the JSON and trying to add columns, I get an error saying "index 20 is out of bounds for axis 0 with size 19". However, when I normalize three things from the JSON I don't get this error; I only get it when I normalize just two.
Here's my code
def get_data(link: str):
    """Download ``link`` and return its decoded JSON payload.

    Args:
        link: URL of the JSON resource to fetch.

    Returns:
        The object obtained by decoding the response body as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    resp = requests.get(link)  # reading link
    # Surface HTTP errors here rather than as a confusing KeyError later,
    # when the caller indexes into an error payload.
    resp.raise_for_status()
    return resp.json()
def main():
    """Flatten the products JSON into variant and image tables and export them.

    The ``variants`` and ``images`` records of every product are normalized
    into two tables that carry the same product-level meta columns. The two
    tables are outer-merged on the product ``id`` and written to
    ``Bound.csv``.
    """
    # NOTE(review): the credentials are hard-coded in this URL; load them from
    # the environment or a config file instead of keeping them in source.
    # The separator between credentials and host must be '@'. With '#',
    # everything after it is parsed as a URL fragment and never reaches the
    # server.
    payload = get_data(link='https://0f91c5da166bc1b5a70cce01e1f0370c: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@shopeclat.myshopify.com/admin/api/2022-07/products.json')

    # Product-level fields repeated on every variant/image row.
    meta = ['id', 'title', 'body_html', 'vendor', 'product_type',
            'created_at', 'updated_at', 'status', 'image', 'tags']
    v = pd.json_normalize(payload['products'], record_path=['variants'],
                          meta=meta, record_prefix='varients_')
    i = pd.json_normalize(payload['products'], record_path=['images'],
                          meta=meta, record_prefix='images_')

    # Merge the two tables on the product id. Meta columns present in both
    # receive pandas' default _x/_y suffixes.
    final_df = pd.merge(v, i, on=['id'], how='outer')

    print("Exporting csv files ....")
    final_df.to_csv('Bound.csv', index=False)


if __name__ == '__main__':
    main()
Maybe .explode() is what you want:
import requests
import pandas as pd
# NOTE(review): the credentials are hard-coded in this URL; load them from the
# environment or a config file instead of keeping them in source.
# The separator between credentials and host must be '@'. With '#', the rest
# of the string is treated as a URL fragment.
url = "https://0f91c5da166bc1b5a70cce01e1f0370c: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@shopeclat.myshopify.com/admin/api/2022-07/products.json"

# One row per (variant, option, image) combination of every product.
df = (
    pd.DataFrame(requests.get(url).json()["products"])
    .explode("variants")
    .explode("options")
    .explode("images")
    # explode() repeats the original row label; give every row a unique one
    # so the column-wise concat below aligns rows unambiguously.
    .reset_index(drop=True)
)

# Expand each column of dicts into its own prefixed set of columns.
df = pd.concat(
    [
        df,
        df.pop("variants").apply(pd.Series).add_prefix("v_"),
        df.pop("options").apply(pd.Series).add_prefix("o_"),
        df.pop("images").apply(pd.Series).add_prefix("imgs_"),
    ],
    axis=1,
)

df.to_csv("out.csv", index=False)
Creates out.csv (screenshot from Libre Office):

Add rows back to the top of a dataframe

I have a raw dataframe that looks like this
I am trying to import this data as a csv, do some calculations on the data, and then export the data. Before doing this, however, I need to remove the three lines of "header information", but keep the data as I will need to add it back to the dataframe prior to exporting. I have done this using the following lines of code:
import pandas as pd
# Read the file without a header so every line, including the three
# "header information" lines, is kept as an ordinary row.
raw = pd.read_csv(r"test.csv", header=None)

# The first three rows are set aside; the remainder is the actual table.
info = raw.iloc[:3]
body = raw.iloc[3:]

# The first remaining row holds the column names: promote it to the header,
# drop it from the rows and renumber the index from zero.
body.columns = body.iloc[0]
data = body.iloc[1:].reset_index(drop=True)
The problem I am having is, how do I add the rows stored in "info" back to the top of the dataframe to make the format equivalent to the csv I imported.
Thank you
You can just use the append() function of pandas to merge two data frames. Please check by printing the final_data.
import pandas as pd
data = pd.read_csv(r"test.csv", header=None)
info = data.iloc[0:3]
data = data.iloc[3:]
data.columns = data.iloc[0]
data = data[1:]
data = data.reset_index(drop=True)

# The first data row was promoted to the column header above, so convert the
# header back into an ordinary row and put it on top of the data again.
# DataFrame.append() was removed in pandas 2.0; pd.concat() is the equivalent.
header_row = data.columns.to_frame().T
data = pd.concat([header_row, data], ignore_index=True)

# Restore the positional column labels so the columns line up with `info`.
data.columns = range(len(data.columns))

final_data = pd.concat([info, data]).reset_index(drop=True)

How do I get the groupby operation to work?

Can't get Pandas Groupby operation to work.
I suspect I need to convert the data to a pandas dataframe first? However, I can't seem to get that to work either.
import requests
import json
import pandas as pd
baseurl = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DA0321EN-SkillsNetwork/labs/module%201/datasets/githubposting.json"
response = requests.get(baseurl)
data = response.json()
print(data)


def get_number_of_jobs(technology):
    """Return ``(technology, number_of_jobs)`` for one technology.

    The module-level ``data`` is grouped by its 'technology' column and
    summed; the first value of the row labelled ``technology`` is returned
    together with the name.

    NOTE(review): this is the failing code the question is about and its
    behaviour is intentionally left as posted. ``data`` is the plain Python
    object returned by ``response.json()``, not a DataFrame, so it has no
    ``groupby`` method until it is wrapped in ``pd.DataFrame``.
    """
    number_of_jobs = data.groupby('technology').sum().loc[technology, :][0]
    return technology, number_of_jobs


print(get_number_of_jobs('python'))
Thanks
data is a list of dictionaries, not DataFrame, so it doesn't have groupby. You don't really need it anyway, you can create the DataFrame while replacing the A and B columns with the first values in the json response and search for 'Python' there, it's already a single entry
baseurl = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DA0321EN-SkillsNetwork/labs/module%201/datasets/githubposting.json"
response = requests.get(baseurl)
data = response.json()

# The first record carries the real column names as its values; every
# following record is one data row.
column_names = list(data[0].values())
rows = [record.values() for record in data[1:]]
df = pd.DataFrame(data=rows, columns=column_names)

# Pick the posting count of the single row whose technology is 'Python'.
is_python = df['technology'] == 'Python'
number_of_jobs = df.loc[is_python, 'number of job posting'].iloc[0]
print(number_of_jobs)  # 51

convert json to dataframe in for loops in python

I'm trying to fetch data from an API and build a DataFrame from the returned JSON inside a for loop. I am able to create the first DataFrame, but my loop only ever produces the DataFrame for the first JSON response. After struggling with this for a few days, I decided to ask the experts here for guidance.
import requests
import json
import pandas as pd
# create an empty DataFrame object
df = pd.DataFrame()
# api header
headers = {"Accept": "application/json", "Authorization": "api_secret"}
# addresses to query, one request per address
email_list = ["abc@gmail.com", "xyz@gmail.com"]
# columns kept from every normalized response (constant, so defined once)
FIELDS = ["event"]

# supposed to read both emails in the list and append each df
for i in email_list:
    querystring = {"where": i}
    response = requests.request("GET", "https://example.com/api/2.0/export", headers=headers, params=querystring)
    # The export is JSON Lines: store it, then parse it back line by line.
    with open('test.jsonl', 'w') as writefile:
        writefile.write(response.text)
    # Read through a context manager so the handle is closed every iteration.
    with open('test.jsonl', 'r') as readfile:
        data = [json.loads(line) for line in readfile]
    # NOTE(review): the two statements below are left as posted because they
    # are what the question asks about. The first rebinds `df` to the new
    # frame, so the second appends that frame to itself and the rows gathered
    # in earlier iterations are lost.
    df = pd.json_normalize(data)[FIELDS]
    df = df.append(df)
I wonder if I need to change something in df append but I can't pinpoint where needs to be changed. thank you so much in advance!
# Quoted from the question: the first statement rebinds `df` to the new frame,
# so the second statement appends that frame to itself.
df = pd.json_normalize(data)[FIELDS]
df = df.append(df)
overwrites the DataFrame on every iteration. Instead, create a new one before appending:
df2 = pd.json_normalize(data)[FIELDS]
# Append the new rows to the accumulated frame. DataFrame.append() was removed
# in pandas 2.0; pd.concat([df, df2]) is the equivalent call.
df = pd.concat([df, df2])

Extract json data in web page using pd.read_json()?

I am trying to extract the table from this page: "https://www.hkex.com.hk/Market-Data/Statistics/Consolidated-Reports/Monthly-Bulletin?sc_lang=en#select1=0&select2=28". According to the Inspect/Network tab in Chrome, the data request link is "https://www.hkex.com.hk/eng/stat/smstat/mthbull/rpt_turnover_short_selling_current_month_1910.json?_=1574650413485". This link looks like JSON when accessed directly. However, the code below, which uses this link, does not work.
My code:
import pandas as pd
# Let pandas download and parse the JSON endpoint directly.
url = "https://www.hkex.com.hk/eng/stat/smstat/mthbull/rpt_turnover_short_selling_current_month_1910.json?_=1574650413485"
df = pd.read_json(url)

# DataFrame.info() writes its report itself and returns None, so the first
# print emits "None" after the report; the second prints the frame.
info_result = df.info(verbose=True)
print(info_result)
print(df)
also tried:
# Second attempt: the same endpoint with nothing after the trailing "?".
url="https://www.hkex.com.hk/eng/stat/smstat/mthbull/rpt_turnover_short_selling_current_month_1910.json?"
You can try downloading the json first and then convert it back to DataFrame
import json
import urllib.request

import pandas as pd

url = 'https://www.hkex.com.hk/eng/stat/smstat/mthbull/rpt_turnover_short_selling_current_month_1910.json?_=1574650413485'

# Download the payload ourselves and decode it with the json module.
# json.load() reads the raw bytes and detects the UTF encoding (including a
# BOM), so no manual read()/decode() step is needed.
with urllib.request.urlopen(url) as r:
    data = json.load(r)

# The first table holds a flat list of cells ('body') and the column
# captions ('header').
table = data['tables'][0]
df = pd.DataFrame(table['body'])
columns = [item['text'] for item in table['header']]

# NOTE(review): assumes the 'row' values are 1-based, so that their maximum
# equals the number of rows; confirm against the payload.
row_count = max(df['row'])

# Fold the flat cell texts back into a row_count x n_columns table.
new_df = pd.DataFrame(df.text.values.reshape((row_count, -1)), columns=columns)

Categories