Convert Yahoo Finance List to Dataframe - python

import pandas as pd
import urllib.parse  # urllib.parse must be imported explicitly; `import urllib` alone is not enough
import time
import sys

baseurl = "https://query.yahooapis.com/v1/public/yql?"
yql_bs_query = 'select * from yahoo.finance.historicaldata where symbol = "YHOO" and startDate = "2009-09-11" and endDate = "2010-03-10"'
yql_bs_url = baseurl + urllib.parse.urlencode({'q': yql_bs_query}) + "&format=json&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback="
bs_json = pd.io.json.read_json(yql_bs_url)
bs_json.values
YHOO = bs_json.values.tolist()
I am not able to convert this list into a DataFrame.

It does convert to a DataFrame, but the frame has only one column and five rows, because the JSON has this form:
{u'query': {u'count': 124,
            u'created': u'2017-01-26T05:44:52Z',
            u'diagnostics': {u'build-version': u'2.0.84',
            ...
You just need to download the JSON separately, index in to get the quote data, and then convert that to a DataFrame:
# same code as above here:
import pandas as pd
import urllib.parse
import time
import sys

baseurl = "https://query.yahooapis.com/v1/public/yql?"
yql_bs_query = 'select * from yahoo.finance.historicaldata where symbol = "YHOO" and startDate = "2009-09-11" and endDate = "2010-03-10"'
yql_bs_url = baseurl + urllib.parse.urlencode({'q': yql_bs_query}) + "&format=json&diagnostics=true&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback="
# now that you have the URL:
import requests
# download json data and convert to dict
data = requests.get(yql_bs_url).json()
# get quote data
quote = data["query"]["results"]["quote"]
# convert to dataframe
quote = pd.DataFrame.from_dict(quote)
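If you then want the rows indexed by trading day, a small optional follow-up; a minimal sketch, assuming the Date column this YQL table used to return (verify the column names against your actual response):
# parse the assumed "Date" column and use it as a sorted index
quote["Date"] = pd.to_datetime(quote["Date"])
quote = quote.set_index("Date").sort_index()
print(quote.head())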

Related

How to get the remaining number of API requests in search-tweets-python-v2?

I am using search-tweets-python-v2 to search for tweets in a 7-day window. Specifically, I am using:
tweets = collect_results(query,
                         max_tweets=10000,
                         result_stream_args=search_args)  # change this if you need to
My code:
from datetime import datetime as dt
import pandas as pd
from searchtweets import ResultStream, gen_request_parameters, load_credentials
from searchtweets import collect_results
search_args = load_credentials("twitter_keys.yaml", yaml_key="search_tweets_v2", env_overwrite=False)
query_ts = "dummy query"
query = gen_request_parameters(query_ts, results_per_call=100, granularity=None, start_time='2022-11-16', end_time='2022-11-22')
tweets = collect_results(query, max_tweets=10000, result_stream_args=search_args)
print(tweets)
How do I find out how many API calls were made, or how many I have remaining?
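As far as I know, collect_results itself does not expose this. One workaround is to call the search endpoint directly with requests and read Twitter's documented rate-limit response headers; a minimal sketch (the bearer token is a placeholder you would take from your twitter_keys.yaml):
import requests

# placeholder credentials -- substitute the bearer token from twitter_keys.yaml
headers = {"Authorization": "Bearer <YOUR_BEARER_TOKEN>"}
resp = requests.get(
    "https://api.twitter.com/2/tweets/search/recent",
    headers=headers,
    params={"query": "dummy query", "max_results": 10},
)
# Twitter reports quota usage in these response headers
print("limit:    ", resp.headers.get("x-rate-limit-limit"))
print("remaining:", resp.headers.get("x-rate-limit-remaining"))
print("reset at: ", resp.headers.get("x-rate-limit-reset"))  # epoch seconds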

How to use a variable from calculations in a pandas_gbq query?

Hi, I am trying to run a query with pandas_gbq, but I can't reference my variable in it. How do I do so? I am using the pandas_gbq library:
def dataBQ(maxDate):
    # load data
    dat = pd.read_csv("data/source/rawdat.csv", delimiter=";")
    # convert to datetime format
    dat['date_key'] = pd.to_datetime(dat['date_key'], format='%d/%m/%Y').dt.date
    # get latest date
    maxDate = dat['date_key'].max()
    dataTraffic = """
        SELECT *
        from
        `fileData` where
        date_key > {maxDate}
        """
    dataBQ = pandas_gbq.read_gbq(dataBQ, project_id=projectId)
How do I reference maxDate in the dataTraffic query?
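One straightforward way is plain Python string formatting, since {maxDate} inside an ordinary string literal is never substituted on its own. A minimal sketch keeping the names from the question (projectId is the question's placeholder, and depending on date_key's type in BigQuery you may need a DATE '...' literal rather than a plain quoted string):
import pandas as pd
import pandas_gbq

def dataBQ():
    dat = pd.read_csv("data/source/rawdat.csv", delimiter=";")
    dat['date_key'] = pd.to_datetime(dat['date_key'], format='%d/%m/%Y').dt.date
    maxDate = dat['date_key'].max()
    # splice the computed date into the SQL before sending it
    dataTraffic = """
        SELECT *
        FROM `fileData`
        WHERE date_key > '{maxDate}'
    """.format(maxDate=maxDate)
    return pandas_gbq.read_gbq(dataTraffic, project_id=projectId)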

How to pass a range in a def function

Want to pass a range to the web-scraping function; not sure how it's done. This is to make my code more reusable, so that I can scrape different ranges with different dates, say 2016... 2017... 2018... The code looks like this:
import numpy as np
import pandas as pd
import requests

def game_id2017(game_id):
    games_played_2017 = []
    games_played_2018 = []
    print('Getting data...')
    for game_id in range():  # <-- how do I pass the range in here?
        url = 'https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(game_id)
        r_2017 = requests.get(url)
        game_data_2017 = r_2017.json()
        for homeaway in ['home', 'away']:
            game_dict_2017 = dict()
            game_dict_2017['team'] = game_data_2017.get('teams').get(homeaway).get('team').get('name')
            game_dict_2017['teamID'] = game_data_2017.get('teams').get(homeaway).get('team').get('id')
            game_dict_2017['homeaway'] = homeaway
            game_dict_2017['game_id'] = game_id
            games_played_2017.append(game_dict_2017)
Calling it with three arguments, game_id2017(20170201, 20170210, 1), fails with:
TypeError: game_id2017() takes 1 positional argument but 3 were given
One option is a star parameter, which packs the extra positional arguments into a tuple:
def game_id2017(*game_id):
    for an_id in game_id:
        ...  # then use game_id like a list
Pass a list:
import numpy as np
import pandas as pd
import requests

def game_id2017(game_id):
    print('Getting data...')
    for a_game_id in game_id:
        ...  # use a_game_id

game_id2017([20170201, 20170210, 1])
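Since a range object is iterable too, the same function accepts one directly. A minimal sketch of the full loop (the id bounds are just the values from the question, not necessarily real NHL game ids):
import requests

def game_id2017(game_ids):
    games_played = []
    print('Getting data...')
    for a_game_id in game_ids:
        url = 'https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(a_game_id)
        game_data = requests.get(url).json()
        for homeaway in ['home', 'away']:
            team = game_data.get('teams', {}).get(homeaway, {}).get('team', {})
            games_played.append({'team': team.get('name'),
                                 'teamID': team.get('id'),
                                 'homeaway': homeaway,
                                 'game_id': a_game_id})
    return games_played

# a list and a range behave identically here
games = game_id2017(range(20170201, 20170211))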

Convert pandas dataframe to .hyper extract

I have an SQL output in a pandas DataFrame that I would like to first convert to a .hyper Tableau extract and then publish to Tableau Server via the Extract API. When I run my code (below), I get the error 'module' object is not callable for tdefile = tableausdk.HyperExtract(outfilename). I believe my code is correct, but maybe the modules were installed incorrectly? Has anyone seen this error?
print("Importing modules...")
import pandas as pd
import pyodbc
import re
import numpy as np
import cx_Oracle
import smtplib
import schedule
import time
import win32com.client as win32
import tableauserverclient as TSC
import os
import tableausdk
from pandleau import *
from tableausdk import *
from tableausdk.HyperExtract import *
print("Done importing modules.")
server = x
db = y
conn_sql = pyodbc.connect(#fill in your connection data)
### sql query - change from getdate() - 4 to TD# ##
sql_1 = """
select
* from test
"""
df = pd.read_sql_query(sql_1, conn_sql)
df.head()
def job(df, outfilename):
    if os.path.isfile(outfilename):
        os.remove(outfilename)
        os.remove('DataExtract.log')
    try:
        tdefile = tableausdk.HyperExtract(outfilename)
    except:
        #os.remove(outfilename)
        os.system('del ' + outfilename)
        os.system('del DataExtract.log')
        tdefile = tableausdk.HyperExtract(outfilename)

    # define the table definition
    tableDef = tableausdk.TableDefinition()

    # create a list of column names
    colnames = df.columns
    # create a list of column types
    coltypes = df.dtypes

    # create a dict for the field maps
    # Define type maps
    # Caveat: I am not including all of the possibilities here
    fieldMap = {
        'float64': tde.Types.Type.DOUBLE,
        'float32': tde.Types.Type.DOUBLE,
        'int64': tde.Types.Type.DOUBLE,
        'int32': tde.Types.Type.DOUBLE,
        'object': tde.Types.Type.UNICODE_STRING,
        'bool': tde.Types.Type.BOOLEAN,
        'datetime64[ns]': tde.Types.Type.DATE,
    }

    # for each column, add the appropriate info to the Table Definition
    for i in range(0, len(colnames)):
        cname = colnames[i]                 # header of column
        coltype = coltypes[i]               # pandas data type of column
        ctype = fieldMap.get(str(coltype))  # get integer field type in Tableau speak
        tableDef.addColumn(cname, ctype)

    # add the data to the table
    with tdefile as extract:
        table = extract.addTable("Extract", tableDef)
        for r in range(0, df.shape[0]):
            row = tde.Row(tableDef)
            for c in range(0, len(coltypes)):
                if df.iloc[r, c] is None:
                    row.setNull(c)
                elif str(coltypes[c]) in ('float64', 'float32', 'int64', 'int32'):
                    try:
                        row.setDouble(c, df.iloc[r, c])
                    except:
                        row.setNull(c)
                elif str(coltypes[c]) == 'object':
                    try:
                        row.setString(c, df.iloc[r, c])
                    except:
                        row.setNull(c)
                elif str(coltypes[c]) == 'bool':
                    row.setBoolean(c, df.iloc[r, c])
                elif str(coltypes[c]) == 'datetime64[ns]':
                    try:
                        row.setDate(c, df.iloc[r, c].year, df.iloc[r, c].month, df.iloc[r, c].day)
                    except:
                        row.setNull(c)
                else:
                    row.setNull(c)
            # insert the row
            table.insert(row)
    tdefile.close()

#df_tableau = pandleau(df_1)
#df_tableau.set_spatial('SpatialDest', indicator=True)
#df_tableau.to_tableau('test.hyper', add_index=False)
job(df, 'test_1.hyper')
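For what it's worth, that error usually means tableausdk.HyperExtract is being treated as a class when it is a module: in the Tableau SDK the callable Extract class lives inside the tableausdk.HyperExtract module. A minimal sketch of the likely fix, which also replaces the undefined tde references with tableausdk.Types (class and enum locations are from the legacy Tableau SDK, so verify them against your installed version):
from tableausdk.HyperExtract import Extract, TableDefinition, Row
from tableausdk.Types import Type

# tableausdk.HyperExtract is a *module*, so calling it raises
# "'module' object is not callable"; the callable class inside it is Extract.
tdefile = Extract('test_1.hyper')

# the type enums live in tableausdk.Types -- presumably what the
# undefined `tde` references in the question were meant to point at
tableDef = TableDefinition()
tableDef.addColumn('value', Type.DOUBLE)

table = tdefile.addTable('Extract', tableDef)
row = Row(tableDef)
row.setDouble(0, 3.14)
table.insert(row)
tdefile.close()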

Converting Flattened JSON to Dataframe in Python 2.7

I am trying to read some data using a REST API and write it to a DB table. I have written the code below, but unfortunately I am stuck with the flattened JSON. Can you please help with a way to convert the JSON to a DataFrame?
Code
import requests
import json
import pandas
from pandas.io.json import json_normalize
from flatten_json import flatten
j_username = 'ABCD'
j_password = '12456'
query = '"id = 112233445566"'
print query
r = requests.get('Url' % query, auth=(j_username, j_password))
print r.json()
first_response = r.json()
string_data = json.dumps(r.json())
normalized_r = json_normalize(r.json())
print flatten(r.json())
r_flattened = flatten(r.json())
r_flattened_str = json.dumps(flatten(r.json()))
print type (flatten(r.json()))
The flattened JSON output is as below:
{
    'data_0_user-35': u'Xyz',
    'data_0_user-34': None,
    'data_0_user-37': u'CC',
    'data_0_user-36': None,
    'data_0_user-31': u'Regular',
    'data_0_user-33': None,
    'data_0_user-32': None,
    'data_0_target-rcyc_id': 0101,
    'data_0_to-mail': None,
    'data_0_closing-version': None,
    'data_0_user-44': None,
    'data_0_test-reference': None,
    'data_0_request-server': None,
    'data_0_target-rcyc_type': u'regular type',
    'data_0_project': None,
    'data_0_user-01': u'Application Name',
    'data_0_user-02': None,
    'data_0_user-03': None,
    ...
}
The expected output is:
data_0_user-35  data_0_user-34  data_0_user-37  .........
Xyz             None            CC              ........
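For what it's worth, a flat {column: value} dict like this maps straight onto a one-row frame once wrapped in a list; a minimal sketch (Python 2.7, with a stand-in dict since I don't have the real response -- with the question's objects this would be pd.DataFrame([flatten(r.json())])):
import pandas as pd

# stand-in for flatten(r.json()) from the question
r_flattened = {'data_0_user-35': u'Xyz', 'data_0_user-34': None, 'data_0_user-37': u'CC'}
df = pd.DataFrame([r_flattened])  # list of dicts -> one row per dict
print df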
I finally cracked this. This code reads the data from a REST API, converts it into a DataFrame, and eventually writes it into an Oracle database. Thanks to my friend and some of the wonderful people in the community whose answers helped me get to this.
import requests
from pandas.io.json import json_normalize
import datetime as dt
import pandas as pd
import cx_Oracle

date = dt.datetime.today().strftime("%Y-%m-%d")
date = "'%s'" % date
query2 = '"creation-time=%s"' % date
r = requests.get('url?query=%s' % query2,
                 auth=('!username', 'password#'))
response_data_json = r.json()
response_data_normalize = json_normalize(response_data_json['data'])
subset = response_data_normalize.loc[:, ('value1', 'value2')]
Counter = subset['value1'].max()
converted_value = getattr(Counter, "tolist", lambda x=Counter: x)()
frame = pd.DataFrame()
for i in range(2175, converted_value + 1):  # 2175 is just a reference number to start the comparison from... specific to my work
    id = '"id = %s"' % i
    r = requests.get('url?&query=%s' % id, auth=('!username', 'password#'))
    response_data_json1 = r.json()
    response_data_normalize1 = json_normalize(response_data_json1['data'])
    sub = response_data_normalize1.loc[:, ('value1', 'value2', 'value3', 'value4')]
    frame = frame.append(sub, ignore_index=True)
con = cx_Oracle.connect('USERNAME', 'PASSWORD', cx_Oracle.makedsn('HOSTNAME', PORTNUMBER, 'SERVICENAME'))
cur = con.cursor()
rows = [tuple(x) for x in frame.values]
print rows
cur.executemany('''INSERT INTO TABLENAME (Value1, Value2, Value3, Value4) VALUES (:1, :2, :3, :4)''', rows)
con.commit()
cur.close()
con.close()
