I'm pulling data from the NHL API for per-game player stats. I'm trying to write a loop that calls the API, parses the JSON, and builds a dict from which I can create a data frame for an entire team. The code before my looping attempt looks like this:
API_URL = "https://statsapi.web.nhl.com/api/v1"

response = requests.get(API_URL + "/people/8477956/stats?stats=gameLog", params={"Content-Type": "application/json"})
data = json.loads(response.text)

df_list_dict = []
for game in data['stats'][0]['splits']:
    curr_dict = game['stat']
    curr_dict['date'] = game['date']
    curr_dict['isHome'] = game['isHome']
    curr_dict['isWin'] = game['isWin']
    curr_dict['isOT'] = game['isOT']
    curr_dict['team'] = game['team']['name']
    curr_dict['opponent'] = game['opponent']['name']
    df_list_dict.append(curr_dict)

df = pd.DataFrame.from_dict(df_list_dict)
print(df)
This gives me a digestible data frame for a single player (/people/{player}/...).
I want to iterate through a list (the list being an NHL team), while adding a column that identifies the player and concatenating the created data frames. My attempt thus far looks like this:
import requests
import json
import pandas as pd

Rangers = ['8478550', '8476459', '8479323', '8476389', '8475184', '8480817', '8480078', '8476624', '8481554', '8482109', '8476918', '8476885', '8479324',
           '8482073', '8479328', '8480833', '8478104', '8477846', '8477380', '8477380', '8477433', '8479333', '8479991']

def callapi(player):
    response = requests.get(f'https://statsapi.web.nhl.com/api/v1/people/{player}/stats?stats=gameLog', params={"Content-Type": "application/json"})
    data = json.loads(response.text)
    df_list_dict = []
    for game in data['stats'][0]['splits']:
        curr_dict = game['stat']
        curr_dict['date'] = game['date']
        curr_dict['isHome'] = game['isHome']
        curr_dict['isWin'] = game['isWin']
        curr_dict['isOT'] = game['isOT']
        curr_dict['team'] = game['team']['name']
        curr_dict['opponent'] = game['opponent']['name']
        df_list_dict.append(curr_dict)
    df = pd.DataFrame.from_dict(df_list_dict)
    print(df)

for player in Rangers:
    callapi(player)
print(callapi)
When this runs I can see all the data frames that were created. I cannot use curr_dict[] to add a column based on the list position (the player ID), because I get an error that indices must be integers or slices, not str.
What I'm hoping to do is make this one data frame in which the stats are identified by a player id column.
My Python knowledge is very scattered; given the progress I've made, I feel I should know how to complete this, but I've simply hit a wall. Any help would be appreciated.
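For what it's worth, a minimal sketch of the concatenation step, built on the code in the question and assuming callapi is changed to end with return df rather than print(df), could look like this:

# Sketch: collect one tagged frame per player, then concatenate them all.
frames = []
for player in Rangers:
    df = callapi(player)          # assumes callapi now returns its data frame
    df['player_id'] = player      # new column identifying the player
    frames.append(df)

team_df = pd.concat(frames, ignore_index=True)
print(team_df)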
You can use concurrent.futures to parallelize the requests before concatenating them all together, and json_normalize to parse the json.
import concurrent.futures
import json
import os

import pandas as pd
import requests


class Scrape:

    def main(self) -> pd.DataFrame:
        rangers = ["8478550", "8476459", "8479323", "8476389", "8475184", "8480817", "8480078",
                   "8476624", "8481554", "8482109", "8476918", "8476885", "8479324", "8482073",
                   "8479328", "8480833", "8478104", "8477846", "8477380", "8477380", "8477433",
                   "8479333", "8479991"]
        # Fetch every player's game log in parallel, then stack the frames.
        with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
            return pd.concat(executor.map(self.get_stats, rangers)).reset_index(drop=True).fillna(0)

    @staticmethod
    def get_stats(player: str) -> pd.DataFrame:
        url = f"https://statsapi.web.nhl.com/api/v1/people/{player}/stats?stats=gameLog"
        with requests.Session() as request:
            response = request.get(url, timeout=30)
        if response.status_code != 200:
            response.raise_for_status()
        data = json.loads(response.text)
        # Flatten the nested JSON and tag every row with the player id.
        df = (pd.json_normalize(data=data, record_path=["stats", "splits"])
                .rename(columns={"team.id": "team_id", "team.name": "team_name",
                                 "opponent.id": "opponent_id", "opponent.name": "opponent_name"})
                .assign(player_id=player))
        # Drop link/gamePk columns and strip the dotted prefixes from the rest.
        df = df[df.columns.drop(list(df.filter(regex="link|gamePk")))]
        df.columns = df.columns.str.split(".").str[-1]
        if "faceOffPct" not in df.columns:
            df["faceOffPct"] = 0
        return df


if __name__ == "__main__":
    stats = Scrape().main()
    print(stats)
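Since the work here is network-bound rather than CPU-bound, a ThreadPoolExecutor would arguably serve just as well and avoids the process-spawn overhead; as a sketch, the two executor lines in main() could be swapped for:

        # Threads are enough for I/O-bound HTTP calls; extra processes buy little here.
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(rangers)) as executor:
            return pd.concat(executor.map(self.get_stats, rangers)).reset_index(drop=True).fillna(0)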
I am using search-tweets-python-v2 to search for tweets within a 7-day window. Specifically, I am using
tweets = collect_results(query,
                         max_tweets=10000,
                         result_stream_args=search_args)  # change this if you need to
My code:
from datetime import datetime as dt
import pandas as pd
from searchtweets import ResultStream, gen_request_parameters, load_credentials
from searchtweets import collect_results
search_args = load_credentials("twitter_keys.yaml", yaml_key="search_tweets_v2", env_overwrite=False)

query_ts = "dummy query"
query = gen_request_parameters(query_ts, results_per_call=100, granularity=None, start_time='2022-11-16', end_time='2022-11-22')
tweets = collect_results(query, max_tweets=10000, result_stream_args=search_args)
print(tweets)
How do I get to know how many API calls were made or how many I have remaining?
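One general way to check how many requests you have left, independent of the collect_results helper, is to read the rate-limit headers that the Twitter API v2 attaches to every response. The sketch below is only an illustration: it assumes a raw requests call against the recent-search endpoint and a bearer token, rather than the YAML credentials used above.

import requests

BEARER_TOKEN = "..."  # assumption: your v2 bearer token

resp = requests.get(
    "https://api.twitter.com/2/tweets/search/recent",
    headers={"Authorization": "Bearer " + BEARER_TOKEN},
    params={"query": "dummy query", "max_results": 100},
)

# Twitter reports the current window's quota in these response headers.
print(resp.headers.get("x-rate-limit-limit"))      # total requests allowed in the window
print(resp.headers.get("x-rate-limit-remaining"))  # requests you still have left
print(resp.headers.get("x-rate-limit-reset"))      # epoch seconds when the window resets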
I have a question about rate limits.
I take data from a CSV, feed it into the query, and store the output in a list.
I get an error because I make too many requests at once
(I can only make 20 requests per second). How can I stay within the rate limit?
import requests
import pandas as pd

df = pd.read_csv("Data_1000.csv")
list = []

def requestSummonerData(summonerName, APIKey):
    URL = "https://euw1.api.riotgames.com/lol/summoner/v3/summoners/by-name/" + summonerName + "?api_key=" + APIKey
    response = requests.get(URL)
    return response.json()

def main():
    APIKey = (str)(input('Copy and paste your API Key here: '))
    for index, row in df.iterrows():
        summonerName = row['Player_Name']
        responseJSON = requestSummonerData(summonerName, APIKey)
        ID = responseJSON['accountId']
        ID = int(ID)
        list.insert(index, ID)
    df["accountId"] = list
If you already know you can only make 20 requests per second, you just need to work out how long to wait between each request:
Divide 1 second by 20, which gives you 0.05. So you just need to sleep for 0.05 seconds between each request and you shouldn't hit the limit (maybe increase it a bit if you want to be safe).
import time at the top of your file and then call time.sleep(0.05) inside your for loop (you could also just write time.sleep(1/20)).
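Applied to the loop in the question, that could look roughly like this; only the time import and the sleep call are new:

import time

def main():
    APIKey = (str)(input('Copy and paste your API Key here: '))
    for index, row in df.iterrows():
        summonerName = row['Player_Name']
        responseJSON = requestSummonerData(summonerName, APIKey)
        ID = responseJSON['accountId']
        ID = int(ID)
        list.insert(index, ID)
        time.sleep(0.05)  # pause 1/20 of a second so we stay under 20 requests per second
    df["accountId"] = list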
The following link provides data in JSON for a BTC address -> https://blockchain.info/address/1GA9RVZHuEE8zm4ooMTiqLicfnvymhzRVm?format=json.
The bitcoin address can be viewed here --> https://blockchain.info/address/1GA9RVZHuEE8zm4ooMTiqLicfnvymhzRVm
As you can see, in the first transaction on 2014-10-20 19:14:22 the TX had 10 inputs from 10 addresses. I want to retrieve these addresses using the API, but I've been struggling to get this to work. The following code only retrieves the first address instead of all 10, see code. I know it has to do with the JSON structure, but I can't figure it out.
import json
import urllib2
import sys

#Random BTC adress (user input)
btc_adress = ("1GA9RVZHuEE8zm4ooMTiqLicfnvymhzRVm")

#API call to blockchain
url = "https://blockchain.info/address/" + (btc_adress) + "?format=json"
json_obj = urllib2.urlopen(url)
data = json.load(json_obj)

#Put tx's into a list
txs_list = []
for txs in data["txs"]:
    txs_list.append(txs)

#Cut the list down to 5 recent transactions
listcutter = len(txs_list)
if listcutter >= 5:
    del txs_list[5:listcutter]

# Get number of inputs for tx
recent_tx_1 = txs_list[1]
total_inputs_tx_1 = len(recent_tx_1["inputs"])
The block below needs to put all 10 input addresses in the list output_adress. It only does so for the first one:
output_adress = []
output_adress.append(recent_tx_1["inputs"][0]["prev_out"]["addr"])
print output_adress
Your help is always appreciated, thanks in advance.
Because you only add one address to it. Change it to this:
output_adress = []
for i in xrange(len(recent_tx_1["inputs"])):
    output_adress.append(recent_tx_1["inputs"][i]["prev_out"]["addr"])
print output_adress
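A slightly more idiomatic variant (kept in the same Python 2 style as the rest of the thread) skips the index entirely and builds the list in one comprehension:

# Collect the source address of every input of the transaction.
output_adress = [tx_input["prev_out"]["addr"] for tx_input in recent_tx_1["inputs"]]
print output_adress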
I am using the Google search API, but by default it shows 4 results per page, and at most 8. I want more results per page.
Add the rsz=8 parameter to this Google search demonstration code,
then use the start=... parameter to control which group of results you receive.
This, for example, gives you 50 results:
import urllib
import json
import sys
import itertools

def hits(astr):
    for start in itertools.count():
        query = urllib.urlencode({'q': astr, 'rsz': 8, 'start': start * 8})
        url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % (query)
        search_results = urllib.urlopen(url)
        results = json.loads(search_results.read())
        data = results['responseData']
        if data:
            hits = data['results']
            for h in hits:
                yield h['url']
        else:
            raise StopIteration

def showmore(astr, num):
    for i, h in enumerate(itertools.islice(hits(astr), num)):
        print('{i}: {h}'.format(i=i, h=h))

if __name__ == '__main__':
    showmore(sys.argv[1], 50)
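Run it from the command line with the search phrase as the first argument, e.g. python search_demo.py "python generators" (the file name search_demo.py is just a placeholder); showmore() then prints the first 50 result URLs, pulling pages of 8 from the hits() generator until it has enough.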