Python check if list item is in text file - python

output_file = open("processed.txt", "a")
with open('text.json') as json_file:
object_list = json.load(json_file)
receive_txids = []
for object in object_list:
if object['category'] == 'receive':
receive_txids.append(object['txid'])
with open('list.txt', "r") as list_file:
for txid in receive_txids:
if txid not in list_file:
print "for ea txid " + txid
print "NEW TRANSACTION";
output_file.write(txid + '\n')
that's my code
this is list.txt contents
db8cd79b4a0eafc6368bb65d2ff34d7d9c3d2016bee8528ab945a3d4bbad982d
2a93476e65b5500bc3d69856ddd512854d4939f1aabf488ac2806ec346a898a3
45b629a779e6e0bf6d160c37833a27f1f2cc1bfa34632d166cccae83e69eb6fe
bf5b7bc1aeaf7fb43f5d39b549278bee6665872bd74274dd5fad80d043002a3e
1e5f49fa1d0df059b9d7da8452cde9fb5a312c823401f5ed4ed4eafb5f98c1b0
7dc7cd4afcebaf8f17575be8b9acf06adcaadfe7fa5528453246307aa36e6ea0
aefdb89b461c118529bec78b35fed46cc5d7050b39902552fa2408361284c746
ec6abb67828c79cbf0b74131f0acfddc509efc9743bed0811d2316007cdcc482
text.json looks something like this:
[
{
"account" : "",
"address" : "D8xWhR8LqSdSLTxRWwouQ3EiSnvcjLmdo6",
"category" : "receive",
"amount" : 1000.00000000,
"confirmations" : 1963,
"blockhash" : "4569322b4c8c98fba3ef4c7bda91b53b4ee82d268eae2ff7658bc0d3753c00ff",
"blockindex" : 2,
"blocktime" : 1394242415,
"txid" : "45b629a779e6e0bf6d160c37833a27f1f2cc1bfa34632d166cccae83e69eb6fe",
"time" : 1394242265,
"timereceived" : 1394242265
},
{
"account" : "",
"address" : "D8xWhR8LqSdSLTxRWwouQ3EiSnvcjLmdo6",
"category" : "receive",
"amount" : 11.00000000,
"confirmations" : 1194,
"blockhash" : "eff3d32177bf19629fe0f8076807acbb02b34aedcbce1c27a19ce9872daecb7c",
"blockindex" : 6,
"blocktime" : 1394290663,
"txid" : "bf5b7bc1aeaf7fb43f5d39b549278bee6665872bd74274dd5fad80d043002a3e",
"time" : 1394290582,
"timereceived" : 1394290582
},
{
"account" : "",
"address" : "DKLMkLZmiSVXtEavDpQ4dasjZvC178QoM9",
"category" : "receive",
"amount" : 1.00000000,
"confirmations" : 1183,
"blockhash" : "7b5d3ebeb994dbff0940504db9e407bd90cad8a5a1ace05dcba4bc508ca27aff",
"blockindex" : 9,
"blocktime" : 1394291510,
"txid" : "1e5f49fa1d0df059b9d7da8452cde9fb5a312c823401f5ed4ed4eafb5f98c1b0",
"time" : 1394291510,
"timereceived" : 1394291578
},
{
"account" : "",
"address" : "DKLMkLZmiSVXtEavDpQ4dasjZvC178QoM9",
"category" : "receive",
"amount" : 1.00000000,
"confirmations" : 1179,
"blockhash" : "4d9bd6d2988bc749022c41d125f1134796aa314e0d0bde34eba855ad88e76a7f",
"blockindex" : 21,
"blocktime" : 1394291642,
"txid" : "7dc7cd4afcebaf8f17575be8b9acf06adcaadfe7fa5528453246307aa36e6ea0",
"time" : 1394291629,
"timereceived" : 1394291629
},
{
"account" : "",
"address" : "DKLMkLZmiSVXtEavDpQ4dasjZvC178QoM9",
"category" : "receive",
"amount" : 1.00000000,
"confirmations" : 1179,
"blockhash" : "4d9bd6d2988bc749022c41d125f1134796aa314e0d0bde34eba855ad88e76a7f",
"blockindex" : 20,
"blocktime" : 1394291642,
"txid" : "aefdb89b461c118529bec78b35fed46cc5d7050b39902552fa2408361284c746",
"time" : 1394291637,
"timereceived" : 1394291637
},
{
"account" : "",
"address" : "DKLMkLZmiSVXtEavDpQ4dasjZvC178QoM9",
"category" : "receive",
"amount" : 11.00000000,
"confirmations" : 34,
"blockhash" : "df34d9d44e87cd3315755d3e7794b10729fc3f5853c218ec237c43a89d918eb7",
"blockindex" : 5,
"blocktime" : 1394364125,
"txid" : "ec6abb67828c79cbf0b74131f0acfddc509efc9743bed0811d2316007cdcc482",
"time" : 1394348464,
"timereceived" : 1394348464
}
]
I cannot for the life of me find out why this isn't working. It's printing "NEW TRANSACTION" every time it iterates through.
I want to check for each txid (transaction id) in json.txt, if it already exists in list.txt. If not, I want to write it to "processed.txt"

File objects don't support containment tests; you'd need to read your text file.
It'd be easiest for you to put all transaction ids in a set, then remove all txids found in your list.txt using set operations. Whatever is left are new transactions to write to the file:
with open('text.json') as json_file:
recieve_txids = {o['txid'] for o in json.load(json_file) if o['category'] == 'recieve'}
with open('list.txt', "r") as list_file:
recieve_txids -= {l.strip() for l in list_file}
with open('processed.txt', "w") as output_file:
for txid in recieve_txids:
output_file.write(txid + '\n')
If you needed access to the original JSON objects still, use a dictionary, with the txid as the key, then remove all elements from the dictionary found in the file:
with open('text.json') as json_file:
recieve_txids = {o['txid']: o for o in json.load(json_file) if o['category'] == 'recieve'}
with open('list.txt', "r") as list_file:
for line in list_file:
txid = l.strip()
if txid in recieve_txids:
del recieve_txids[txid]

The open() function returns an iterator not a list so you can use the in operator only once. Repeatedly using in on the same iterator only checks an empty list returned by a file already having reached is end.
Furthermore each line still contains the trailing line separator characters so you should use strip('\n\r') to get rid of them.
And to quickly check if an item is in a list you should use a set.
Something like this should work:
transaction_ids = set()
with open('list.txt', 'r') as list_file:
for line in list_file:
transaction_ids.add(line.rstrip('\n\r')
for txid in receive_txids:
if txid not in transaction_ids:
print "NEW TRANSACTION";

Related

Removing items from JSON using Python loop

how do I iterate over the data and keep object keys that have the string "Java" in the value and remove keys with the string "Javascript" in the value? In addition to the iterations I already have in my code. For example:
this key has the word 'Java' in the value.
"value" : "A vulnerability in the encryption implementation of EBICS messages in the open source librairy ebics-java/ebics-java-client allows an attacker sniffing network traffic to decrypt EBICS payloads. This issue affects: ebics-java/ebics-java-client versions prior to 1.2."
the current code below iterates thru other JSON items (that are also needed), but not the Java/Javascript issue.
from encodings import utf_8
import json
from zipfile import ZipFile
from urllib.request import urlretrieve
from io import BytesIO
import os
url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2022.json.zip"
urlretrieve(url, "nvdcve-1.1-2022.json.zip")
with ZipFile('nvdcve-1.1-2022.json.zip', 'r') as zip:
zip.extractall('.')
with open('nvdcve-1.1-2022.json', encoding='utf-8') as x:
data = json.load(x)
#function to sort through without rewriting code with parameters/arguments passed into the function(variable)
def base_score(metric):
if 'baseMetricV3' not in metric['impact']:
#no values = 0 so it will auto sort by ID
return (0, metric['cve']['CVE_data_meta']['ID'])
#sorts by ID if two or more base scores are equal
return (metric['impact']['baseMetricV3']['cvssV3']['baseScore'], metric['cve']['CVE_data_meta']['ID'])
#return allows assigment of function output to new variable
#direct python to open json file using specific encoding to avoid encoding error
for CVE_Item in data['CVE_Items']:
for node in CVE_Item['configurations']['nodes']:
#removes items while iterating through them
node['cpe_match'][:] = [item for item in node['cpe_match'] if item['vulnerable']]
#also check children objects for vulnerable
if node['children']:
for children_node in node['children']:
children_node['cpe_match'][:] = [item for item in children_node['cpe_match'] if item['vulnerable']]
#sorts data in descending order using reverse
data['CVE_Items'].sort(reverse=True, key=base_score)
#write file to current working directory
with open('sorted_nvdcve-1.1-2022.json', 'w') as new_file:
new_file.write(json.dumps(data, indent=4))
if os.path.exists('nvdcve-1.1-2022.json.zip'):
os.remove('nvdcve-1.1-2022.json.zip')
else:
print("The file does not exist")
if os.path.exists('nvdcve-1.1-2022.json'):
os.remove('nvdcve-1.1-2022.json')
else:
print("The file does not exist")
here is the link to the original JSON file (too large to post entire text here):
https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2022.json.zip
the key 'value' is located in the 'description' list.
here is a sample of the JSON text:
{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "15972",
"CVE_data_timestamp" : "2022-11-01T07:00Z",
"CVE_Items" : [ {
"cve" : {
"data_type" : "CVE",
"data_format" : "MITRE",
"data_version" : "4.0",
"CVE_data_meta" : {
"ID" : "CVE-2022-0001",
"ASSIGNER" : "secure#intel.com"
},
"problemtype" : {
"problemtype_data" : [ {
"description" : [ {
"lang" : "en",
"value" : "NVD-CWE-noinfo"
} ]
} ]
},
"references" : {
"reference_data" : [ {
"url" : "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00598.html",
"name" : "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00598.html",
"refsource" : "MISC",
"tags" : [ "Vendor Advisory" ]
}, {
"url" : "http://www.openwall.com/lists/oss-security/2022/03/18/2",
"name" : "[oss-security] 20220318 Xen Security Advisory 398 v2 - Multiple speculative security issues",
"refsource" : "MLIST",
"tags" : [ "Mailing List", "Third Party Advisory" ]
}, {
"url" : "https://www.oracle.com/security-alerts/cpujul2022.html",
"name" : "N/A",
"refsource" : "N/A",
"tags" : [ "Patch", "Third Party Advisory" ]
}, {
"url" : "https://security.netapp.com/advisory/ntap-20220818-0004/",
"name" : "https://security.netapp.com/advisory/ntap-20220818-0004/",
"refsource" : "CONFIRM",
"tags" : [ "Third Party Advisory" ]
} ]
},
"description" : {
"description_data" : [ {
"lang" : "en",
"value" : "JavaScript sharing of branch predictor selectors between contexts in some Intel(R) Processors may allow an authorized user to potentially enable information disclosure via local access."
} ]
}
Add this inside the for CVE_Item loop.
CVE_Item['cve']['description']['description_data'] = [
d for d in CVE_Item['cve']['description']['description_data']
if 'Java' in d['value'] and 'JavaScript' not in d['value']]
The modified loop looks like:
for CVE_Item in data['CVE_Items']:
CVE_Item['cve']['description']['description_data'] = [
d for d in CVE_Item['cve']['description']['description_data']
if 'Java' in d['value'] and 'JavaScript' not in d['value']]
for node in CVE_Item['configurations']['nodes']:
#removes items while iterating through them
node['cpe_match'][:] = [item for item in node['cpe_match'] if item['vulnerable']]
#also check children objects for vulnerable
if node['children']:
for children_node in node['children']:
children_node['cpe_match'][:] = [item for item in children_node['cpe_match'] if item['vulnerable']]

How do I parse nested json objects?

I am trying to load a JSON file to parse the contents nested in the root object. Currently I have the JSON file open and loaded as such:
with open(outputFile.name) as f:
data = json.load(f)
For the sake of the question here is an example of what the contents of the JSON file are like:
{
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
How do I access the nested objects in "rootObject", such as "person", "kids" and its contents, and "parents"?
Below code using recursive function can extract values using specific key in a nested dictionary or 'lists of dictionaries':
data = {
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
}}
def get_vals(nested, key):
result = []
if isinstance(nested, list) and nested != []: #non-empty list
for lis in nested:
result.extend(get_vals(lis, key))
elif isinstance(nested, dict) and nested != {}: #non-empty dict
for val in nested.values():
if isinstance(val, (list, dict)): #(list or dict) in dict
result.extend(get_vals(val, key))
if key in nested.keys(): #key found in dict
result.append(nested[key])
return result
get_vals(data, 'person')
Output
[{'address': 'some place ave. 123', 'age': 47, 'name': 'Joe'}]
The code for loading the JSON object should look like this:
from json import loads, load
with open("file.json") as file:
var = loads(load(file))
# loads() transforms the string in a python dict object

Eliminate keys from list of dict python

i am pulling out information from this websites API:
https://financialmodelingprep.com/
to be specific i need the data from the income statements:
https://financialmodelingprep.com/developer/docs/#Company-Financial-Statements
what i get back from the API is a list, which contains 36 dictionarys with the following Data:
[ {
"date" : "2019-09-28",
"symbol" : "AAPL",
"fillingDate" : "2019-10-31 00:00:00",
"acceptedDate" : "2019-10-30 18:12:36",
"period" : "FY",
"revenue" : 260174000000,
"costOfRevenue" : 161782000000,
"grossProfit" : 98392000000,
"grossProfitRatio" : 0.378178,
"researchAndDevelopmentExpenses" : 16217000000,
"generalAndAdministrativeExpenses" : 18245000000,
"sellingAndMarketingExpenses" : 0.0,
"otherExpenses" : 1807000000,
"operatingExpenses" : 34462000000,
"costAndExpenses" : 196244000000,
"interestExpense" : 3576000000,
"depreciationAndAmortization" : 12547000000,
"ebitda" : 81860000000,
"ebitdaratio" : 0.314636,
"operatingIncome" : 63930000000,
"operatingIncomeRatio" : 0.24572,
"totalOtherIncomeExpensesNet" : 422000000,
"incomeBeforeTax" : 65737000000,
"incomeBeforeTaxRatio" : 0.252666,
"incomeTaxExpense" : 10481000000,
"netIncome" : 55256000000,
"netIncomeRatio" : 0.212381,
"eps" : 2.97145,
"epsdiluted" : 2.97145,
"weightedAverageShsOut" : 18595652000,
"weightedAverageShsOutDil" : 18595652000,
"link" : "https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/0000320193-19-000119-index.html",
"finalLink" : "https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/a10-k20199282019.htm"
}, ...
]
What i dont need in the dictionary are the keys:
fillingDate, acceptedDate, link, finalLink
I managed to remove them, but my problem is that now that piece of code i wrote spits out those dictionaries way too often, and i am not able to understand why...
Here is what i tried:
import requests
import json
url = "https://financialmodelingprep.com/api/v3/income-statement/AAPL?apikey=b60bb3d1967bb15bfb9daaa4426e77dc"
response = requests.get(url)
data = response.text
dataList = json.loads(data)
entriesToRemove = {
'fillingDate' : 0,
'acceptedDate' : 0,
'link' : 0,
'finalLink' : 0
}
removedEntries = []
newDict = {}
for index in range(len(dataList)):
for key in dataList[index]:
newDict[key] = dataList[index].get(key)
if key in entriesToRemove:
removedEntries = newDict.pop(key)
print(json.dumps(newDict, indent=4))
Thanks in advance
OP:
for each key in the dictionary, the dictionary gets printed a new time.
Reason:
for index in range(len(dataList)):
for key in dataList[index]:
newDict[key] = dataList[index].get(key)
if key in entriesToRemove:
removedEntries = newDict.pop(key)
print(json.dumps(newDict, indent=4)) # notice this line
The reason why the dictionary is printed for each key is because you have a print(json.dumps(newDict, indent=4)) statement inside the loop for each key-val iteration over the dictionary.
To eradicate the highlighted keys from a list of dict, you could iterate over the list and create another list of dict without the unnecessary keys:
s = [ {
"date" : "2019-09-28",
"symbol" : "AAPL",
"fillingDate" : "2019-10-31 00:00:00",
"acceptedDate" : "2019-10-30 18:12:36",
"period" : "FY",
"revenue" : 260174000000,
"costOfRevenue" : 161782000000,
"grossProfit" : 98392000000,
"grossProfitRatio" : 0.378178,
"researchAndDevelopmentExpenses" : 16217000000,
"generalAndAdministrativeExpenses" : 18245000000,
"sellingAndMarketingExpenses" : 0.0,
"otherExpenses" : 1807000000,
"operatingExpenses" : 34462000000,
"costAndExpenses" : 196244000000,
"interestExpense" : 3576000000,
"depreciationAndAmortization" : 12547000000,
"ebitda" : 81860000000,
"ebitdaratio" : 0.314636,
"operatingIncome" : 63930000000,
"operatingIncomeRatio" : 0.24572,
"totalOtherIncomeExpensesNet" : 422000000,
"incomeBeforeTax" : 65737000000,
"incomeBeforeTaxRatio" : 0.252666,
"incomeTaxExpense" : 10481000000,
"netIncome" : 55256000000,
"netIncomeRatio" : 0.212381,
"eps" : 2.97145,
"epsdiluted" : 2.97145,
"weightedAverageShsOut" : 18595652000,
"weightedAverageShsOutDil" : 18595652000,
"link" : "https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/0000320193-19-000119-index.html",
"finalLink" : "https://www.sec.gov/Archives/edgar/data/320193/000032019319000119/a10-k20199282019.htm"
}
]
res = []
ignored_keys = ['fillingDate', 'acceptedDate', 'link', 'finalLink']
for dd in s:
for k,v in dd.items():
if k not in ignored_keys:
res.append({k: v})
print(res)
EDIT:
one-liner:
print({k:v for dd in s for k,v in dd.items() if k not in ignored_keys})

python searching through dict that contain list of nested dict

I am looking to pull all the "symbol" from a Dict that looks like this:
file_data = json.load(f)
{
"symbolsList" : [ {
"symbol" : "SPY",
"name" : "SPDR S&P 500",
"price" : 261.56,
"exchange" : "NYSE Arca"
}, {
"symbol" : "CMCSA",
"name" : "Comcast Corporation Class A Common Stock",
"price" : 35.49,
"exchange" : "Nasdaq Global Select"
}, {
"symbol" : "KMI",
"name" : "Kinder Morgan Inc.",
"price" : 13.27,
"exchange" : "New York Stock Exchange"
}
}
after looking up I found a way to access certain symbol. but I would like to get all the symbol in a form of list or dict doesn't really matter to me.
this is what I got:
print([next(item for item in file_data["symbolsList"] if item["symbol"] == "SPY")])
I know that the problem is with the next function I just don't know how to get all the symbols
you can use a list comprehension:
[e['symbol'] for e in d['symbolsList']]
output:
['SPY', 'CMCSA', 'KMI']
the same thing using a for loop:
result = []
for e in d['symbolsList']
result.append(e['symbol'])

Scraping different style of Json

I am familiar with scraping data in this format.
{"data":[{"assists":0,"assistsPerGame":0.0000,"evAssists":0,"evPoints":0,"gamesPlayed":1,"goals":0,"penaltyMinutes":0,"playerBirthCity":"Windsor","playerBirthCountry":"CAN","playerBirthDate":"1996-02-07",
import csv
import requests
outfile = open("NHL_Recent.csv","a",newline='')
writer = csv.writer(outfile)
writer.writerow(["Player","Pos","GP","G","A","P","+/-","PIM","PPG","PPP","SHG","SHP","GWG","OTG","S","S%","TOI","Shifts/PG","FOW%"])
req = requests.get('http://www.nhl.com/stats/rest/skaters?isAggregate=true&reportType=basic&isGame=true&reportName=skatersummary&sort=[{%22property%22:%22shots%22,%22direction%22:%22DESC%22}]&cayenneExp=gameDate%3E=%222017-11-4%22%20and%20gameDate%3C=%222017-11-10%22%20and%20gameTypeId=2')
data = req.json()['data']
for item in data:
Player = item['playerName']
Pos = item['playerPositionCode']
GP = item['gamesPlayed']
But not in this manner.
"totalItems" : 600,
"totalEvents" : 0,
"totalGames" : 600,
"totalMatches" : 0,
"wait" : 10,
"dates" : [ {
"date" : "2017-10-04",
"totalItems" : 4,
"totalEvents" : 0,
"totalGames" : 4,
"totalMatches" : 0,
"games" : [ {
"gamePk" : 2017020001,
"link" : "/api/v1/game/2017020001/feed/live",
"gameType" : "R",
"season" : "20172018",
"gameDate" : "2017-10-04T23:00:00Z",
"status" : {
"abstractGameState" : "Final",
"codedGameState" : "7",
"detailedState" : "Final",
"statusCode" : "7",
"startTimeTBD" : false
},
"teams" : {
"away" : {
"leagueRecord" : {
"wins" : 1,
"losses" : 0,
"ot" : 0,
"type" : "league"
},
"score" : 7,
"team" : {
"id" : 10,
"name" : "Toronto Maple Leafs",
"link" : "/api/v1/teams/10",
"venue" : {
"name" : "Air Canada Centre",
"link" : "/api/v1/venues/null",
"city" : "Toronto",
"timeZone" : {
"id" : "America/Toronto",
"offset" : -5,
"tz" : "EST"
}
},
"abbreviation" : "TOR",
"teamName" : "Maple Leafs",
"locationName" : "Toronto",
"firstYearOfPlay" : "1926",
"division" : {
"id" : 17,
"name" : "Atlantic",
"link" : "/api/v1/divisions/17"
},
"conference" : {
"id" : 6,
"name" : "Eastern",
"link" : "/api/v1/conferences/6"
},
"franchise" : {
"franchiseId" : 5,
"teamName" : "Maple Leafs",
"link" : "/api/v1/franchises/5
This is what I have so far with no success.
import csv
import requests
import os
outfile = open("NHL DIF JSON.csv","a",newline='')
writer = csv.writer(outfile)
writer.writerow(["Date","Game","gamep"])
req = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2017-10-04&endDate=2018-04-30&expand=schedule.teams,schedule.linescore,schedule.broadcasts.all,schedule.ticket,schedule.game.content.media.epg,schedule.radioBroadcasts,schedule.metadata,schedule.game.seriesSummary,seriesSummary.series&leaderCategories=&leaderGameTypes=R&site=en_nhl&teamId=&gameType=&timecode=')
data = req.json()['dates']
for item in data:
Date = item['date']
##for item in games:
Game = item['0']
gamep = item['gamePk']
print(Date,Game)
writer.writerow([Date,Game,gamep])
outfile.close()
os.system("taskkill /f /im pythonw.exe")
I Would like to pull the "gamePk", "gameDate" from totalGames along with the teamNames within "teams" and other categories. I eventually would like to put that into a csv with the gamePk, gameDate, teams, score, etc. I'm just not sure how to get through the individual categories, any help would be greatly appreciated! Thanks!
It's normal json data, just a bit complicated. You can get the date from data['dates'][i]['date']. For the teams, score, etc you have to iterate over data['dates'][i]['games'].
req = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2017-10-04&endDate=2018-04-30&expand=schedule.teams,schedule.linescore,schedule.broadcasts.all,schedule.ticket,schedule.game.content.media.epg,schedule.radioBroadcasts,schedule.metadata,schedule.game.seriesSummary,seriesSummary.series&leaderCategories=&leaderGameTypes=R&site=en_nhl&teamId=&gameType=&timecode=')
data = req.json()
my_data =[]
for item in data['dates']:
date = item['date']
games = item['games']
for game in games:
gamePk = game['gamePk']
gameDate = game['gameDate']
team_away, team_home = game['teams']['away'], game['teams']['home']
team_away_score = team_away['score']
team_home_score = team_home['score']
team_away_name = team_away['team']['name']
team_home_name = team_home['team']['name']
my_data.append([date, gamePk, gameDate, team_away_name, team_home_name, team_away_score, team_home_score])
headers = ["Date","Game","gamep","gameDate","team_away_name","team_home_name","team_away_score","team_home_score"]
with open("my_file.csv", "a", newline='') as f:
writer = csv.writer(f)
writer.writerow(headers)
writer.writerows(my_data)
As for your last question, you can get the 'pk' from data['gameData']['game']['pk']. The player, event, triCode and coordinates values are a little harder to get because some items don't have 'players' and 'team' keys, or the 'coordinates' dict is empty.
In this case the dict.get method can be helpful because it will return None (or you can set a default value) if you try to access a non-existent key.
Still you have to design your code according to the structure of the json data, example:
req = requests.get('https://statsapi.web.nhl.com/api/v1/game/2017020001/feed/live?site=en_nhl')
data = req.json()
my_data = []
pk = data['gameData']['game']['pk']
for item in data['liveData']['plays']['allPlays']:
players = item.get('players')
if players:
player_a = players[0]['player']['fullName'] if len(players) > 0 else None
player_b = players[1]['player']['fullName'] if len(players) > 1 else None
else:
player_a, player_b = None, None
event = item['result']['event']
triCode = item.get('team', {}).get('triCode')
coordinates_x, coordinates_y = item['coordinates'].get('x'), item['coordinates'].get('y')
my_data.append([pk, player_a, player_b, event, triCode, coordinates_x, coordinates_y])
for row in my_data:
print(row)

Categories