Parsing data in a dict

Parsing data in a dict - python

I have a dict that I am trying to obtain certain data from, an example of this dict is as follows:
{
'totalGames': 1,
'dates': [{
'totalGames': 1,
'totalMatches': 0,
'matches': [],
'totalEvents': 0,
'totalItems': 1,
'games': [{
'status': {
'codedGameState': '7',
'abstractGameState': 'Final',
'startTimeTBD': False,
'detailedState': 'Final',
'statusCode': '7',
},
'season': '20172018',
'gameDate': '2018-05-20T19:00:00Z',
'venue': {'link': '/api/v1/venues/null',
'name': 'Bell MTS Place'},
'gameType': 'P',
'teams': {'home': {'leagueRecord': {'wins': 9,
'losses': 8, 'type': 'league'}, 'score': 1,
'team': {'link': '/api/v1/teams/52',
'id': 52, 'name': 'Winnipeg Jets'}},
'away': {'leagueRecord': {'wins': 12,
'losses': 3, 'type': 'league'}, 'score': 2,
'team': {'link': '/api/v1/teams/54',
'id': 54, 'name': 'Vegas Golden Knights'}}},
'content': {'link': '/api/v1/game/2017030325/content'},
'link': '/api/v1/game/2017030325/feed/live',
'gamePk': 2017030325,
}],
'date': '2018-05-20',
'events': [],
}],
'totalMatches': 0,
'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. \xa9 NHL 2018. All Rights Reserved.',
'totalEvents': 0,
'totalItems': 1,
'wait': 10,
}
I am interested in obtaining the score for a certain team if they played that night. For example, if my team of interest is the Vegas Golden Knights, I would like to create a variable that contains their score (2 in this case). I am completely stuck on this, so any help would be greatly appreciated!

This just turns into ugly parsing but is easily doable following the JSON structure; would recommend flattening the structure for your purposes. With that said, if you'd like to find the score of a particular team on a particular date, you could do this:
def find_score_by_team(gamedict, team_of_interest, date_of_interest):
    """Return a team's score for a game whose 'gameDate' starts with a date.

    Args:
        gamedict: Schedule dict in which ``gamedict['dates'][i]['games'][j]``
            holds a ``'gameDate'`` string and a ``'teams'`` mapping whose
            values carry ``['team']['name']`` and ``['score']``.
        team_of_interest: Team name to look for; compared with ``==``, so
            the match is exact and case-sensitive.
        date_of_interest: Date prefix such as ``'2018-05-20'``, compared
            against the start of each game's ``'gameDate'``.

    Returns:
        The ``'score'`` of the first matching team entry, or ``-1`` when no
        game with that date prefix involves the team.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    for date in gamedict['dates']:
        for game in date['games']:
            # 'gameDate' is a timestamp string (e.g. '2018-05-20T19:00:00Z'),
            # so a prefix comparison selects the day.
            # NOTE(review): the trailing 'Z' suggests UTC, and each entry of
            # gamedict['dates'] also has its own 'date' field -- confirm
            # which day callers actually mean for late-evening games.
            if not game['gameDate'].startswith(date_of_interest):
                continue
            # The keys of 'teams' ('home'/'away' in the sample) are not
            # needed; only the per-side records are inspected.
            for side in game['teams'].values():
                if side['team']['name'] == team_of_interest:
                    return side['score']
    return -1
Example query:
>>> d = {'totalGames':1,'dates':[{'totalGames':1,'totalMatches':0,'matches':[],'totalEvents':0,'totalItems':1,'games':[{'status':{'codedGameState':'7','abstractGameState':'Final','startTimeTBD':False,'detailedState':'Final','statusCode':'7',},'season':'20172018','gameDate':'2018-05-20T19:00:00Z','venue':{'link':'/api/v1/venues/null','name':'BellMTSPlace'},'gameType':'P','teams':{'home':{'leagueRecord':{'wins':9,'losses':8,'type':'league'},'score':1,'team':{'link':'/api/v1/teams/52','id':52,'name':'WinnipegJets'}},'away':{'leagueRecord':{'wins':12,'losses':3,'type':'league'},'score':2,'team':{'link':'/api/v1/teams/54','id':54,'name':'VegasGoldenKnights'}}},'content':{'link':'/api/v1/game/2017030325/content'},'link':'/api/v1/game/2017030325/feed/live','gamePk':2017030325,}],'date':u'2018-05-20','events':[],}],'totalMatches':0,'copyright':'NHLandtheNHLShieldareregisteredtrademarksoftheNationalHockeyLeague.NHLandNHLteammarksarethepropertyoftheNHLanditsteams.\xa9NHL2018.AllRightsReserved.','totalEvents':0,'totalItems':1,'wait':10,}
>>> find_score_by_team(d, 'VegasGoldenKnights', '2018-05-20')
2
This returns -1 if the team didn't play that night, otherwise it returns the team's score.

Related

Webscrapping a site which contains JSON data

I am working on a site to get the job data from it. The site's response does not contain the full information when I parse it with BeautifulSoup, so I tried to achieve it using pandas instead, but still had no luck. Can someone help me here?
import pandas as pd
import requests
# Browser-like User-Agent header sent with the request.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
# Target is the site root; the f-string prefix has no placeholders to fill.
url = f'https://hirist.com'
# r = requests.get(url, headers, verify=False)
# Search criteria; presumably these mirror the site's own search form -- verify.
payload = {"pageNo": "1",
"query": "software engineer",
"loc": '17',
"minexp": '0',
"maxexp": '0',
"range": '0',
"boost": '0',
"searchRange": '4',
"searchOp": 'AND',
"jobType": "1"
}
# POSTs the payload as a JSON body to `url` and decodes the reply as JSON;
# verify=False switches off TLS certificate verification.
jsonData = requests.post(url, headers=headers,
json=payload, verify=False).json()
# Loads the decoded JSON into a DataFrame and prints it.
df = pd.DataFrame(jsonData)
print(df)

Try the following approach:
import pandas as pd
import requests
# Endpoint that is queried with GET.
search_url = "https://jobseeker-api.hirist.com/jobfeed/-1/search"

# Request headers: a browser-like User-Agent plus Referer, Authorization
# and Origin values.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
    "Referer": "https://www.hirist.com/",
    "Authorization": "Bearer undefined",
    "Origin": "https://www.hirist.com",
}

# Search criteria, sent as URL query parameters (params=) rather than a body.
payload = dict(
    pageNo="1",
    query="software engineer",
    loc="17",
    minexp="0",
    maxexp="0",
    range="0",
    boost="0",
    searchRange="4",
    searchOp="AND",
    jobType="1",
)

# verify=False switches off TLS certificate verification for this call.
response = requests.get(search_url, headers=headers, params=payload, verify=False)
jsonData = response.json()
print(jsonData)
Giving you output starting:
{'count': 58, 'jobs': [{'id': 982486, 'title': 'Software Engineer - ASP/C# (1-4 yrs)', 'introText': '<p><p><b>Position : Software Engineer</b><br/><br/><b>Experience : 1- 4 Years</b><br/><br/><b>Job type : Permanent</b><br/><br/><b>Skills Required :</b><br/><br/>- Extensive knowledge in <b>Asp.net, C# and SQL.</b><br/><br/>- Ability to troubleshoot and solve complex technical problems.<br/><br/>- Great interpersonal and communication skills<br/><br/>- Must have good analytical and problem-solving skills.<br/><br/>- Good Time Management and Planning skills.<br/><br/><b>Roles & Responsibility :</b><br/><br/>- Producing clean, efficient code based on specifications.<br/><br/>- Fixing and improving existing software<br/><br/>- Integrate software components and third-party programs<br/><br/>- Verify and deploy programs and systems<br/><br/>- Troubleshoot, debug and upgrade existing software<br/><br/>- Gather and evaluate user feedback<br/><br/>- Recommend and execute improvements<br/><br/>- Create technical documentation for reference and reporting<br/><br/>- Prefer Immediate Joiners</p></p>', 'jobdesignation': 'Software Developer', 'min': 1, 'max': 4, 'createdBy': 93163, 'creatorDomainName': 'sapwood.net', 'categoryId': 1, 'jobDetailUrl': 'https://www.hirist.com/j/software-engineer-aspc-1-4-yrs-982486.html?ref=ambitionbox', 'femaleCandidate': 0, 'differentlyAbled': 0, 'exDefence': 0, 'workFromHome': 0, 'femaleBackWorkForce': 0, 'confidential': 0, 'premium': 0, 'star': 0, 'applyStatus': 1, 'applyCount': 42, 'createdTimeMs': 1643024958613, 'createdTime': 1642982400000, 'createdTimeNoMillis': None, 'tagIdString': '206 387 91 7', 'tags': [{'id': 206, 'name': 'C#'}, {'id': 387, 'name': 'SQL Server'}, {'id': 91, 'name': 'ASP'}, {'id': 7, 'name': '.Net'}], 'locations': [{'id': 70, 'name': 'Cochin/Kochi'}, {'id': 17, 'name': 'Kerala'}], 'showcase': None, 'diversity': None, 'companyStatus': 1, 'createdByAlias': 'Cochin/Kochi/Kerala', 'applyUrl': '', 'videoUrl': '', 
'assessmentFlags': 0, 'mediaResume': 0, 'industry': '', 'functionalArea': 18, 'minSal': 1, 'maxSal': 6, 'hits': 373, 'otherLocation': '', 'minBatch': None, 'maxBatch': None, 'brandJobFlag': 0, 'companyDomain': None, 'lableId': None, 'companyData': {'companyId': 0, 'companyName': 'Sapwood Ventures', 'companyNameNotAnalyzed': 'Sapwood Ventures', 'companyStatus': 1, 'logoPath': None}, 'recruiter': {'recruiterId': 93163, 'recruiterName': 'Hemaa R', 'designation': 'Senior Manager - Team & Key Accounts', 'profilePicUrl': '', 'logoPath': '', 'recruiterActions': 34}, 'jobStatusInfo': None, 'location': [{'id': 70, 'name': 'Cochin/Kochi'}, {'id': 17, 'name': 'Kerala'}], 'saved': 0, 'applied': 0}, {'id': 997211, 'title': 'Tetherfi Technologies - Software Engineer - Java/J2EE (3-10 yrs)', 'introText': "<p>The Right Individual :<br/><br/>The ideal candidate will have a passion for technology and software building. Attention to detail and an analytical mind are essential qualities in this role. You will have to work on both technical and design aspects of software projects. A proactive approach to problem-solving as well as a detailed understanding of coding is essential. If finding issues and fixing them with beautiful, meticulous code are among the talents that make you tick, we'd like to hear from you.<br/><br/>Required Functional Skill :<br/><br/>1. 4+ years of experience in java and familiarity in Spring boot, JPA.<br/><br/>2. Extensive Hands-on experience in JAVA Java SE.<br/><br/>3. Well versed with Object Oriented Programming Concepts.<br/><br/>4. Prior experience on JAVA Spring / Spring boot framework.<br/><br/>5. Familiarity with java application servers JBoss, WebLogic.<br/><br/>6. Have in-depth knowledge and self-driven interest to work with JAVA Servlets.<br/><br/>7. Experience in deploying solutions for cross integrations among OEMs in CC or UC environment is preferred.<br/><br/>Role and Responsibilities :<br/><br/>1. 
Candidate will be part of our Global Delivery center team liaising with Product Strategist and Product Owner to enhance Tetherfi's Products based on Web chat, CC & UC Product Streams.<br/><br/>2. Will develop, enhance and support Tetherfi's existing projects and future projects.<br/><br/>Required Professional & Interpersonal Qualities :<br/><br/>- Bachelor's Degree in appropriate field of study or equivalent work experience.<br/><br/>- Experienced with all ancillary technologies necessary for Internet applications: HTTP, TCP/IP, POP/SMTP, etc.</p>", 'jobdesignation': 'Software Engineer', 'min': 3, 'max': 10, 'createdBy': 72249, 'creatorDomainName': 'tetherfi.com', 'categoryId': 1, 'jobDetailUrl': 'https://www.hirist.com/j/tetherfi-technologies-software-engineer-javaj2ee-997211.html?ref=ambitionbox', 'femaleCandidate': 0, 'differentlyAbled': 0, 'exDefence': 0, 'workFromHome': 0, 'femaleBackWorkForce': 0, 'confidential': 0, 'premium': 0, 'star': 0, 'applyStatus': 1, 'applyCount': 4, 'createdTimeMs': 1645156975704, 'createdTime': 1645142400000, 'createdTimeNoMillis': None, 'tagIdString': '5 2850 25 279 87 237 11100 19', 'tags': [{'id': 5, 'name': 'Java'}, {'id': 2850, 'name': 'Spring Boot'}, {'id': 25, 'name': 'J2EE'}, {'id': 279, 'name': 'Servlets'}, {'id': 87, 'name': 'JBOSS'}, {'id': 237, 'name': 'WebLogic'}, {'id': 11100, 'name': 'Application Server'}, {'id': 19, 'name': 'OOPS'}], 'locations': [{'id': 88, 'name': 'Anywhere in India/Multiple Locations'}, {'id': 3, 'name': 'Bangalore'}, {'id': 6, 'name': 'Chennai'}, {'id': 7, 'name': 'Pune'}, {'id': 17, 'name': 'Kerala'}, {'id': 31, 'name': 'Karnataka'}], 'showcase': None, 'diversity': None, 'companyStatus': 1, 'createdByAlias': 'Anywhere in India/Multiple Locations/Bangalore/Chennai/Pune/Kerala/Karnataka', 'applyUrl': '', 'videoUrl': '', 'assessmentFlags': 0, 'mediaResume': 0, 'industry': '', 'functionalArea': 16, 'minSal': 5, 'maxSal': 14, 'hits': 15, 'otherLocation': '', 'minBatch': None, 'maxBatch': None, 
'brandJobFlag': 0, 'companyDomain': None, 'lableId': None, 'companyData': {'companyId': 0, 'companyName': 'Tetherfi Technologies Pvt Ltd', 'companyNameNotAnalyzed': 'Tetherfi Technologies Pvt Ltd', 'companyStatus': 1, 'logoPath': None}, 'recruiter': {'recruiterId': 72249, 'recruiterName': 'Laxman Shenoy', 'designation': 'Deputy Manager HR', 'profilePicUrl': '', 'logoPath': '', 'recruiterActions': 2}, 'jobStatusInfo': None, 'location': [{'id': 88, 'name': 'Anywhere in India/Multiple Locations'}, {'id': 3, 'name': 'Bangalore'}, {'id': 6, 'name': 'Chennai'}, {'id': 7, 'name': 'Pune'}, {'id': 17, 'name': 'Kerala'}, {'id': 31, 'name': 'Karnataka'}], 'saved': 0, 'applied': 0}, {'id': 1003219, 'title': 'Senior Software Engineer - Python/Django (3-8 yrs)', 'introText': "<p><p><p><b>Position / Designation :</b> Software Engineer /Senior Software Engineer<br/><br/><b>Location</b> <b>: </b>Chennai<br/><br/><b>Experience</b> <b>: </b>0-3 years for SE, 3+ years for SSE, <br/><br/><b>CTC : <br/></b><br/>SE - 4 to 6 L.P.A<br/><br/>SSE- 7-11 L.P.A<br/><br/>The ideal candidate is a self-motivated, multi-tasker, and demonstrated team player. You will be a lead developer responsible for the development of new software products and enhancements to existing products. You should excel in working with large-scale applications and frameworks and have outstanding communication and leadership skills. 
<br/><br/><b>Responsibilities : <br/></b><br/>- Writing clean, high-quality, high-performance, maintainable code<br/><br/>- Develop and support software including applications, database integration, interfaces, and new functionality enhancements.<br/><br/>- Coordinate cross-functionally to ensure the project meets business objectives and compliance standards.<br/><br/>- Support test and deployment of new products and features.<br/><br/>- Participate in code reviews.<br/><br/><b>Qualifications : <br/></b><br/>- Bachelor's degree in Computer Science (or related field)<br/><br/>- 3+ years of work experience in Python, Django.<br/><br/>- Expertise in Object-Oriented Design, Database Design, and XML Schema<br/><br/>- Experience with Agile or Scrum software development methodologies<br/><br/>- Ability to multi-task, organize and prioritize work.</p></p></p>", 'jobdesignation': None, 'min': 3, 'max': 8, 'createdBy': 98899, 'creatorDomainName': 'gmail.com', 'categoryId': 1, 'jobDetailUrl': 'https://www.hirist.com/j/senior-software-engineer-pythondjango-3-8-yrs-1003219.html?ref=ambitionbox', 'femaleCandidate': 1, 'differentlyAbled': 0, 'exDefence': 0, 'workFromHome': 1, 'femaleBackWorkForce': 0, 'confidential': 0, 'premium': 0, 'star': 0, 'applyStatus': 1, 'applyCount': 98, 'createdTimeMs': 1646059583336, 'createdTime': 1646006400000, 'createdTimeNoMillis': None, 'tagIdString': '9 592 50 280 97 30357 3429 4422 11 2339 2807', 'tags': [{'id': 9, 'name': 'Python'}, {'id': 592, 'name': 'Agile'}, {'id': 50, 'name': 'Django'}, {'id': 280, 'name': 'Scrum'}, {'id': 97, 'name': 'XML'}, {'id': 30357, 'name': 'Object Modeling'}, {'id': 3429, 'name': 'Database Schema'}, {'id': 4422, 'name': 'Database Architecture'}, {'id': 11, 'name': 'MySQL'}, {'id': 2339, 'name': 'Python Architect'}, {'id': 2807, 'name': 'PySpark'}], 'locations': [{'id': 3, 'name': 'Bangalore'}, {'id': 6, 'name': 'Chennai'}, {'id': 84, 'name': 'Coimbatore'}, {'id': 17, 'name': 'Kerala'}], 'showcase': None, 
'diversity': None, 'companyStatus': 2, 'createdByAlias': 'Bangalore/Chennai/Coimbatore/Kerala', 'applyUrl': '', 'videoUrl': '', 'assessmentFlags': 0, 'mediaResume': 0, 'industry': '0', 'functionalArea': 16, 'minSal': 16, 'maxSal': 31, 'hits': 538, 'otherLocation': '', 'minBatch': None, 'maxBatch': None, 'brandJobFlag': 0, 'companyDomain': None, 'lableId': None, 'companyData': {'companyId': 0, 'companyName': 'AR Consultant', 'companyNameNotAnalyzed': 'AR Consultant', 'companyStatus': 2, 'logoPath': None}, 'recruiter': {'recruiterId': 98899, 'recruiterName': 'Afzal', 'designation': 'Recruiter', 'profilePicUrl': 'https://edgar.hirist.com/media/recruiterpics/2022/01/25/2022-01-25-19-12-23-98899.jpg', 'logoPath': '', 'recruiterActions': 11}, 'jobStatusInfo': None, 'location': [{'id': 3, 'name': 'Bangalore'}, {'id': 6, 'name': 'Chennai'}, {'id': 84, 'name': 'Coimbatore'}, {'id': 17, 'name': 'Kerala'}], 'saved': 0, 'applied': 0}, {'id': 967513, 'title': 'Software Test Engineer - Java/Selenium (0-2 yrs)', 'introText': "<p>Immediate joiners required for a reputed client <br/><br/>Only Male Kerala candidates <br/><br/>Position : Software Test Engineer<br/><br/>Experience : 0-2 years<br/><br/>Job

How to select, map and count data from JSON API with Python?

I am new to Python and am struggling to find the right method for the following:
I have 2 API responses, one is a list of devices, the other one is a list of organizations.
Each device is linked to an organization with an Organization ID.
organizations = [
{
'name': 'Aperture Science Inc.',
'description': 'Just a corporation!',
'id': 1
},
{
'name': 'Software Development Inc',
'description': "Making the world's next best app!",
'id': 2
}
]
devices = [
{
'id': 1,
'organizationId': 2,
'nodeClass': 'WINDOWS_WORKSTATION',
'displayName': 'DESKTOP_01'
},{
'id': 2,
'organizationId': 2,
'nodeClass': 'WINDOWS_SERVER',
'displayName': 'SERVER_01'
},{
'id': 3,
'organizationId': 1,
'nodeClass': 'WINDOWS_WORSTATION',
'displayName': 'DESKTOP_0123'
}
]
The OrganizationID in devices = the id in organizations.
I want to get a result with the number of servers and workstations, respectively, for each organization, like this:
results = [
{
'Organization Name' : 'Aperture Science Inc.',
'Number of Workstations': 1,
'Number of Servers': 0,
'Total devices': 1
},
{
'Organization Name' : 'Software Development Inc',
'Number of Workstations': 1,
'Number of Servers': 1,
'Total devices': 2
}
I started with this
# Attempts to count the devices whose nodeClass is "WINDOWS_WORKSTATION".
# Each element of `devices` is a dict, so the attribute access `d.nodeClass`
# raises AttributeError; dict values are read with d["nodeClass"] instead.
wks_sum = sum(d.nodeClass == "WINDOWS_WORKSTATION" for d in devices)
print(wks_sum)
but I get this error:
AttributeError: 'dict' object has no attribute 'nodeClass'
and at the very end I convert and save in a csv file:
df = pd.DataFrame(results)
df.to_csv('results.csv', index=False)
I am struggling to count each device type and to map the devices to the right organization name, and would really appreciate some help :)
EDIT:
Thanks to @Vincent, I could come up with:
# Tally servers and workstations per organization, then export the summary.
#
# Every organization gets its (initially empty) buckets up front. Creating
# them inside the matching loop only initialises organizations that are
# visited before the `break`, so an organization that is never reached
# would raise KeyError when `results` is built.
for organization in organizations:
    organization.setdefault("workstations", [])
    organization.setdefault("servers", [])

for device in devices:
    for organization in organizations:
        if device["organizationId"] != organization["id"]:
            continue
        # Classification is by exact nodeClass string; any other class is
        # not counted in either bucket.
        if device["nodeClass"] == "WINDOWS_SERVER":
            organization["servers"].append(device["nodeClass"])
        elif device["nodeClass"] == "WINDOWS_WORKSTATION":
            organization["workstations"].append(device["nodeClass"])
        # Stop at the first organization whose id matches.
        break

results = [
    {
        "Organization Name": organization["name"],
        "Number of Workstations": len(organization["workstations"]),
        "Number of Servers": len(organization["servers"]),
        "Total devices": len(organization["workstations"]) + len(organization["servers"]),
    }
    for organization in organizations
]
# print(f"{results = }")
print(results)
# convert and save in a csv file
df = pd.DataFrame(results)
df.to_csv('results.csv', index=False)

This code will achieve your goal:
organizations = [
    {
        'name': 'Aperture Science Inc.',
        'description': 'Just a corporation!',
        'id': 1
    },
    {
        'name': 'Software Development Inc',
        'description': "Making the world's next best app!",
        'id': 2
    }
]
devices = [
    {
        'id': 1,
        'organizationId': 2,
        'nodeClass': 'WINDOWS_WORKSTATION',
        'displayName': 'DESKTOP_01'
    }, {
        'id': 2,
        'organizationId': 2,
        'nodeClass': 'WINDOWS_SERVER',
        'displayName': 'SERVER_01'
    }, {
        'id': 3,
        'organizationId': 1,
        'nodeClass': 'WINDOWS_WORSTATION',
        'displayName': 'DESKTOP_0123'
    }
]


def summarize_devices(organizations, devices):
    """Count workstations and servers per organization.

    Devices are classified by their ``displayName`` prefix (``"SERVER_"`` or
    ``"DESKTOP_"``), not by ``nodeClass``; the sample data above contains a
    misspelled ``nodeClass`` value that an exact comparison would miss.
    Devices with any other prefix are not counted.

    Args:
        organizations: List of dicts with at least ``"id"`` and ``"name"``.
            Each dict is updated in place with ``"workstations"`` and
            ``"servers"`` lists.
        devices: List of dicts with ``"organizationId"``, ``"displayName"``
            and ``"nodeClass"``.

    Returns:
        One summary dict per organization, in input order, with the keys
        ``"Organization Name"``, ``"Number of Workstations"``,
        ``"Number of Servers"`` and ``"Total devices"``.
    """
    # Create the buckets for every organization first, so organizations
    # that own no devices report zeros instead of raising KeyError below.
    for organization in organizations:
        organization.setdefault("workstations", [])
        organization.setdefault("servers", [])

    for device in devices:
        for organization in organizations:
            if device["organizationId"] != organization["id"]:
                continue
            if device["displayName"].startswith("SERVER_"):
                organization["servers"].append(device["nodeClass"])
            elif device["displayName"].startswith("DESKTOP_"):
                organization["workstations"].append(device["nodeClass"])
            # Stop at the first organization whose id matches.
            break

    return [
        {
            "Organization Name": organization["name"],
            "Number of Workstations": len(organization["workstations"]),
            "Number of Servers": len(organization["servers"]),
            "Total devices": len(organization["workstations"]) + len(organization["servers"]),
        }
        for organization in organizations
    ]


results = summarize_devices(organizations, devices)
print(f"{results = }")
Result:
[{'Organization Name': 'Aperture Science Inc.', 'Number of Workstations': 1, 'Number of Servers': 0, 'Total devices': 1}, {'Organization Name': 'Software Development Inc', 'Number of Workstations': 1, 'Number of Servers': 1, 'Total devices': 2}]
Indeed, you could do this with a library such as pandas, but I think simple, if slow, code like this makes it clearer what is being done and is easier to modify if needed.
To deal with a huge amount of data, you should dump into two sql tables using sqlite3 for example and deal with SQL.

How save a json file in python from api response when the class is a list and object is not serializable

I have tried to find the answer, but I could not find it.
I am looking for a way to save a JSON file to my computer from Python.
I call the API
# Set the two API credentials on the client configuration.
configuration = api.Configuration()
configuration.api_key['X-XXXX-Application-ID'] = 'xxxxxxx'
configuration.api_key['X-XXX-Application-Key'] = 'xxxxxxxx1'
## List our parameters as search operators
opts = {
    'title': 'Deutsche Bank',
    'body': 'fraud',
    'language': ['en'],
    'published_at_start': 'NOW-7DAYS',
    'published_at_end': 'NOW',
    'per_page': 1,
    'sort_by': 'relevance'
}
try:
    ## Make a call to the Stories endpoint for stories that meet the criteria of the search operators
    api_response = api_instance.list_stories(**opts)
    ## Print the returned story
    pp(api_response.stories)
except ApiException as e:
    # Report the API failure instead of letting the exception propagate.
    print('Exception when calling DefaultApi->list_stories: %s\n' % e)
I got the response like this
[{'author': {'avatar_url': None, 'id': 1688440, 'name': 'Pranav Nair'},
'body': 'The law firm will investigate whether the bank or its officials have '
'engaged in securities fraud or unlawful business practices. '
'Industries: Bank Referenced Companies: Deutsche Bank',
'categories': [{'confident': False,
'id': 'IAB11-5',
'level': 2,
'links': {'_self': 'https://,
'parent': 'https://'},
'score': 0.39,
'taxonomy': 'iab-qag'},
{'confident': False,
'id': 'IAB3-12',
'level': 2,
'links': {'_self': 'https://api/v1/classify/taxonomy/iab-qag/IAB3-12',
'score': 0.16,
'taxonomy': 'iab-qag'},
'clusters': [],
'entities': {'body': [{'indices': [[168, 180]],
'links': {'dbpedia': 'http://dbpedia.org/resource/Deutsche_Bank'},
'score': 1.0,
'text': 'Deutsche Bank',
'types': ['Bank',
'Organisation',
'Company',
'Banking',
'Agent']},
{'indices': [[80, 95]],
'links': {'dbpedia': 'http://dbpedia.org/resource/Securities_fraud'},
'score': 1.0,
'text': 'securities fraud',
'types': ['Practice', 'Company']},
'hashtags': ['#DeutscheBank', '#Bank', '#SecuritiesFraud'],
'id': 3004661328,
'keywords': ['Deutsche',
'behalf',
'Bank',
'firm',
'investors',
'Deutsche Bank',
'bank',
'fraud',
'unlawful'],
'language': 'en',
'links': {'canonical': None,
'coverages': '/coverages?story_id=3004661328',
'permalink': 'https://www.snl.com/interactivex/article.aspx?KPLT=7&id=58657069',
'related_stories': '/related_stories?story_id=3004661328'},
'media': [],
'paragraphs_count': 1,
'published_at': datetime.datetime(2020, 5, 19, 16, 8, 5, tzinfo=tzutc()),
'sentences_count': 2,
'sentiment': {'body': {'polarity': 'positive', 'score': 0.599704},
'title': {'polarity': 'neutral', 'score': 0.841333}},
'social_shares_count': {'facebook': [],
'google_plus': [],
'source': {'description': None,
'domain': 'snl.com',
'home_page_url': 'http://www.snl.com/',
'id': 8256,
'links_in_count': None,
'locations': [{'city': 'Charlottesville',
'country': 'US',
'state': 'Virginia'}],
'logo_url': None,
'name': 'SNL Financial',
'scopes': [{'city': None,
'country': 'US',
'level': 'national',
'state': None},
{'city': None,
'country': None,
'level': 'international',
'state': None}],
'title': None},
'summary': {'sentences': ['The law firm will investigate whether the bank or '
'its officials have engaged in securities fraud or '
'unlawful business practices.',
'Industries: Bank Referenced Companies: Deutsche '
'Bank']},
'title': "Law firm to investigate Deutsche Bank's US ops on behalf of "
'investors',
'translations': {'en': None},
'words_count': 26}]
The documentation says: "Stories you retrieve from the API are returned as JSON objects by default. These JSON story objects contain 22 top-level fields, whereas a full story object will contain 95 unique data points"
The class of the response is a list. When I tried to save it as a JSON file, I got the error "TypeError: Object of type Story is not JSON serializable".
How can I save a JSON file to my computer?

The response you got is not JSON: JSON uses double quotes, but here the strings are in single quotes. Copy and paste your response into the following link to see the issues:
http://json.parser.online.fr/
If you change it like
[{"author": {"avatar_url": None, "id": 1688440, "name": "Pranav Nair"},
"body": "......
It will work, You can use python json module to do it
import json
# json.loads parses JSON *text* (str/bytes/bytearray), so the argument must
# be the double-quoted JSON string, not an already-built Python object.
json.loads(the_dict_got_from_response)
But it should be the duty of the API provider to return valid JSON. To make it work, you can load the result you got with the json module.

How to parse Pyrebase OrderedDict

Pyrebase get() method returns a OrderedDict and I was wondering how would I parse it to get the value.
Here's how and when I use Pyrebase's get() method:
# Query under "games/data"; presumably this selects the first child whose
# "id" equals game_object -- confirm against the Pyrebase documentation.
pyre_game = db.child("games/data").order_by_child("id").equal_to(
game_object).limit_to_first(1).get()
And when I call
pyre_game.val()
This is what I get in the console:
OrderedDict([('-LKYjwhuEMjwadDcfWAl', {'category': 'Main game', 'cover': {'cloudinary_id': 'eohx6zgumfvvjlqgaac6', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/eohx6zgumfvvjlqgaac6.jpg'}, 'developers': [16083], 'first_release_date': 1532563200000, 'genres': [9, 14, 32], 'id': 105176, 'name': 'Arcane Golf', 'platforms': [6], 'release_dates': [{'category': 0, 'date': 1532563200000, 'human': '2018-Jul-26', 'm': 7, 'platform': 6, 'region': 8, 'y': 2018}], 'screenshots': [{'cloudinary_id': 'tgdsmj4ybqndrq9xrxe7', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/tgdsmj4ybqndrq9xrxe7.jpg'}, {'cloudinary_id': 'ryxzsrfw8zrlfa1fwuxz', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/ryxzsrfw8zrlfa1fwuxz.jpg'}, {'cloudinary_id': 'krlxlyg3r46w3mrsrozx', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/krlxlyg3r46w3mrsrozx.jpg'}, {'cloudinary_id': 'xkofnlley4atbqbpc4em', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/xkofnlley4atbqbpc4em.jpg'}, {'cloudinary_id': 'atr178vq39rcksei1bhd', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/atr178vq39rcksei1bhd.jpg'}, {'cloudinary_id': 'qo8znn18apizvlzbzec5', 'url': '//images.igdb.com/igdb/image/upload/t_thumb/qo8znn18apizvlzbzec5.jpg'}], 'summary': 'Arcane Golf is a miniature golf puzzle game set in a fantasy world full of dungeons, dangers, gems, and geometry. Play across 200 levels set in 4 unique courses inspired by classic adventure games!', 'updated_at': 1533116562596, 'videos': [{'name': 'Trailer', 'video_id': 'khDsYapla0M'}], 'websites': [{'category': 8, 'url': 'https://www.instagram.com/gold5games'}, {'category': 5, 'url': 'https://twitter.com/Gold5Games'}, {'category': 13, 'url': 'https://store.steampowered.com/app/897800'}]})])
How would I go about parsing this to get the value? The value is everything inside the {}; it starts with the 'category' key.

Hope this will work
# Walk the items of the response and print each item's key and value.
for x in pyre_game.each():
    print(x.key(), x.val())

Alternatively, you can convert the OrderedDict to a plain dict and use it like a regular dictionary.
# Same query as in the question: order by the "id" child and keep the entry
# equal to game_object. Passing game_object to order_by_child() and omitting
# equal_to() would order by a child named after the value and not filter.
pyre_game = dict(
    db.child("games/data")
    .order_by_child("id")
    .equal_to(game_object)
    .limit_to_first(1)
    .get()
    .val()
)
print(pyre_game)

Looping through Get Request in Python

I am trying to scrape some ticketing inventory info using Stubhub's API, but I cannot seem to figure out how to loop through the get request.
I basically want to loop through multiple events. The eventid_list is a list of eventids. The code I have is below:
# Fetches the inventory for every id in eventid_list, one GET per event.
# NOTE(review): the indentation of this listing was lost; the lines after the
# `for` presumably form the loop body -- confirm against the original post.
# listing_df is built from the current `inv` only, so whichever way the last
# line was indented it ends up holding the listings of a single event.
inventory_url = 'https://api.stubhub.com/search/inventory/v2'
for eventid in eventid_list:
data = {'eventid': eventid, 'rows':500}
inventory = requests.get(inventory_url, headers=headers, params=data)
inv = inventory.json()
print(inv)
listing_df = pd.DataFrame(inv['listing'])
When I run this, the dataframe only returns results for one event, instead of multiple. What am I doing wrong?
EDIT: print(inv) outputs something like this:
{
'eventId': 102994860,
'totalListings': 82,
'totalTickets': 236,
'minQuantity': 1,
'maxQuantity': 6,
'listing': [
{
'listingId': 1297697413,
'currentPrice': {'amount': 108.58, 'currency': 'USD'},
'listingPrice': {'amount': 88.4, 'currency': 'USD'},
'sectionId': 1638686,
'row': 'E',
'quantity': 6,
'sellerSectionName': 'FRONT MEZZANINE RIGHT',
'sectionName': 'Front Mezzanine Sides',
'seatNumbers': '2,4,6,8,10,12',
'zoneId': 240236,
'zoneName': 'Front Mezzanine',
'deliveryTypeList': [5],
'deliveryMethodList': [23, 24, 25],
'isGA': 0,
'dirtyTicketInd': False,
'splitOption': '2',
'ticketSplit': '1',
'splitVector': [1, 2, 3, 4, 6],
'sellerOwnInd': 0,
'score': 0.0
},
...
{
'listingId': 1297697417,
'currentPrice': {'amount': 108.58, 'currency': 'USD'},
'listingPrice': {'amount': 88.4, 'currency': 'USD'},
'sectionId': 1638686,
'row': 'D',
'quantity': 3,
'sellerSectionName': 'FRONT MEZZANINE RIGHT',
'sectionName': 'Front Mezzanine Sides',
'seatNumbers': '2,4,6',
'zoneId': 240236,
'zoneName': 'Front Mezzanine',
'deliveryTypeList': [5],
'deliveryMethodList': [23, 24, 25],
'isGA': 0,
'dirtyTicketInd': False,
'splitOption': '2',
'ticketSplit': '1',
'splitVector': [1, 3],
'sellerOwnInd': 0,
'score': 0.0
},
]
}

I'm guessing inventory.json()['listing'] is a list of events. If so, you can try this:
import itertools

inventory_url = 'https://api.stubhub.com/search/inventory/v2'


def get_event(eventid):
    """Return the 'listing' list from the inventory response of one event.

    Sends a GET request to ``inventory_url`` with the event id and a row
    limit of 500, decodes the JSON body and returns its ``'listing'`` entry,
    or an empty list when the response has no such key.
    """
    data = {'eventid': eventid, 'rows': 500}
    inventory = requests.get(inventory_url, headers=headers, params=data)
    return inventory.json().get('listing', [])


# Concatenate output of all events. itertools has no `flatten`;
# chain.from_iterable lazily joins the per-event lists into one stream.
events = itertools.chain.from_iterable(get_event(eventid) for eventid in eventid_list)
listing_df = pd.DataFrame(list(events))
This is just a starting point; you will have to deal with cases where inventory.status_code != 200. The result is probably not very useful as-is, so you may have to flatten some of the attributes of the listing items, like currentPrice and listingPrice:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parsing data in a dict - python

Related

Webscrapping a site which contains JSON data

How to select, map and count data from JSON API with Python?

How save a json file in python from api response when the class is a list and object is not serializable

How to parse Pyrebase OrderedDict

Looping through Get Request in Python

Categories

Resources