Retrieving data from a json object - python

I am writing a parser to extract the list of ads
# Fetch the endpoint and decode the JSON body in one step.
response = requests.get(url).json()
items = response['data']
iter1 = []
for item in items:
    # Copy the fields of interest verbatim; 'NA' stands in for a missing key.
    # 'owner' and 'images' are still the nested structures at this point.
    iter1.append({
        'name': item.get('name', 'NA'),
        'owner': item.get('owner', 'NA'),
        'date_published': item.get('date_published', 'NA'),
        'images': item.get('images', 'NA'),
        'short_url': item.get('short_url', 'NA')
    })
At the moment I get the following output, and I need to make it more compact.
[
{
"name": "Announcement name",
"owner": {
"id": "58f84949700743"
},
"date_published": 1627666233,
"images": [
{
"id": "58fb7032ca5544fb5a2",
"num": 1,
"url": "https://cache3.com/images/orig/58/fb/58fb70f2132a554804fb5a2.jpg",
"width": 1936,
"height": 2581
},
{
"id": "58fb70f29e94ba0384507554",
"num": 2,
"url": "https://cache3.com/images/orig/58/fb/58fb70f29e94b384507554.jpg",
"width": 750,
"height": 1334
},
{
"id": "58fb70f2f8efdc109d76c2e5",
"num": 3,
"url": "https://cache3.com/images/orig/58/fb/58fb70f2fdc109d76c2e5.jpg",
"width": 750,
"height": 1334
}
],
"short_url": "https://short.com/p58gb7b9a4c80320f03"
}
]
I would like to bring to the form:
"name": "Announcement name", #Name
"id": "58f84949700743" #Owner ID
"date_published": 1627666233, #Date
"url": "https://cache3.com/images/orig/58/fb/58fb70f2132a554804fb5a2.jpg",#Url-img
"short_url": "https://short.com/p58gb7b9a4c80320f03" #Announcement url
How can I extract information from owner{.id} and images[.url] ?

# Sample payload: a list holding a single announcement record.
# (Named ``ads`` rather than ``dict`` so the builtin type is not shadowed.)
ads = [
    {
        "name": "Announcement name",
        "owner": {"id": "58f84949700743"},
        "date_published": 1627666233,
        "images": [
            {
                "id": "58fb7032ca5544fb5a2",
                "num": 1,
                "url": "https://cache3.com/images/orig/58/fb/58fb70f2132a554804fb5a2.jpg",
                "width": 1936,
                "height": 2581,
            },
            {
                "id": "58fb70f29e94ba0384507554",
                "num": 2,
                "url": "https://cache3.com/images/orig/58/fb/58fb70f29e94b384507554.jpg",
                "width": 750,
                "height": 1334,
            },
            {
                "id": "58fb70f2f8efdc109d76c2e5",
                "num": 3,
                "url": "https://cache3.com/images/orig/58/fb/58fb70f2fdc109d76c2e5.jpg",
                "width": 750,
                "height": 1334,
            },
        ],
        "short_url": "https://short.com/p58gb7b9a4c80320f03",
    }
]

ad = ads[0]
# Flatten the record: the owner's id is lifted out of the nested dict and
# "url" collects the url of every image that has one.
result = {
    "name": ad.get("name", 'NA'),
    "id": ad.get('owner', {}).get('id', 'NA'),
    "date_published": ad.get("date_published", 'NA'),
    "url": [img["url"] for img in ad.get("images", []) if "url" in img],
    "short_url": ad.get("short_url", 'NA'),
}
print(result)

You could replace:-
'owner': item.get('owner', 'NA'),
...with...
'id': item.get('owner', {}).get('id', 'NA'),

You could do it by only extracting the information you want:
items = response['data']
iter1 = []
for item in items:
    iter1.append({
        'name': item.get('name', 'NA'),
        # Reach into the nested owner dict; the {} default keeps the chained
        # .get() working when 'owner' is absent.
        'id': item.get('owner', {}).get('id', 'NA'),
        'date_published': item.get('date_published', 'NA'),
        # One url per image entry; 'NA' for an image without a 'url' key.
        'urls': [entry.get('url', 'NA') for entry in item.get('images', [])],
        'short_url': item.get('short_url', 'NA')
    })
Result:
[{'name': 'Announcement name',
'id': '58f84949700743',
'date_published': 1627666233,
'urls': ['https://cache3.com/images/orig/58/fb/58fb70f2132a554804fb5a2.jpg',
'https://cache3.com/images/orig/58/fb/58fb70f29e94b384507554.jpg',
'https://cache3.com/images/orig/58/fb/58fb70f2fdc109d76c2e5.jpg'],
'short_url': 'https://short.com/p58gb7b9a4c80320f03'}]

Related

I'm trying to scrape data from a website using Scrapy. What's wrong with my code?

I'm trying to scrape data from https://www.premierleague.com/players. On the webpage, there is a list of players. I used the XPath expression response.xpath('//td/a/@href').getall() to get a list of relative URLs, one per player. I then iterated over that list and joined each relative URL with the homepage to get a variable called "absolute_url", which looks like this for one of the players: "https://www.premierleague.com" + "/players/63289/Brenden-Aaronson/overview", i.e. https://www.premierleague.com/players/63289/Brenden-Aaronson/overview. I tested the XPath expressions in the Scrapy shell and they produce the desired output there, at least for the player overview pages I tested. Where am I going wrong?
import scrapy
from urllib.parse import urljoin
class PlStatsSpider(scrapy.Spider):
    """Spider that follows table links and yields one stats dict per followed page."""

    name = 'pl_stats'
    allowed_domains = ['premierleague.com']
    start_urls = ['http://premierleague.com']

    def parse(self, response):
        """Follow every ``<td><a href=...>`` link to ``parse_players``."""
        url = 'http://premierleague.com'
        # '@href' selects the attribute value ('#href' is not valid XPath).
        for link in response.xpath('//td/a/@href').getall():
            absolute_url = urljoin(url, link)  # merging relative url
            yield response.follow(absolute_url, callback=self.parse_players)

    def parse_players(self, response):
        """Yield a dict of player fields picked out of the page by position."""
        yield {
            'Name': response.xpath('//h1/div[@class="name t-colour"]/text()').get(),
            'DOB': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[3].get().strip(),
            'Height': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[5].get(),
            'Club': response.xpath('//div[@class="info"]/a/text()').get().strip(),
            'Weight': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[6].get(),
            'Position': response.xpath('//section[@class="sideWidget playerIntro t2-topBorder"]//div[@class="info"]/text()')[2].get(),
            'Nationality': response.xpath('//span[@class="playerCountry"]/text()').get()}
Most of your XPath expressions are a little too ambiguous for the many different pages you are trying to scrape. All of the player pages have slight variations that make extracting data using positional indexing nearly impossible. Additionally, not every field is available for every player, such as the position and club. What you could do for those fields is iterate through their section elements, grab all of the 'label'/'info' pairs, and match whatever is available to your output.
For example:
import scrapy
class PlStatsSpider(scrapy.Spider):
    """Spider that visits every linked player page and yields one dict per player."""

    name = 'pl_stats'
    allowed_domains = ['premierleague.com']
    start_urls = ['https://www.premierleague.com/players']

    def parse(self, response):
        """Request every ``<td><a href=...>`` link, handled by ``parse_players``."""
        for link in response.xpath('//td/a/@href').getall():
            yield scrapy.Request(response.urljoin(link), callback=self.parse_players)

    def parse_players(self, response):
        """Yield the player's fields.

        The label/info texts of the intro section are paired up by position, so
        optional fields are emitted only when the page has them. The remaining
        fields are read from fixed locations and are ``None`` when absent.
        """
        section = response.xpath("//section[contains(@class,'sideWidget playerIntro')]")
        info = {'label': [], 'info': []}
        for classval in info:
            texts = section.xpath(f"./div[@class='{classval}']//text()").getall()
            stripped = (text.strip() for text in texts)
            # De-duplicate while keeping document order: labels and values are
            # zipped by position below, so an unordered set() could pair a
            # label with the wrong value.
            info[classval] = list(dict.fromkeys(text for text in stripped if text))
        item = {k.title(): v for k, v in zip(info['label'], info['info'])}
        dob = response.xpath('//ul[@class="pdcol2"]//div[@class="info"]/text()').get()
        item.update({
            'Name': response.xpath('//div[@class="name t-colour"]/text()').get(),
            # .get() returns None when nothing matches; only strip a real value.
            'DOB': dob.strip() if dob else None,
            'Height': response.xpath('//ul[@class="pdcol3"]/li/div[@class="info"]/text()').get(),
            'Weight': response.xpath('//ul[@class="pdcol3"]/li[@class="u-hide"]/div[@class="info"]/text()').get(),
            'Nationality': response.xpath('//span[@class="playerCountry"]/text()').get()
        })
        yield item
This is the json file that was produced after calling scrapy crawl pl_stats -o players.json.
[
{
"Position": "Defender",
"Name": "Max Aarons",
"DOB": "04/01/2000",
"Height": "178cm",
"Weight": null,
"Nationality": "England"
},
{
"Position": "Forward",
"Club": "Manchester City",
"Name": "Juli\u00e1n \u00c1lvarez",
"DOB": "31/01/2000",
"Height": "170cm",
"Weight": "71kg",
"Nationality": "Argentina"
},
{
"Position": "Defender",
"Club": "Leicester City",
"Name": "Daniel Amartey",
"DOB": "21/12/1994",
"Height": "186cm",
"Weight": "79kg",
"Nationality": "Ghana"
},
{
"Position": "Forward",
"Name": "Will Alves",
"DOB": "04/05/2005",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Midfielder",
"Club": "Brighton and Hove Albion",
"Name": "Steven Alzate",
"DOB": "08/09/1998",
"Height": "180cm",
"Weight": "75kg",
"Nationality": "Colombia"
},
{
"Position": "Defender",
"Name": "Marcos Alonso",
"DOB": "28/12/1990",
"Height": "188cm",
"Weight": null,
"Nationality": "Spain"
},
{
"Position": "Midfielder",
"Name": "Jaime Alvarado",
"DOB": "26/07/1999",
"Height": "179cm",
"Weight": null,
"Nationality": "Colombia"
},
{
"Position": "Midfielder",
"Club": "Newcastle United",
"Name": "Miguel Almir\u00f3n",
"DOB": "10/02/1994",
"Height": "174cm",
"Weight": "70kg",
"Nationality": "Paraguay"
},
{
"Position": "Goalkeeper",
"Name": "\u00c1lvaro Fern\u00e1ndez",
"DOB": "13/04/1998",
"Height": "185cm",
"Weight": null,
"Nationality": "Spain"
},
{
"Position": "Midfielder",
"Club": "Everton",
"Name": "Allan",
"DOB": "08/01/1991",
"Height": "173cm",
"Weight": "73kg",
"Nationality": "Brazil"
},
{
"Position": "Goalkeeper",
"Club": "Liverpool",
"Name": "Alisson",
"DOB": "02/10/1992",
"Height": "191cm",
"Weight": "91kg",
"Nationality": "Brazil"
},
{
"Position": "Defender",
"Name": "Ezgjan Alioski",
"DOB": "12/02/1992",
"Height": "173cm",
"Weight": null,
"Nationality": "North Macedonia"
},
{
"Position": "Midfielder",
"Name": "Dele Alli",
"DOB": "11/04/1996",
"Height": "188cm",
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Alex Telles",
"DOB": "15/12/1992",
"Height": "181cm",
"Weight": null,
"Nationality": "Brazil"
},
{
"Position": "Defender",
"Club": "Liverpool",
"Name": "Trent Alexander-Arnold",
"DOB": "07/10/1998",
"Height": "175cm",
"Weight": "69kg",
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Ajibola Alese",
"DOB": "17/01/2001",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Toby Alderweireld",
"DOB": "02/03/1989",
"Height": "186cm",
"Weight": null,
"Nationality": "Belgium"
},
{
"Position": "Defender",
"Club": "Manchester City",
"Name": "Nathan Ak\u00e9",
"DOB": "18/02/1995",
"Height": "180cm",
"Weight": "75kg",
"Nationality": "Netherlands"
},
{
"Position": "Defender",
"Club": "Brentford",
"Name": "Kristoffer Ajer",
"DOB": "17/04/1998",
"Height": "198cm",
"Weight": "92kg",
"Nationality": "Norway"
},
{
"Position": "Midfielder",
"Club": "Leicester City",
"Name": "Marc Albrighton",
"DOB": "18/11/1989",
"Height": "175cm",
"Weight": "74kg",
"Nationality": "England"
},
{
"Position": "Defender",
"Club": "Wolverhampton Wanderers",
"Name": "Rayan A\u00eft-Nouri",
"DOB": "06/06/2001",
"Height": "179cm",
"Weight": "70kg",
"Nationality": "France"
},
{
"Position": "Defender",
"Name": "Ryan Alebiosu",
"DOB": "17/12/2001",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Ahmed El Mohamady",
"DOB": "09/09/1987",
"Height": "183cm",
"Weight": null,
"Nationality": "Egypt"
},
{
"Position": "Defender",
"Name": "Derek Agyakwa",
"DOB": "19/12/2001",
"Height": null,
"Weight": null,
"Nationality": "Netherlands"
},
{
"Position": "Forward",
"Name": "Sergio Ag\u00fcero",
"DOB": "02/06/1988",
"Height": "173cm",
"Weight": null,
"Nationality": "Argentina"
},
{
"Position": "Defender",
"Name": "Tayo Adaramola",
"DOB": "14/11/2003",
"Height": null,
"Weight": null,
"Nationality": "Ireland"
},
{
"Position": "Goalkeeper",
"Club": "Liverpool",
"Name": "Adri\u00e1n",
"DOB": "03/01/1987",
"Height": "190cm",
"Weight": "80kg",
"Nationality": "Spain"
},
{
"Position": "Southampton",
"Club": "Forward",
"Name": "Che Adams",
"DOB": "13/07/1996",
"Height": "175cm",
"Weight": "70kg",
"Nationality": "Scotland"
},
{
"Position": "Southampton",
"Club": "Forward",
"Name": "Adam Armstrong",
"DOB": "10/02/1997",
"Height": "174cm",
"Weight": "69kg",
"Nationality": "England"
},
{
"Position": "Forward",
"Name": "Tammy Abraham",
"DOB": "02/10/1997",
"Height": "190cm",
"Weight": null,
"Nationality": "England"
}
]

How to compare two dictionaries and print if one of the values are above zero

I have been working with a JSON object and trying to get values from two different keys. What I want to do is check whether each product in object 1 is also present in object 2 with a value above 0 and, if so, print it out.
get_json = json.dumps({
"attributes": {
"203": {
"id": "203",
"code": "sizefootwear_conf",
"label": "EU",
"options": [{
"id": "6320",
"label": "38",
"products": ["69813"]
},
{
"id": "6351",
"label": "38,5",
"products": ["69817"]
},
{
"id": "6335",
"label": "39",
"products": ["69818"]
},
{
"id": "6354",
"label": "40",
"products": ["69819"]
},
{
"id": "6338",
"label": "40,5",
"products": ["69820"]
},
{
"id": "6357",
"label": "41",
"products": ["69821"]
},
{
"id": "6326",
"label": "42",
"products": ["69822"]
},
{
"id": "6362",
"label": "42,5",
"products": ["69823"]
},
{
"id": "6341",
"label": "43",
"products": ["69824"]
},
{
"id": "6365",
"label": "44",
"products": ["69814"]
},
{
"id": "6344",
"label": "44,5",
"products": ["69815"]
},
{
"id": "6370",
"label": "45,5",
"products": ["69816"]
}
],
"position": "0"
},
"205": {
"id": "205",
"code": "sizefootwearus_conf",
"label": "US",
"options": [{
"id": "6319",
"label": "5,5",
"products": ["69813"]
},
{
"id": "6372",
"label": "6,0",
"products": ["69817"]
},
{
"id": "6334",
"label": "6,5",
"products": ["69818"]
},
{
"id": "6350",
"label": "7,0",
"products": ["69819"]
},
{
"id": "6337",
"label": "7,5",
"products": ["69820"]
},
{
"id": "6353",
"label": "8,0",
"products": ["69821"]
},
{
"id": "6325",
"label": "8,5",
"products": ["69822"]
},
{
"id": "6356",
"label": "9,0",
"products": ["69823"]
},
{
"id": "6340",
"label": "9,5",
"products": ["69824"]
},
{
"id": "6364",
"label": "10,0",
"products": ["69814"]
},
{
"id": "6343",
"label": "10,5",
"products": ["69815"]
},
{
"id": "6328",
"label": "11,5",
"products": ["69816"]
}
],
"position": "1"
},
"204": {
"id": "204",
"code": "sizefootwearuk_conf",
"label": "UK",
"options": [{
"id": "6318",
"label": "5,0",
"products": ["69813"]
},
{
"id": "6352",
"label": "5,5",
"products": ["69817"]
},
{
"id": "6743",
"label": "6,0-EU39",
"products": ["69818"]
},
{
"id": "6744",
"label": "6,0-EU40",
"products": ["69819"]
},
{
"id": "6355",
"label": "6,5",
"products": ["69820"]
},
{
"id": "6336",
"label": "7,0",
"products": ["69821"]
},
{
"id": "6361",
"label": "7,5",
"products": ["69822"]
},
{
"id": "6324",
"label": "8,0",
"products": ["69823"]
},
{
"id": "6363",
"label": "8,5",
"products": ["69824"]
},
{
"id": "6339",
"label": "9,0",
"products": ["69814"]
},
{
"id": "6366",
"label": "9,5",
"products": ["69815"]
},
{
"id": "6369",
"label": "10,5",
"products": ["69816"]
}
],
"position": "2"
}
},
"productStockAlert": {
"entity": "69825",
"map": {
"203": {
"label": "52,5",
"": "",
"6610": "6610",
"6498": "6498",
"6582": "6582",
"6516": "6516",
"6501": "6501",
"6518": "6518",
"6504": "6504",
"6395": "6395",
"6404": "6404",
"6533": "6533",
"6407": "6407",
"6530": "6530",
"6410": "6410",
"6413": "6413",
"6416": "6416",
"6534": "6534",
"6419": "6419",
"6422": "6422",
"6425": "6425",
"6398": "6398",
"6401": "6401",
"6531": "6531",
"6431": "6431",
"6443": "6443",
"6446": "6446",
"6495": "6495",
"6449": "6449",
"6452": "6452",
"6455": "6455",
"6458": "6458",
"6461": "6461",
"6807": "6807",
"6464": "6464",
"6434": "6434",
"6437": "6437",
"6558": "6558",
"6440": "6440",
"6480": "6480",
"6481": "6481",
"6382": "6382",
"6465": "6465",
"6631": "6631",
"6332": "6332",
"6466": "6466",
"6348": "6348",
"6634": "6634",
"6320": "6320",
"6351": "6351",
"6384": "6384",
"6659": "6659",
"6335": "6335",
"6388": "6388",
"6508": "6508",
"6354": "6354",
"6338": "6338",
"6389": "6389",
"6664": "6664",
"6357": "6357",
"6390": "6390",
"6506": "6506",
"6637": "6637",
"6326": "6326",
"6362": "6362",
"6391": "6391",
"6640": "6640",
"6341": "6341",
"6392": "6392",
"6560": "6560",
"6365": "6365",
"6344": "6344",
"6385": "6385",
"6838": "6838",
"6368": "6368",
"6386": "6386",
"6370": "6370",
"6643": "6643",
"6628": "6628",
"6329": "6329",
"6529": "6529",
"6387": "6387",
"6843": "6843",
"6347": "6347",
"6470": "6470",
"6360": "6360",
"6646": "6646",
"6472": "6472",
"6323": "6323",
"6564": "6564",
"6593": "6593",
"6474": "6474",
"6376": "6376",
"6565": "6565",
"6561": "6561",
"6567": "6567",
"6604": "6604",
"6607": "6607"
},
"205": {
"label": "18,0",
"": "",
"6513": "6513",
"6497": "6497",
"6583": "6583",
"6500": "6500",
"6821": "6821",
"6503": "6503",
"6532": "6532",
"6394": "6394",
"6403": "6403",
"6406": "6406",
"6409": "6409",
"6412": "6412",
"6415": "6415",
"6418": "6418",
"6421": "6421",
"6424": "6424",
"6397": "6397",
"6400": "6400",
"6430": "6430",
"6442": "6442",
"6445": "6445",
"6448": "6448",
"6451": "6451",
"6454": "6454",
"6457": "6457",
"6460": "6460",
"6463": "6463",
"6433": "6433",
"6436": "6436",
"6439": "6439",
"6555": "6555",
"6468": "6468",
"6507": "6507",
"6632": "6632",
"6331": "6331",
"6319": "6319",
"6635": "6635",
"6372": "6372",
"6334": "6334",
"6661": "6661",
"6350": "6350",
"6337": "6337",
"6663": "6663",
"6353": "6353",
"6619": "6619",
"6325": "6325",
"6621": "6621",
"6638": "6638",
"6356": "6356",
"6340": "6340",
"6623": "6623",
"6641": "6641",
"6364": "6364",
"6343": "6343",
"6625": "6625",
"6840": "6840",
"6367": "6367",
"6328": "6328",
"6644": "6644",
"6371": "6371",
"6346": "6346",
"6842": "6842",
"6359": "6359",
"6322": "6322",
"6647": "6647",
"6373": "6373",
"6566": "6566",
"6375": "6375",
"6562": "6562",
"6605": "6605",
"6608": "6608"
},
"204": {
"label": "17,0",
"": "",
"6611": "6611",
"6514": "6514",
"6496": "6496",
"6515": "6515",
"6499": "6499",
"6517": "6517",
"6502": "6502",
"6393": "6393",
"6505": "6505",
"6402": "6402",
"6405": "6405",
"6408": "6408",
"6411": "6411",
"6414": "6414",
"6417": "6417",
"6420": "6420",
"6423": "6423",
"6396": "6396",
"6399": "6399",
"6429": "6429",
"6745": "6745",
"6441": "6441",
"6444": "6444",
"6447": "6447",
"6450": "6450",
"6453": "6453",
"6456": "6456",
"6459": "6459",
"6462": "6462",
"6432": "6432",
"6435": "6435",
"6438": "6438",
"6467": "6467",
"6381": "6381",
"6633": "6633",
"6330": "6330",
"6349": "6349",
"6636": "6636",
"6318": "6318",
"6352": "6352",
"6660": "6660",
"6333": "6333",
"6743": "6743",
"6744": "6744",
"6355": "6355",
"6662": "6662",
"6336": "6336",
"6620": "6620",
"6361": "6361",
"6622": "6622",
"6639": "6639",
"6324": "6324",
"6363": "6363",
"6624": "6624",
"6642": "6642",
"6339": "6339",
"6366": "6366",
"6626": "6626",
"6839": "6839",
"6342": "6342",
"6627": "6627",
"6369": "6369",
"6645": "6645",
"6327": "6327",
"6358": "6358",
"6841": "6841",
"6345": "6345",
"6471": "6471",
"6648": "6648",
"6321": "6321",
"6473": "6473",
"6374": "6374",
"6563": "6563",
"6606": "6606",
"6609": "6609"
}
},
"child": {
"6320_6319_6318_": {
"entity": "69813",
"stock_number": 0,
"stock_status": false,
"productId": "69813",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6365_6364_6339_": {
"entity": "69814",
"stock_number": 5,
"stock_status": true,
"productId": "69814",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6344_6343_6366_": {
"entity": "69815",
"stock_number": 3,
"stock_status": true,
"productId": "69815",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6370_6328_6369_": {
"entity": "69816",
"stock_number": 1,
"stock_status": true,
"productId": "69816",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6351_6372_6352_": {
"entity": "69817",
"stock_number": 0,
"stock_status": false,
"productId": "69817",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6335_6334_6743_": {
"entity": "69818",
"stock_number": 0,
"stock_status": false,
"productId": "69818",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6354_6350_6744_": {
"entity": "69819",
"stock_number": 0,
"stock_status": false,
"productId": "69819",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6338_6337_6355_": {
"entity": "69820",
"stock_number": 0,
"stock_status": false,
"productId": "69820",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6357_6353_6336_": {
"entity": "69821",
"stock_number": 3,
"stock_status": true,
"productId": "69821",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6326_6325_6361_": {
"entity": "69822",
"stock_number": 4,
"stock_status": true,
"productId": "69822",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6362_6356_6324_": {
"entity": "69823",
"stock_number": 6,
"stock_status": true,
"productId": "69823",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6341_6340_6363_": {
"entity": "69824",
"stock_number": 6,
"stock_status": true,
"productId": "69824",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
}
}
}
}
)
So what I did is that I created two dicts within a list:
first_loop = []
second_loop = []
total_stock = 0
for idx, sizes in json_value["attributes"].items():
    # One single-entry dict per size option: {label: first product id}.
    for getId in sizes["options"]:
        first_loop.append({getId["label"]: getId["products"][0]})
    # Stop after the first attribute group; the remaining groups are skipped.
    break
for idx, test in json_value["productStockAlert"]["child"].items():
    total_stock += test["stock_number"]
    # One single-entry dict per child: {product id: stock number}.
    second_loop.append({test["productId"]: test["stock_number"]})
print("first_loop", first_loop)
print("second_loop", second_loop)
print("total_stock", total_stock)
which returns:
first_loop [{'38': '69813'}, {'38,5': '69817'}, {'39': '69818'}, {'40': '69819'}, {'40,5': '69820'}, {'41': '69821'}, {'42': '69822'}, {'42,5': '69823'}, {'43': '69824'}, {'44': '69814'}, {'44,5': '69815'}, {'45,5': '69816'}]
second_loop [{'69813': 0}, {'69814': 5}, {'69815': 3}, {'69816': 1}, {'69817': 0}, {'69818': 0}, {'69819': 0}, {'69820': 0}, {'69821': 3}, {'69822': 4}, {'69823': 6}, {'69824': 6}]
total_stock 28
My issue is how to compare them: for each ID in first_loop (e.g. 69816), I want to check whether it is in second_loop with a value above 0 and, if so, append an entry to a new list, e.g. "45,5 (1)" (the size label from first_loop followed by the stock number from second_loop).
Output would end up:
>>> ["41 (3)", "42 (4)", "42,5 (6)", "43 (6)", "44 (5)", "44,5 (3)", "45,5 (1)"]
Basically, you just need to create id-label mapping, id-count mapping and merge them:
# product id -> size label, taken from the options of the first attribute group
id_label_mapping = {
    option["products"][0]: option["label"]
    for option in next(iter(json_value["attributes"].values()))["options"]
}
# product id -> stock number, taken from every child entry
id_count_mapping = {
    child["productId"]: child["stock_number"]
    for child in json_value["productStockAlert"]["child"].values()
}
# "label (count)" for every product whose count is present and non-zero
result = [
    f"{label} ({id_count_mapping[product_id]})"
    for product_id, label in id_label_mapping.items()
    if id_count_mapping.get(product_id)
]
In your code you've made 2 major mistakes which make the implementation of the last step (merging) much harder.
You're creating list of dicts instead of single dict with different keys;
In first_loop you're using label as key, but in second_loop you're using productId.
If we will fix this 2 gaps, your code will work:
first_loop = {}
second_loop = {}
total_stock = 0
for idx, sizes in json_value["attributes"].items():
    # product id -> size label
    for getId in sizes["options"]:
        first_loop[getId["products"][0]] = getId["label"]
    # Stop after the first attribute group.
    break
for idx, test in json_value["productStockAlert"]["child"].items():
    total_stock += test["stock_number"]
    # product id -> stock number
    second_loop[test["productId"]] = test["stock_number"]
result = []
for product_id, label in first_loop.items():
    count = second_loop.get(product_id)
    if count:  # filters both None (key doesn't exist) and 0
        result.append(f"{label} ({count})")
print("result", result)
print("total_stock", total_stock)
Not sure if it's the most efficient way, but you could:
make dicts not lists, does it need to be a list?
swap the key-value of the first_loop
intersect the sets
get the values from original, print only if > 0
[Code not tested]
first_loop = {}
second_loop = {}
total_stock = 0
for idx, sizes in json_value["attributes"].items():
    # product id -> size label
    for getId in sizes["options"]:
        first_loop[getId["products"][0]] = getId["label"]
    # Stop after the first attribute group.
    break
for idx, test in json_value["productStockAlert"]["child"].items():
    total_stock += test["stock_number"]
    # product id -> stock number
    second_loop[test["productId"]] = test["stock_number"]
# Product ids present in both mappings.
matching = first_loop.keys() & second_loop.keys()
# Walk first_loop rather than the set so lines come out in size order.
for prod_id, label in first_loop.items():
    if prod_id not in matching:
        continue
    stock = second_loop[prod_id]
    if stock > 0:
        print(f"{label} ({stock})")
Lastly, you have a break statement, that will make it run only one time... In which case you do not need a for loop...
It's quite too case specific, but hope it helps...

Expanding a dictionary using dot notation

I have the following json document:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data": {
"list": {
"name": "Sorji for QA",
"id": "5b0a2543b89acdbdb85f7b42"
},
"board": {
"shortLink": "iyCzZ5jx",
"name": "FlicksIO",
"id": "5b0a251f68a9e74b8ec3b3ac"
},
"card": {
"shortLink": "vOt2vO7v",
"idShort": 92,
"name": "New column in main for Storefront provider correlation.",
"id": "5b9c0023533f7c26424ea4ed",
"closed": true
},
"old": {
"closed": false
}
},
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator": {
"id": "5b203bc7e47d817a8138bc37",
"avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"fullName": "Marie Bond",
"idMemberReferrer": null,
"initials": "MB",
"username": "mb"
}
}
I would like to expand this out to be a single level with dot notation. That is, it should look like:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42"
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac"
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true
"data.old.closed": false
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
Would it be possible to do this with a generator object? I've been working a lot on recursion today, and have been trying to move from while loops to using generator objects and yields, etc.
You can keep a parameter in the signature of the recursive function to store the paths:
data = {'id': '5c26321bd8f4113d43b91141', 'idMemberCreator': '5b203bc7e47d817a8138bc37', 'data': {'list': {'name': 'Sorji for QA', 'id': '5b0a2543b89acdbdb85f7b42'}, 'board': {'shortLink': 'iyCzZ5jx', 'name': 'FlicksIO', 'id': '5b0a251f68a9e74b8ec3b3ac'}, 'card': {'shortLink': 'vOt2vO7v', 'idShort': 92, 'name': 'New column in main for Storefront provider correlation.', 'id': '5b9c0023533f7c26424ea4ed', 'closed': True}, 'old': {'closed': False}}, 'type': 'updateCard', 'date': '2018-12-28T14:24:27.455Z', 'limits': {}, 'memberCreator': {'id': '5b203bc7e47d817a8138bc37', 'avatarHash': '73bfa48c76c3c92615fe89ff79a6c5ae', 'avatarUrl': 'https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae', 'fullName': 'Marie Bond', 'idMemberReferrer': None, 'initials': 'MB', 'username': 'mb'}}
def dot_paths(d, _paths=None):
    """Yield ``[dotted_key, value]`` pairs for every leaf of a nested dict.

    A value counts as a leaf when it is not a dict or is an empty dict, so
    ``{}`` is kept as a value instead of disappearing from the result.

    Args:
        d: The (possibly nested) dictionary to flatten; keys must be strings.
        _paths: Key segments leading to ``d``; used by the recursion and
            normally left unset by callers.

    Yields:
        Two-item lists ``[path, value]`` where ``path`` joins the key
        segments with ``'.'``.
    """
    # None instead of a shared mutable [] default.
    prefix = [] if _paths is None else list(_paths)
    for key, value in d.items():
        if value and isinstance(value, dict):
            yield from dot_paths(value, prefix + [key])
        else:
            yield ['.'.join(prefix + [key]), value]
import json
print(json.dumps(dict(dot_paths(data)), indent=4))
Output:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42",
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac",
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true,
"data.old.closed": false,
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}

Create Dictionaries from part of the json File

I am trying to create two separate dictionaries: one that holds only the car information, and a second that holds only the ticket information.
{
"cars": [{
"model": "toyota",
"plate": "A11",
"tickets": [{
"amount": 50,
"type": "A1"
},
{
"amount": 34,
"type": "A2"
}
]
},
{
"model": "mazda",
"plate": "A11",
"tickets": [{
"amount": 50,
"type": "A1"
},
{
"amount": 34,
"type": "A2"
}
]
}
]
}
import json
# Parse the whole file into Python objects; the file is closed on exit.
with open('jsonfile', 'r') as data:
    cars_dict = json.load(data)
Then the loop to generate the two separate dicts. I created the loop, but still not achieving the result properly.
Desired output will be:
dict_cars = [{'Model':'Toyota', 'Plate':'A11'},
{'Model':'Mazda', 'Plate':'A13'}]
or
dict_cars = [{'Model':'Toyota', 'Plate':'A11', Tickets[......]},
{'Model':'Mazda', 'Plate':'A13'}, Tickets[......]]
dict_tickets = [{'amount:50',type:'A1'},
{'amount:34',type:'A2'},
{'amount:50',type:'A1'},
{'amount:34',type:'A2'}]
simply loop through the json object and parse the value:
# Sample document: one top-level key holding the list of cars.
d = {
    "dict_cars": [
        {
            "model": "toyota",
            "plate": "A11",
            "tickets": [
                {"amount": 50, "type": "A1"},
                {"amount": 34, "type": "A2"},
            ],
        },
        {
            "model": "mazda",
            "plate": "A11",
            "tickets": [
                {"amount": 50, "type": "A1"},
                {"amount": 34, "type": "A2"},
            ],
        },
    ]
}

dict_cars = []
dict_tickets = []
# Only the values are needed: each one is a list of car records.
for cars in d.values():
    for car in cars:
        # One {model: plate} entry per car.
        dict_cars.append({car.get('model', ''): car.get("plate", "")})
        # Tickets of all cars are collected into one flat list.
        dict_tickets.extend(car.get('tickets', []))
print("dict_cars = ", dict_cars)
print("dict_tickets = ", dict_tickets)
out
dict_cars = [{'toyota': 'A11'}, {'mazda': 'A11'}]
dict_tickets = [{'amount': 50, 'type': 'A1'}, {'amount': 34, 'type': 'A2'}, {'amount': 50, 'type': 'A1'}, {'amount': 34, 'type': 'A2'}]
import json
with open('file.json', 'r') as data:
    text = data.read()
cars = json.loads(text)['cars']
tickets = []
for arr in cars:
    # Move this car's tickets into the flat list ...
    for item in arr['tickets']:
        tickets.append(item)
    # ... then drop the key so the car record keeps only its own fields.
    del arr['tickets']
print(tickets)
print(cars)
output:
[{u'amount': 50, u'type': u'A1'}, {u'amount': 34, u'type': u'A2'}, {u'amount': 50, u'type': u'A1'}, {u'amount': 34, u'type': u'A2'}]
[{u'plate': u'A11', u'model': u'toyota'}, {u'plate': u'A11', u'model': u'mazda'}]

Json format for python

I'm rewriting a view based on what I know the final output should be in json but it's returning the dictionary as a string.
new output
{
"results":
["
{
'plot': u'',
'runtime': u'N/A',
'description': u'x',
'videos': [
{
'id': 823,
'name': u'x',
'youtube_id': u'FtcubOnXgZk'
}
],
'country': u'India',
'writer': u'Neetu Varma, Ranjeev Verma',
'name': u'Chalk N Duster',
'id': 940,
'director': u'Jayant Gilatar',
'hot': True,
'content': u'x',
'actors': u'Shabana Azmi, Arya Babbar, Gavie Chahal, Juhi Chawla',
'year': 2015,
'images': [
{'small': '/media/cache/62/fd/62fd5158d281c042e3cf1f919183e94e.jpg', 'medium': '/media/cache/5e/32/5e32ebb1a4d25bba0d0c70b4b448e948.jpg'}],
'trailer_youtube_id': u'FtcubOnXgZk',
'type': 'movie',
'slug': u'chalk-n-duster',
'categories': [{'parent_id': 2, 'id': 226, 'name': u'Drama'}],
'shows': {
'starts': '2016-01-16',
'booking_url': u'',
'venue': {
'address': u'',
'id': 854,
'name': u'Nyali Cinemax',
'area': {
'id': 52,
'parent': {
'id': 48,
'name': u'Mombasa'
},
'name': u'Nyali'
}
},
'starts_time': '18:30:00'
}
}", "{'plot': u'' ....
old output
"results": [
{
"actors": "x",
"categories": [
{
"id": 299,
"name": "Biography",
"parent_id": 2
},
],
"content": "x",
"country": "x",
"description": "x",
"director": "x",
"hot": true,
"id": 912,
"images": [
{
"medium": "/media/cache/d2/b3/d2b3a7885e7c39bfc5c2b297b66619c5.jpg",
"small": "/media/cache/e2/d0/e2d01b2c7c77d3590536666de4a7fd7d.jpg"
}
],
"name": "Bridge of Spies",
"plot": "x",
"runtime": "141 min",
"shows": [
{
"booking_url": "",
"starts": "2015-11-27",
"starts_time": "16:30:00",
"venue": {
"address": "The Junction Shopping Mall",
"area": {
"id": 68,
"name": "Ngong Road",
"parent": {
"id": 2,
"name": "Nairobi"
}
},
"id": 1631,
"name": "Century Cinemax Junction"
}
},
],
"slug": "bridge-of-spies",
"trailer_youtube_id": "",
"type": "movie",
"videos": [
{
"id": "795",
"name": "Bridge of Spies",
"youtube_id": "2-2x3r1m2I4"
}
],
"writer": "Matt Charman, Ethan Coen, Joel Coen",
"year": 2015
}, ...
]
Here's the view. I know the shows should also be a list, but in order to start testing I'll need the data to come in the right format. If it involves too much rewriting, I'm okay with links and an explanation.
@memoize(timeout=60*60)
def movies_json():
    """Build the list of movie dicts for movies that have a show starting today or later.

    Returns:
        A list with one dict per movie holding its scalar fields, thumbnail
        URLs, trailer/video info, the list of its shows and the list of its
        categories.

    NOTE(review): ``memoize`` is defined outside this view; presumably it
    caches the result for ``timeout`` seconds -- confirm.
    """
    today = datetime.date.today()
    movies = Movie.objects.filter(shows__starts__gte=today)
    results = []
    number = len(movies)
    for movie in movies:
        # Progress trace; the single-argument call form runs on Python 2 and 3.
        print("Now Remaining: {0}".format(number))
        number -= 1
        medium = get_thumbnail(movie.picture(), '185x274', crop='center', quality=99).url
        small = get_thumbnail(movie.picture(), '50x74', crop='center', quality=99).url
        movie_details = {
            'director': movie.director,
            'plot': movie.plot,
            'actors': movie.actors,
            'content': movie.content,
            'country': movie.country,
            'description': movie.description,
            'hot': movie.hot,
            'id': movie.id,
            'images': [{'medium': medium, 'small': small}],
            'name': movie.name,
            'runtime': movie.runtime,
            'slug': movie.slug,
            'type': 'movie',
            'writer': movie.writer,
            'year': movie.year,
        }
        # NOTE(review): [0] raises IndexError when the movie has no video with
        # a youtube_id -- confirm every listed movie has one.
        youtube_details = movie.videos.filter(youtube_id__isnull=False)[0]
        movie_details['trailer_youtube_id'] = youtube_details.youtube_id if youtube_details.youtube_id else ""
        movie_details['videos'] = [
            {
                'id': youtube_details.id,
                'name': movie.name,
                'youtube_id': youtube_details.youtube_id,
            }
        ]
        shows = []
        for show in movie.shows.all():
            show_details = {
                'booking_url': show.booking_url,
                'starts': show.starts.isoformat(),
                'starts_time': show.starts_time.isoformat(),
                'venue': {
                    'address': show.venue.address,
                    'area': {
                        'id': show.venue.area.id,
                        'name': show.venue.area.name,
                        'parent': {
                            'id': show.venue.area.parent.id,
                            'name': show.venue.area.parent.name,
                        }
                    },
                    'id': show.venue.id,
                    'name': show.venue.name,
                }
            }
            shows.append(show_details)
        # Publish the whole list, not just the last show built in the loop.
        movie_details['shows'] = shows
        category_list = []
        for category in movie.categories.all():
            category_details = {
                'id': category.id,
                'name': category.name,
                'parent_id': category.parent.id,
            }
            category_list.append(category_details)
        movie_details['categories'] = category_list
        results.append(movie_details)
    return results
The data is returned by django rest framework 0.4.0
import json
# json.loads parses a string; json.load expects an open file-like object.
json_obj = json.loads(json_string)

Categories