I'm trying to scrape data from https://www.premierleague.com/players. On the webpage, there is a list of players. I used the XPath expression response.xpath('//td/a/@href').getall() to get a list of relative URLs, one per player. I then iterated over that list and joined each relative URL with the homepage to get a variable called "absolute_url", which for one of the players looks like this: "https://www.premierleague.com" + "/players/63289/Brenden-Aaronson/overview", i.e. https://www.premierleague.com/players/63289/Brenden-Aaronson/overview. I tested the XPath expressions in the Scrapy shell and they produce the desired output there, at least for the player overview pages I tested. Where am I going wrong?
import scrapy
from urllib.parse import urljoin
class PlStatsSpider(scrapy.Spider):
    """Spider that follows table links from the start page to player pages.

    Each followed page is handed to ``parse_players``, which yields one dict
    of player details.
    """

    name = 'pl_stats'
    allowed_domains = ['premierleague.com']
    start_urls = ['http://premierleague.com']

    def parse(self, response):
        """Yield a request for every ``<td><a href=...>`` link on the page."""
        url = 'http://premierleague.com'
        # XPath selects attributes with "@"; "#href" is not valid XPath.
        for link in response.xpath('//td/a/@href').getall():
            absolute_url = urljoin(url, link)  # merging relative url
            yield response.follow(absolute_url, callback=self.parse_players)

    def parse_players(self, response):
        """Yield a dict of player details scraped from one player page.

        NOTE(review): the positional indexes below (``[2]``, ``[3]``, ``[5]``,
        ``[6]``) assume every page lists the same fields in the same order;
        a page with fewer matches raises ``IndexError`` -- confirm against
        the real pages.
        """
        yield {
            'Name': response.xpath('//h1/div[@class="name t-colour"]/text()').get(),
            'DOB': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[3].get().strip(),
            'Height': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[5].get(),
            'Club': response.xpath('//div[@class="info"]/a/text()').get().strip(),
            'Weight': response.xpath('//div[@class="personalLists"]//div[@class="info"]/text()')[6].get(),
            'Position': response.xpath('//section[@class="sideWidget playerIntro t2-topBorder"]//div[@class="info"]/text()')[2].get(),
            'Nationality': response.xpath('//span[@class="playerCountry"]/text()').get()}
Most of your XPath expressions are too ambiguous for the many different pages you are trying to scrape. The player pages all have slight variations, which makes extracting data by positional index nearly impossible. Additionally, not every field is available for every player, such as the position and club. What you could do for those fields is iterate through their section elements, grab all of the 'label'/'info' pairs, and match whatever is available to your output.
For example:
import scrapy
class PlStatsSpider(scrapy.Spider):
    """Crawl the player index page and yield one item per player page."""

    name = 'pl_stats'
    allowed_domains = ['premierleague.com']
    start_urls = ['https://www.premierleague.com/players']

    def parse(self, response):
        """Yield a request for every ``<td><a href=...>`` link on the page."""
        # XPath selects attributes with "@"; "#href" is not valid XPath.
        for link in response.xpath('//td/a/@href').getall():
            yield scrapy.Request(response.urljoin(link), callback=self.parse_players)

    def parse_players(self, response):
        """Yield a dict of whatever player details the page provides.

        The label/info pairs found in the player-intro section are matched
        up in order, then the fixed fields (Name, DOB, Height, Weight,
        Nationality) are added; a fixed field that is absent is ``None``.
        """
        section = response.xpath("//section[contains(@class,'sideWidget playerIntro')]")
        info = {'label': [], 'info': []}
        for classval in info:
            idents = section.xpath(f"./div[@class='{classval}']//text()").getall()
            stripped = (text.strip() for text in idents)
            # De-duplicate with dict.fromkeys rather than set(): a set loses
            # document order, so zip() below could pair a label with the
            # wrong value (e.g. "Position": "Southampton").
            info[classval] = list(dict.fromkeys(text for text in stripped if text))
        # NOTE(review): pairing by position assumes the label and info divs
        # appear in the same order within the section -- confirm on the site.
        item = {k.title(): v for k, v in zip(info['label'], info['info'])}
        dob = response.xpath('//ul[@class="pdcol2"]//div[@class="info"]/text()').get()
        item.update({
            'Name': response.xpath('//div[@class="name t-colour"]/text()').get(),
            # .get() returns None when nothing matches; only strip real text.
            'DOB': dob.strip() if dob is not None else None,
            'Height': response.xpath('//ul[@class="pdcol3"]/li/div[@class="info"]/text()').get(),
            'Weight': response.xpath('//ul[@class="pdcol3"]/li[@class="u-hide"]/div[@class="info"]/text()').get(),
            'Nationality': response.xpath('//span[@class="playerCountry"]/text()').get(),
        })
        yield item
This is the json file that was produced after calling scrapy crawl pl_stats -o players.json.
[
{
"Position": "Defender",
"Name": "Max Aarons",
"DOB": "04/01/2000",
"Height": "178cm",
"Weight": null,
"Nationality": "England"
},
{
"Position": "Forward",
"Club": "Manchester City",
"Name": "Juli\u00e1n \u00c1lvarez",
"DOB": "31/01/2000",
"Height": "170cm",
"Weight": "71kg",
"Nationality": "Argentina"
},
{
"Position": "Defender",
"Club": "Leicester City",
"Name": "Daniel Amartey",
"DOB": "21/12/1994",
"Height": "186cm",
"Weight": "79kg",
"Nationality": "Ghana"
},
{
"Position": "Forward",
"Name": "Will Alves",
"DOB": "04/05/2005",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Midfielder",
"Club": "Brighton and Hove Albion",
"Name": "Steven Alzate",
"DOB": "08/09/1998",
"Height": "180cm",
"Weight": "75kg",
"Nationality": "Colombia"
},
{
"Position": "Defender",
"Name": "Marcos Alonso",
"DOB": "28/12/1990",
"Height": "188cm",
"Weight": null,
"Nationality": "Spain"
},
{
"Position": "Midfielder",
"Name": "Jaime Alvarado",
"DOB": "26/07/1999",
"Height": "179cm",
"Weight": null,
"Nationality": "Colombia"
},
{
"Position": "Midfielder",
"Club": "Newcastle United",
"Name": "Miguel Almir\u00f3n",
"DOB": "10/02/1994",
"Height": "174cm",
"Weight": "70kg",
"Nationality": "Paraguay"
},
{
"Position": "Goalkeeper",
"Name": "\u00c1lvaro Fern\u00e1ndez",
"DOB": "13/04/1998",
"Height": "185cm",
"Weight": null,
"Nationality": "Spain"
},
{
"Position": "Midfielder",
"Club": "Everton",
"Name": "Allan",
"DOB": "08/01/1991",
"Height": "173cm",
"Weight": "73kg",
"Nationality": "Brazil"
},
{
"Position": "Goalkeeper",
"Club": "Liverpool",
"Name": "Alisson",
"DOB": "02/10/1992",
"Height": "191cm",
"Weight": "91kg",
"Nationality": "Brazil"
},
{
"Position": "Defender",
"Name": "Ezgjan Alioski",
"DOB": "12/02/1992",
"Height": "173cm",
"Weight": null,
"Nationality": "North Macedonia"
},
{
"Position": "Midfielder",
"Name": "Dele Alli",
"DOB": "11/04/1996",
"Height": "188cm",
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Alex Telles",
"DOB": "15/12/1992",
"Height": "181cm",
"Weight": null,
"Nationality": "Brazil"
},
{
"Position": "Defender",
"Club": "Liverpool",
"Name": "Trent Alexander-Arnold",
"DOB": "07/10/1998",
"Height": "175cm",
"Weight": "69kg",
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Ajibola Alese",
"DOB": "17/01/2001",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Toby Alderweireld",
"DOB": "02/03/1989",
"Height": "186cm",
"Weight": null,
"Nationality": "Belgium"
},
{
"Position": "Defender",
"Club": "Manchester City",
"Name": "Nathan Ak\u00e9",
"DOB": "18/02/1995",
"Height": "180cm",
"Weight": "75kg",
"Nationality": "Netherlands"
},
{
"Position": "Defender",
"Club": "Brentford",
"Name": "Kristoffer Ajer",
"DOB": "17/04/1998",
"Height": "198cm",
"Weight": "92kg",
"Nationality": "Norway"
},
{
"Position": "Midfielder",
"Club": "Leicester City",
"Name": "Marc Albrighton",
"DOB": "18/11/1989",
"Height": "175cm",
"Weight": "74kg",
"Nationality": "England"
},
{
"Position": "Defender",
"Club": "Wolverhampton Wanderers",
"Name": "Rayan A\u00eft-Nouri",
"DOB": "06/06/2001",
"Height": "179cm",
"Weight": "70kg",
"Nationality": "France"
},
{
"Position": "Defender",
"Name": "Ryan Alebiosu",
"DOB": "17/12/2001",
"Height": null,
"Weight": null,
"Nationality": "England"
},
{
"Position": "Defender",
"Name": "Ahmed El Mohamady",
"DOB": "09/09/1987",
"Height": "183cm",
"Weight": null,
"Nationality": "Egypt"
},
{
"Position": "Defender",
"Name": "Derek Agyakwa",
"DOB": "19/12/2001",
"Height": null,
"Weight": null,
"Nationality": "Netherlands"
},
{
"Position": "Forward",
"Name": "Sergio Ag\u00fcero",
"DOB": "02/06/1988",
"Height": "173cm",
"Weight": null,
"Nationality": "Argentina"
},
{
"Position": "Defender",
"Name": "Tayo Adaramola",
"DOB": "14/11/2003",
"Height": null,
"Weight": null,
"Nationality": "Ireland"
},
{
"Position": "Goalkeeper",
"Club": "Liverpool",
"Name": "Adri\u00e1n",
"DOB": "03/01/1987",
"Height": "190cm",
"Weight": "80kg",
"Nationality": "Spain"
},
{
"Position": "Southampton",
"Club": "Forward",
"Name": "Che Adams",
"DOB": "13/07/1996",
"Height": "175cm",
"Weight": "70kg",
"Nationality": "Scotland"
},
{
"Position": "Southampton",
"Club": "Forward",
"Name": "Adam Armstrong",
"DOB": "10/02/1997",
"Height": "174cm",
"Weight": "69kg",
"Nationality": "England"
},
{
"Position": "Forward",
"Name": "Tammy Abraham",
"DOB": "02/10/1997",
"Height": "190cm",
"Weight": null,
"Nationality": "England"
}
]
I have been working with a JSON object and trying to combine values from two different keys. What I want to do is check whether each product in object 1 also appears in object 2 with a value over 0, and if so, print it out.
get_json = json.dumps({
"attributes": {
"203": {
"id": "203",
"code": "sizefootwear_conf",
"label": "EU",
"options": [{
"id": "6320",
"label": "38",
"products": ["69813"]
},
{
"id": "6351",
"label": "38,5",
"products": ["69817"]
},
{
"id": "6335",
"label": "39",
"products": ["69818"]
},
{
"id": "6354",
"label": "40",
"products": ["69819"]
},
{
"id": "6338",
"label": "40,5",
"products": ["69820"]
},
{
"id": "6357",
"label": "41",
"products": ["69821"]
},
{
"id": "6326",
"label": "42",
"products": ["69822"]
},
{
"id": "6362",
"label": "42,5",
"products": ["69823"]
},
{
"id": "6341",
"label": "43",
"products": ["69824"]
},
{
"id": "6365",
"label": "44",
"products": ["69814"]
},
{
"id": "6344",
"label": "44,5",
"products": ["69815"]
},
{
"id": "6370",
"label": "45,5",
"products": ["69816"]
}
],
"position": "0"
},
"205": {
"id": "205",
"code": "sizefootwearus_conf",
"label": "US",
"options": [{
"id": "6319",
"label": "5,5",
"products": ["69813"]
},
{
"id": "6372",
"label": "6,0",
"products": ["69817"]
},
{
"id": "6334",
"label": "6,5",
"products": ["69818"]
},
{
"id": "6350",
"label": "7,0",
"products": ["69819"]
},
{
"id": "6337",
"label": "7,5",
"products": ["69820"]
},
{
"id": "6353",
"label": "8,0",
"products": ["69821"]
},
{
"id": "6325",
"label": "8,5",
"products": ["69822"]
},
{
"id": "6356",
"label": "9,0",
"products": ["69823"]
},
{
"id": "6340",
"label": "9,5",
"products": ["69824"]
},
{
"id": "6364",
"label": "10,0",
"products": ["69814"]
},
{
"id": "6343",
"label": "10,5",
"products": ["69815"]
},
{
"id": "6328",
"label": "11,5",
"products": ["69816"]
}
],
"position": "1"
},
"204": {
"id": "204",
"code": "sizefootwearuk_conf",
"label": "UK",
"options": [{
"id": "6318",
"label": "5,0",
"products": ["69813"]
},
{
"id": "6352",
"label": "5,5",
"products": ["69817"]
},
{
"id": "6743",
"label": "6,0-EU39",
"products": ["69818"]
},
{
"id": "6744",
"label": "6,0-EU40",
"products": ["69819"]
},
{
"id": "6355",
"label": "6,5",
"products": ["69820"]
},
{
"id": "6336",
"label": "7,0",
"products": ["69821"]
},
{
"id": "6361",
"label": "7,5",
"products": ["69822"]
},
{
"id": "6324",
"label": "8,0",
"products": ["69823"]
},
{
"id": "6363",
"label": "8,5",
"products": ["69824"]
},
{
"id": "6339",
"label": "9,0",
"products": ["69814"]
},
{
"id": "6366",
"label": "9,5",
"products": ["69815"]
},
{
"id": "6369",
"label": "10,5",
"products": ["69816"]
}
],
"position": "2"
}
},
"productStockAlert": {
"entity": "69825",
"map": {
"203": {
"label": "52,5",
"": "",
"6610": "6610",
"6498": "6498",
"6582": "6582",
"6516": "6516",
"6501": "6501",
"6518": "6518",
"6504": "6504",
"6395": "6395",
"6404": "6404",
"6533": "6533",
"6407": "6407",
"6530": "6530",
"6410": "6410",
"6413": "6413",
"6416": "6416",
"6534": "6534",
"6419": "6419",
"6422": "6422",
"6425": "6425",
"6398": "6398",
"6401": "6401",
"6531": "6531",
"6431": "6431",
"6443": "6443",
"6446": "6446",
"6495": "6495",
"6449": "6449",
"6452": "6452",
"6455": "6455",
"6458": "6458",
"6461": "6461",
"6807": "6807",
"6464": "6464",
"6434": "6434",
"6437": "6437",
"6558": "6558",
"6440": "6440",
"6480": "6480",
"6481": "6481",
"6382": "6382",
"6465": "6465",
"6631": "6631",
"6332": "6332",
"6466": "6466",
"6348": "6348",
"6634": "6634",
"6320": "6320",
"6351": "6351",
"6384": "6384",
"6659": "6659",
"6335": "6335",
"6388": "6388",
"6508": "6508",
"6354": "6354",
"6338": "6338",
"6389": "6389",
"6664": "6664",
"6357": "6357",
"6390": "6390",
"6506": "6506",
"6637": "6637",
"6326": "6326",
"6362": "6362",
"6391": "6391",
"6640": "6640",
"6341": "6341",
"6392": "6392",
"6560": "6560",
"6365": "6365",
"6344": "6344",
"6385": "6385",
"6838": "6838",
"6368": "6368",
"6386": "6386",
"6370": "6370",
"6643": "6643",
"6628": "6628",
"6329": "6329",
"6529": "6529",
"6387": "6387",
"6843": "6843",
"6347": "6347",
"6470": "6470",
"6360": "6360",
"6646": "6646",
"6472": "6472",
"6323": "6323",
"6564": "6564",
"6593": "6593",
"6474": "6474",
"6376": "6376",
"6565": "6565",
"6561": "6561",
"6567": "6567",
"6604": "6604",
"6607": "6607"
},
"205": {
"label": "18,0",
"": "",
"6513": "6513",
"6497": "6497",
"6583": "6583",
"6500": "6500",
"6821": "6821",
"6503": "6503",
"6532": "6532",
"6394": "6394",
"6403": "6403",
"6406": "6406",
"6409": "6409",
"6412": "6412",
"6415": "6415",
"6418": "6418",
"6421": "6421",
"6424": "6424",
"6397": "6397",
"6400": "6400",
"6430": "6430",
"6442": "6442",
"6445": "6445",
"6448": "6448",
"6451": "6451",
"6454": "6454",
"6457": "6457",
"6460": "6460",
"6463": "6463",
"6433": "6433",
"6436": "6436",
"6439": "6439",
"6555": "6555",
"6468": "6468",
"6507": "6507",
"6632": "6632",
"6331": "6331",
"6319": "6319",
"6635": "6635",
"6372": "6372",
"6334": "6334",
"6661": "6661",
"6350": "6350",
"6337": "6337",
"6663": "6663",
"6353": "6353",
"6619": "6619",
"6325": "6325",
"6621": "6621",
"6638": "6638",
"6356": "6356",
"6340": "6340",
"6623": "6623",
"6641": "6641",
"6364": "6364",
"6343": "6343",
"6625": "6625",
"6840": "6840",
"6367": "6367",
"6328": "6328",
"6644": "6644",
"6371": "6371",
"6346": "6346",
"6842": "6842",
"6359": "6359",
"6322": "6322",
"6647": "6647",
"6373": "6373",
"6566": "6566",
"6375": "6375",
"6562": "6562",
"6605": "6605",
"6608": "6608"
},
"204": {
"label": "17,0",
"": "",
"6611": "6611",
"6514": "6514",
"6496": "6496",
"6515": "6515",
"6499": "6499",
"6517": "6517",
"6502": "6502",
"6393": "6393",
"6505": "6505",
"6402": "6402",
"6405": "6405",
"6408": "6408",
"6411": "6411",
"6414": "6414",
"6417": "6417",
"6420": "6420",
"6423": "6423",
"6396": "6396",
"6399": "6399",
"6429": "6429",
"6745": "6745",
"6441": "6441",
"6444": "6444",
"6447": "6447",
"6450": "6450",
"6453": "6453",
"6456": "6456",
"6459": "6459",
"6462": "6462",
"6432": "6432",
"6435": "6435",
"6438": "6438",
"6467": "6467",
"6381": "6381",
"6633": "6633",
"6330": "6330",
"6349": "6349",
"6636": "6636",
"6318": "6318",
"6352": "6352",
"6660": "6660",
"6333": "6333",
"6743": "6743",
"6744": "6744",
"6355": "6355",
"6662": "6662",
"6336": "6336",
"6620": "6620",
"6361": "6361",
"6622": "6622",
"6639": "6639",
"6324": "6324",
"6363": "6363",
"6624": "6624",
"6642": "6642",
"6339": "6339",
"6366": "6366",
"6626": "6626",
"6839": "6839",
"6342": "6342",
"6627": "6627",
"6369": "6369",
"6645": "6645",
"6327": "6327",
"6358": "6358",
"6841": "6841",
"6345": "6345",
"6471": "6471",
"6648": "6648",
"6321": "6321",
"6473": "6473",
"6374": "6374",
"6563": "6563",
"6606": "6606",
"6609": "6609"
}
},
"child": {
"6320_6319_6318_": {
"entity": "69813",
"stock_number": 0,
"stock_status": false,
"productId": "69813",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6365_6364_6339_": {
"entity": "69814",
"stock_number": 5,
"stock_status": true,
"productId": "69814",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6344_6343_6366_": {
"entity": "69815",
"stock_number": 3,
"stock_status": true,
"productId": "69815",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6370_6328_6369_": {
"entity": "69816",
"stock_number": 1,
"stock_status": true,
"productId": "69816",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6351_6372_6352_": {
"entity": "69817",
"stock_number": 0,
"stock_status": false,
"productId": "69817",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6335_6334_6743_": {
"entity": "69818",
"stock_number": 0,
"stock_status": false,
"productId": "69818",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6354_6350_6744_": {
"entity": "69819",
"stock_number": 0,
"stock_status": false,
"productId": "69819",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6338_6337_6355_": {
"entity": "69820",
"stock_number": 0,
"stock_status": false,
"productId": "69820",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6357_6353_6336_": {
"entity": "69821",
"stock_number": 3,
"stock_status": true,
"productId": "69821",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6326_6325_6361_": {
"entity": "69822",
"stock_number": 4,
"stock_status": true,
"productId": "69822",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6362_6356_6324_": {
"entity": "69823",
"stock_number": 6,
"stock_status": true,
"productId": "69823",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
},
"6341_6340_6363_": {
"entity": "69824",
"stock_number": 6,
"stock_status": true,
"productId": "69824",
"parent_url": "https://www.bstn.com/eu_en/p/jordan-jordan-why-not-zer0-4-dd4889-006-0250549"
}
}
}
}
)
So what I did was build two lists of single-entry dicts:
# Build two lists of single-entry dicts:
#   first_loop  -> {size label: product id} for the first attribute group only
#   second_loop -> {product id: stock count} for every child product
# and add up the stock of all child products.
first_loop = []
second_loop = []
total_stock = 0

# Slicing to a single element mirrors "run the loop body once, then break".
for size_group in list(json_value["attributes"].values())[:1]:
    first_loop.extend(
        {option["label"]: option["products"][0]}
        for option in size_group["options"]
    )

for child in json_value["productStockAlert"]["child"].values():
    stock_number = child["stock_number"]
    total_stock += stock_number
    second_loop.append({child["productId"]: stock_number})

print("first_loop", first_loop)
print("second_loop", second_loop)
print("total_stock", total_stock)
which returns:
first_loop [{'38': '69813'}, {'38,5': '69817'}, {'39': '69818'}, {'40': '69819'}, {'40,5': '69820'}, {'41': '69821'}, {'42': '69822'}, {'42,5': '69823'}, {'43': '69824'}, {'44': '69814'}, {'44,5': '69815'}, {'45,5': '69816'}]
second_loop [{'69813': 0}, {'69814': 5}, {'69815': 3}, {'69816': 1}, {'69817': 0}, {'69818': 0}, {'69819': 0}, {'69820': 0}, {'69821': 3}, {'69822': 4}, {'69823': 6}, {'69824': 6}]
total_stock 28
My issue is how to compare the two: for each ID in first_loop (e.g. 69816), I want to check whether it is in second_loop with a value above 0. If it is, I want to append an entry to a new list, e.g. "45,5 (1)", which is the size label from first_loop followed by the stock value from second_loop.
Output would end up:
>>> ["41 (3)", "42 (4)", "42,5 (6)", "43 (6)", "44 (5)", "44,5 (3)", "45,5 (1)"]
Basically, you just need to create id-label mapping, id-count mapping and merge them:
# Product id -> size label, taken from the first attribute group only.
first_group = next(iter(json_value["attributes"].values()))
id_label_mapping = {
    option["products"][0]: option["label"]
    for option in first_group["options"]
}

# Product id -> units in stock.
id_count_mapping = {
    child["productId"]: child["stock_number"]
    for child in json_value["productStockAlert"]["child"].values()
}

# "label (count)" for every product whose count is present and non-zero.
result = [
    f"{label} ({id_count_mapping[product_id]})"
    for product_id, label in id_label_mapping.items()
    if id_count_mapping.get(product_id)
]
In your code you've made 2 major mistakes, which make the last step (merging) much harder to implement:
You're creating a list of dicts instead of a single dict with different keys;
In first_loop you're using the label as the key, but in second_loop you're using productId.
If we fix these 2 gaps, your code will work:
# first_loop:  product id -> size label (first attribute group only)
# second_loop: product id -> stock count (every child product)
first_loop = {}
# Slicing to a single element mirrors "run the loop body once, then break".
for size_group in list(json_value["attributes"].values())[:1]:
    first_loop.update(
        (option["products"][0], option["label"])
        for option in size_group["options"]
    )

children = json_value["productStockAlert"]["child"].values()
second_loop = {child["productId"]: child["stock_number"] for child in children}
total_stock = sum(child["stock_number"] for child in children)

# Keep a size only when its product id has a truthy count; this filters out
# both a missing id (None) and a count of 0.
result = [
    f"{label} ({second_loop[product_id]})"
    for product_id, label in first_loop.items()
    if second_loop.get(product_id)
]

print("result", result)
print("total_stock", total_stock)
Not sure if it's the most efficient way, but you could:
make dicts not lists, does it need to be a list?
swap the key-value of the first_loop
intersect the sets
get the values from original, print only if > 0
[Code not tested]
# first_loop:  product id -> size label (first attribute group only)
# second_loop: product id -> stock count (every child product)
first_loop = {}
second_loop = {}
total_stock = 0
for idx, sizes in json_value["attributes"].items():
    for getId in sizes["options"]:
        first_loop[getId["products"][0]] = getId["label"]
    # Only the first attribute group is needed.
    break
for idx, test in json_value["productStockAlert"]["child"].items():
    total_stock += test["stock_number"]
    second_loop[test["productId"]] = test["stock_number"]

# dict key views support set operations directly.
matching = first_loop.keys() & second_loop.keys()

# Walk first_loop (insertion-ordered) instead of the unordered set so the
# sizes print in the same order as they are listed in the attributes.
for prod_id, label in first_loop.items():
    if prod_id not in matching:
        continue
    stock = second_loop[prod_id]
    if stock > 0:
        print(f"{label} ({stock})")
Lastly, you have a break statement that makes the outer loop run only once, in which case you do not need a for loop at all.
This is quite case-specific, but I hope it helps.
I have the following json document:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data": {
"list": {
"name": "Sorji for QA",
"id": "5b0a2543b89acdbdb85f7b42"
},
"board": {
"shortLink": "iyCzZ5jx",
"name": "FlicksIO",
"id": "5b0a251f68a9e74b8ec3b3ac"
},
"card": {
"shortLink": "vOt2vO7v",
"idShort": 92,
"name": "New column in main for Storefront provider correlation.",
"id": "5b9c0023533f7c26424ea4ed",
"closed": true
},
"old": {
"closed": false
}
},
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator": {
"id": "5b203bc7e47d817a8138bc37",
"avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"fullName": "Marie Bond",
"idMemberReferrer": null,
"initials": "MB",
"username": "mb"
}
}
I would like to expand this out to be a single level with dot notation. That is, it should look like:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42"
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac"
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true
"data.old.closed": false
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
Would it be possible to do this with a generator object? I've been working a lot on recursion today, and have been trying to move from while loops to using generator objects and yields, etc.
You can keep a parameter in the signature of the recursive function to store the paths:
data = {'id': '5c26321bd8f4113d43b91141', 'idMemberCreator': '5b203bc7e47d817a8138bc37', 'data': {'list': {'name': 'Sorji for QA', 'id': '5b0a2543b89acdbdb85f7b42'}, 'board': {'shortLink': 'iyCzZ5jx', 'name': 'FlicksIO', 'id': '5b0a251f68a9e74b8ec3b3ac'}, 'card': {'shortLink': 'vOt2vO7v', 'idShort': 92, 'name': 'New column in main for Storefront provider correlation.', 'id': '5b9c0023533f7c26424ea4ed', 'closed': True}, 'old': {'closed': False}}, 'type': 'updateCard', 'date': '2018-12-28T14:24:27.455Z', 'limits': {}, 'memberCreator': {'id': '5b203bc7e47d817a8138bc37', 'avatarHash': '73bfa48c76c3c92615fe89ff79a6c5ae', 'avatarUrl': 'https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae', 'fullName': 'Marie Bond', 'idMemberReferrer': None, 'initials': 'MB', 'username': 'mb'}}
def dot_paths(d, _paths=()):
    """Flatten a nested dict into ``[dotted_path, value]`` pairs.

    Non-empty dict values are descended into recursively; every other value
    (including an empty dict) is yielded as a leaf under the dot-joined path
    of the keys leading to it.

    Args:
        d: The mapping to flatten.
        _paths: Sequence of keys already traversed; used by the recursion
            and as an optional prefix for every yielded path.

    Yields:
        Two-element lists ``[path, value]``, suitable for ``dict(...)``.
    """
    for key, value in d.items():
        # Build a fresh list at each level so the (immutable) default and
        # the caller's sequence are never mutated or shared.
        trail = [*_paths, key]
        if isinstance(value, dict) and value:
            yield from dot_paths(value, trail)
        else:
            # str() lets non-string keys (e.g. ints) take part in the path.
            yield ['.'.join(map(str, trail)), value]
import json
print(json.dumps(dict(dot_paths(data)), indent=4))
Output:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42",
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac",
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true,
"data.old.closed": false,
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
I'm rewriting a view based on what I know the final output should be in json but it's returning the dictionary as a string.
new output
{
"results":
["
{
'plot': u'',
'runtime': u'N/A',
'description': u'x',
'videos': [
{
'id': 823,
'name': u'x',
'youtube_id': u'FtcubOnXgZk'
}
],
'country': u'India',
'writer': u'Neetu Varma, Ranjeev Verma',
'name': u'Chalk N Duster',
'id': 940,
'director': u'Jayant Gilatar',
'hot': True,
'content': u'x',
'actors': u'Shabana Azmi, Arya Babbar, Gavie Chahal, Juhi Chawla',
'year': 2015,
'images': [
{'small': '/media/cache/62/fd/62fd5158d281c042e3cf1f919183e94e.jpg', 'medium': '/media/cache/5e/32/5e32ebb1a4d25bba0d0c70b4b448e948.jpg'}],
'trailer_youtube_id': u'FtcubOnXgZk',
'type': 'movie',
'slug': u'chalk-n-duster',
'categories': [{'parent_id': 2, 'id': 226, 'name': u'Drama'}],
'shows': {
'starts': '2016-01-16',
'booking_url': u'',
'venue': {
'address': u'',
'id': 854,
'name': u'Nyali Cinemax',
'area': {
'id': 52,
'parent': {
'id': 48,
'name': u'Mombasa'
},
'name': u'Nyali'
}
},
'starts_time': '18:30:00'
}
}", "{'plot': u'' ....
old output
"results": [
{
"actors": "x",
"categories": [
{
"id": 299,
"name": "Biography",
"parent_id": 2
},
],
"content": "x",
"country": "x",
"description": "x",
"director": "x",
"hot": true,
"id": 912,
"images": [
{
"medium": "/media/cache/d2/b3/d2b3a7885e7c39bfc5c2b297b66619c5.jpg",
"small": "/media/cache/e2/d0/e2d01b2c7c77d3590536666de4a7fd7d.jpg"
}
],
"name": "Bridge of Spies",
"plot": "x",
"runtime": "141 min",
"shows": [
{
"booking_url": "",
"starts": "2015-11-27",
"starts_time": "16:30:00",
"venue": {
"address": "The Junction Shopping Mall",
"area": {
"id": 68,
"name": "Ngong Road",
"parent": {
"id": 2,
"name": "Nairobi"
}
},
"id": 1631,
"name": "Century Cinemax Junction"
}
},
],
"slug": "bridge-of-spies",
"trailer_youtube_id": "",
"type": "movie",
"videos": [
{
"id": "795",
"name": "Bridge of Spies",
"youtube_id": "2-2x3r1m2I4"
}
],
"writer": "Matt Charman, Ethan Coen, Joel Coen",
"year": 2015
}, ...
]
Here's the view. I know the shows should also be a list, but in order to start testing I need the data to come back in the right format. If it involves too much rewriting, I'm okay with links and an explanation.
# NOTE(review): restored from "#memoize(...)", which reads as a decorator whose
# "@" was lost; memoize is not defined in this snippet -- confirm its import.
@memoize(timeout=60*60)
def movies_json():
    """Build a list of dicts describing every movie with a show from today on.

    Each dict carries the movie's own fields, two thumbnail URLs under
    ``images``, the first video that has a YouTube id (``videos`` /
    ``trailer_youtube_id``), all of the movie's shows with their venue and
    area under ``shows``, and its categories under ``categories``.

    Returns:
        list of dict, one entry per movie.
    """
    today = datetime.date.today()
    # Filtering across the multi-valued "shows" relation returns a movie once
    # per matching show; distinct() collapses those duplicates.
    movies = Movie.objects.filter(shows__starts__gte=today).distinct()
    results = []
    number = len(movies)
    for movie in movies:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("Now Remaining: {0}".format(number))
        number -= 1

        medium = get_thumbnail(movie.picture(), '185x274', crop='center', quality=99).url
        small = get_thumbnail(movie.picture(), '50x74', crop='center', quality=99).url
        movie_details = {
            'director': movie.director,
            'plot': movie.plot,
            'actors': movie.actors,
            'content': movie.content,
            'country': movie.country,
            'description': movie.description,
            'hot': movie.hot,
            'id': movie.id,
            'images': [{'medium': medium, 'small': small}],
            'name': movie.name,
            'runtime': movie.runtime,
            'slug': movie.slug,
            'type': 'movie',
            'writer': movie.writer,
            'year': movie.year,
        }

        # Indexing an empty queryset raises IndexError; a movie without any
        # video gets an empty trailer id and an empty video list instead.
        try:
            youtube_details = movie.videos.filter(youtube_id__isnull=False)[0]
        except IndexError:
            youtube_details = None
        if youtube_details is None:
            movie_details['trailer_youtube_id'] = ""
            movie_details['videos'] = []
        else:
            movie_details['trailer_youtube_id'] = (
                youtube_details.youtube_id if youtube_details.youtube_id else ""
            )
            movie_details['videos'] = [
                {
                    'id': youtube_details.id,
                    'name': movie.name,
                    'youtube_id': youtube_details.youtube_id,
                }
            ]

        shows = []
        for show in movie.shows.all():
            venue = show.venue
            area = venue.area
            shows.append({
                'booking_url': show.booking_url,
                'starts': show.starts.isoformat(),
                'starts_time': show.starts_time.isoformat(),
                'venue': {
                    'address': venue.address,
                    'area': {
                        'id': area.id,
                        'name': area.name,
                        'parent': {
                            'id': area.parent.id,
                            'name': area.parent.name,
                        },
                    },
                    'id': venue.id,
                    'name': venue.name,
                },
            })
        # Store the whole list, not just the last show built in the loop.
        movie_details['shows'] = shows

        movie_details['categories'] = [
            {
                'id': category.id,
                'name': category.name,
                'parent_id': category.parent.id,
            }
            for category in movie.categories.all()
        ]

        results.append(movie_details)
    return results
The data is returned by django rest framework 0.4.0
import json
# json.loads parses a str; json.load expects a file-like object with .read().
json_obj = json.loads(json_string)