Transform Json into Pandas Dataframe

Transform Json into Pandas Dataframe - python

I have this kind of JSON and I would like to transform it into a pandas DataFrame with specific column names.
{
"data": [
{
"id": 1,
"name": "3Way Result",
"suspended": false,
"bookmaker": {
"data": [
{
"id": 27802,
"name": "Ladbrokes",
"odds": {
"data": [
{
"label": "1",
"value": "1.61",
"probability": "62.11%",
"dp3": "1.610",
"american": -164,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "3.90",
"probability": "25.64%",
"dp3": "3.900",
"american": 290,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.20",
"probability": "19.23%",
"dp3": "5.200",
"american": 420,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
},
{
"id": 70,
"name": "Pncl",
"odds": {
"data": [
{
"label": "1",
"value": "1.65",
"probability": "60.61%",
"dp3": "1.645",
"american": -154,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "4.20",
"probability": "23.81%",
"dp3": "4.200",
"american": 320,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.43",
"probability": "18.42%",
"dp3": "5.430",
"american": 443,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
}
]
}
}
],
"meta": {
"plans": [
{
"name": "Football Free Plan",
"features": "Standard",
"request_limit": "180,60",
"sport": "Soccer"
}
],
"sports": [
{
"id": 1,
"name": "Soccer",
"current": true
}
]
}
}
Every column name consists of the label value plus the name of the bookmaker.
I would like to take the value in label and combine it with the bookmaker's name (from name) to form the column name. The float in value then becomes the value in the row of the dataframe.
Here the Expected Output
1_LadBrokes X_LadBrokes 2_LadBrokes last_update_LadBrokes 1_Pncl X_Pncl 2_Pncl last_update_Pncl
0 1.61 3.9 5.2 2021-10-01 16:41:27.000000 1.65 4.2 5.43 2021-10-01 16:59:18.000000

You can achieve it like so using json_normalize + apply.
def set_values(x):
    """Add the odds value and last-update date of one row as labelled entries.

    Meant to be passed to ``DataFrame.apply(..., axis=1)`` on the exploded
    frame, where every row holds exactly one odds record.

    Args:
        x: Row (``pandas.Series``) with a ``"name"`` entry and an
            ``"odds.data"`` entry holding one odds dict with ``"label"``,
            ``"value"`` and ``"last_update"`` -> ``"date"`` keys.

    Returns:
        The same row, extended with ``"<label>_<name>"`` and
        ``"last_update_<name>"`` entries.

    Raises:
        KeyError: If the odds record lacks one of the required keys.
    """
    odds = x["odds.data"]
    name = x["name"]
    # Index directly instead of using ``.get``: a missing label would
    # otherwise silently produce a "None_<name>" column.
    x[f"{odds['label']}_{name}"] = odds["value"]
    x[f"last_update_{name}"] = odds["last_update"]["date"]
    return x
df = (
    # One row per bookmaker, then one row per odds record of that bookmaker.
    pd.json_normalize(data["data"], record_path=["bookmaker", "data"])
    .explode("odds.data")
    # set_values already takes a single row, so no lambda wrapper is needed.
    .apply(set_values, axis=1)
    .drop(["odds.data", "id", "name"], axis=1)
    # Each row only fills its own columns; propagate the values in both
    # directions so the first row carries every column, then keep that row.
    .ffill()
    .bfill()
    .head(1)
)
In [39]: df
Out[39]:
1_Ladbrokes 1_Pncl 2_Ladbrokes 2_Pncl X_Ladbrokes X_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 1.65 5.20 5.43 3.90 4.20 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000

Use pd.json_normalize and create two sub-dataframes, one for value and one for last_update, then join them.
# One row per odds record, tagged with the name of its bookmaker.
out = pd.json_normalize(
    data=data['data'],
    record_path=['bookmaker', 'data', 'odds', 'data'],
    meta=[['bookmaker', 'data', 'name']]
)[['label', 'value', 'last_update.date', 'bookmaker.data.name']]

# "<label>_<bookmaker>" -> odds value
df1 = out.set_index(out['label'] + '_' + out['bookmaker.data.name'])['value']

# "last_update_<bookmaker>" -> date. Deduplicate per bookmaker *before*
# selecting the dates: dropping duplicate date values would also drop a
# bookmaker whose timestamp happens to equal another bookmaker's.
df2 = out.drop_duplicates('bookmaker.data.name') \
    .set_index('bookmaker.data.name')['last_update.date'] \
    .add_prefix('last_update_')

# Stack both Series and turn the labels into the columns of a one-row frame.
df = pd.concat([df1, df2]).to_frame().T
Output:
>>> df
1_Ladbrokes X_Ladbrokes 2_Ladbrokes 1_Pncl X_Pncl 2_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 3.90 5.20 1.65 4.20 5.43 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000

Related

How to get data from nested list in response.json()

There is a json response from an API request in the following schema:
[
{
"id": "1",
"variable": "x",
"unt": "%",
"results": [
{
"classification": [
{
"id": "1",
"name": "group",
"category": {
"555": "general"
}
}
],
"series": [
{
"location": {
"id": "1",
"level": {
"id": "n1",
"name": "z"
},
"name": "z"
},
"serie": {
"202001": "0.08",
"202002": "0.48",
"202003": "0.19"
}
}
]
}
]
}
]
I want to transform the data from the "serie" key into a pandas DataFrame.
I can do that explicitly:
# Walk to the first series of the first result of the first record.
first_series = val[0]["results"][0]["series"][0]
content = first_series["serie"]
df = pd.DataFrame(content.items())
df
0 1
0 202001 0.08
1 202002 0.48
2 202003 0.19
But if there is more than one record, that would get only the data from the first element because of the positional arguments [0].
Is there a way to retrieve that data not considering the positional arguments?

Try:
val = [
{
"id": "1",
"variable": "x",
"unt": "%",
"results": [
{
"classification": [
{"id": "1", "name": "group", "category": {"555": "general"}}
],
"series": [
{
"location": {
"id": "1",
"level": {"id": "n1", "name": "z"},
"name": "z",
},
"serie": {"202001": "0.08", "202002": "0.48", "202003": "0.19"},
}
],
}
],
},
{
"id": "2",
"variable": "x",
"unt": "%",
"results": [
{
"classification": [
{"id": "1", "name": "group", "category": {"555": "general"}}
],
"series": [
{
"location": {
"id": "1",
"level": {"id": "n1", "name": "z"},
"name": "z",
},
"serie": {"202001": "1.08", "202002": "1.48", "202003": "1.19"},
}
],
}
],
},
]
# Collect one [key, value] row for every entry of every "serie" mapping,
# across all records, results and series.
rows = []
for record in val:
    for result in record["results"]:
        for series in result["series"]:
            for key, value in series["serie"].items():
                rows.append([key, value])
df = pd.DataFrame(rows)
print(df)
Prints:
0 1
0 202001 0.08
1 202002 0.48
2 202003 0.19
3 202001 1.08
4 202002 1.48
5 202003 1.19

Python - trying to convert time from utc to cst in api response

Below is code I am using to get data from an api. And below that is the response. I am trying to convert datetime from UTC to CST and then present the data with that time zone instead. But I am having trouble isolating datetime
import requests
import json
# Fetch the response, decode its JSON body and pretty-print it.
weather = requests.get('...')
j = json.loads(weather.text)
pretty = json.dumps(j, indent=2)
print (pretty)
Response:
{
"metadata": null,
"data": [
{
"datetime": "2022-12-11T05:00:00Z",
"is_day_time": false,
"icon_code": 5,
"weather_text": "Clear with few low clouds and few cirrus",
"temperature": {
"value": 45.968,
"units": "F"
},
"feels_like_temperature": {
"value": 39.092,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 4,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 5.144953471725125,
"units": "mi/h"
},
"direction": 25
},
"wind_gust": {
"value": 9.014853256979242,
"units": "mi/h"
},
"pressure": {
"value": 29.4171829577118,
"units": "inHg"
},
"visibility": {
"value": 6.835083114610673,
"units": "mi"
},
"dew_point": {
"value": 31.01,
"units": "F"
},
"cloud_cover": 31
},
{
"datetime": "2022-12-11T06:00:00Z",
"is_day_time": false,
"icon_code": 4,
"weather_text": "Clear with few low clouds",
"temperature": {
"value": 45.068,
"units": "F"
},
"feels_like_temperature": {
"value": 38.066,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 5,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 5.167322834645669,
"units": "mi/h"
},
"direction": 27
},
"wind_gust": {
"value": 8.724051539012168,
"units": "mi/h"
},
"pressure": {
"value": 29.4213171559632,
"units": "inHg"
},
"visibility": {
"value": 5.592340730136005,
"units": "mi"
},
"dew_point": {
"value": 30.2,
"units": "F"
},
"cloud_cover": 13
},
{
"datetime": "2022-12-11T07:00:00Z",
"is_day_time": false,
"icon_code": 4,
"weather_text": "Clear with few low clouds",
"temperature": {
"value": 44.33,
"units": "F"
},
"feels_like_temperature": {
"value": 37.364,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 4,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 4.988367931281317,
"units": "mi/h"
},
"direction": 28
},
"wind_gust": {
"value": 8.254294917680744,
"units": "mi/h"
},
"pressure": {
"value": 29.4165923579616,
"units": "inHg"
},
"visibility": {
"value": 7.456454306848007,
"units": "mi"
},
"dew_point": {
"value": 29.714,
"units": "F"
},
"cloud_cover": 22
}
],
"error": null

I am assuming what you mean is that you want to present the data in the current time of the Central Time zone. As of the date this question was asked, that would be CST (Central Standard Time). At another time it will be CDT (Central Daylight Time) based on daylight savings time rules that are followed in the Country/City for the time zone for which you wish to localize the data. The rules are all nicely kept in the IANA Timezone Database.
So the trick is that you pick your Country/City from the Timezone DB that follows the rules as they apply to your current time zone. For Central Time, America/Chicago usually works but YMMV.
There are a lot of ways to do this. This example is inefficiently iterating through the dictionary created by json.loads and replacing the time string with a converted string. The key is using the dateutil library to parse the timestamp string and convert using the proper UTC offset as defined for the time zone in the IANA database.
Hopefully this example has enough pieces you can copy and adapt to your own needs.
from dateutil.parser import parse
from dateutil import tz
import json
# NOTE: json.loads needs the JSON text (str/bytes) here, e.g. the ``.text``
# of a requests response, not the response object itself.
j = json.loads(weather)

# Look the zone up once instead of once per entry.
central = tz.gettz("America/Chicago")

# Loop through each data entry, reformatting the time
for entry in j["data"]:
    if "datetime" in entry:
        parsed_dt = parse(entry["datetime"])
        # Convert the parsed timestamp into the America/Chicago zone.
        converted = parsed_dt.astimezone(central)
        entry["datetime"] = converted.isoformat()

print(json.dumps(j, indent=2))
The resulting JSON has datetime fields that contain an ISO timestamp for the CST time.
{
"metadata": null,
"data": [{
"datetime": "2022-12-10T23:00:00-06:00",
"is_day_time": false,
"icon_code": 5,
"weather_text": "Clear with few low clouds and few cirrus",
"temperature": {
"value": 45.968,
"units": "F"
},
"feels_like_temperature": {
"value": 39.092,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 4,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 5.144953471725125,
"units": "mi/h"
},
"direction": 25
},
"wind_gust": {
"value": 9.014853256979242,
"units": "mi/h"
},
"pressure": {
"value": 29.4171829577118,
"units": "inHg"
},
"visibility": {
"value": 6.835083114610673,
"units": "mi"
},
"dew_point": {
"value": 31.01,
"units": "F"
},
"cloud_cover": 31
},
{
"datetime": "2022-12-11T00:00:00-06:00",
"is_day_time": false,
"icon_code": 4,
"weather_text": "Clear with few low clouds",
"temperature": {
"value": 45.068,
"units": "F"
},
"feels_like_temperature": {
"value": 38.066,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 5,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 5.167322834645669,
"units": "mi/h"
},
"direction": 27
},
"wind_gust": {
"value": 8.724051539012168,
"units": "mi/h"
},
"pressure": {
"value": 29.4213171559632,
"units": "inHg"
},
"visibility": {
"value": 5.592340730136005,
"units": "mi"
},
"dew_point": {
"value": 30.2,
"units": "F"
},
"cloud_cover": 13
},
{
"datetime": "2022-12-11T01:00:00-06:00",
"is_day_time": false,
"icon_code": 4,
"weather_text": "Clear with few low clouds",
"temperature": {
"value": 44.33,
"units": "F"
},
"feels_like_temperature": {
"value": 37.364,
"units": "F"
},
"relative_humidity": 56,
"precipitation": {
"precipitation_probability": 4,
"total_precipitation": {
"value": 0.0,
"units": "in"
}
},
"wind": {
"speed": {
"value": 4.988367931281317,
"units": "mi/h"
},
"direction": 28
},
"wind_gust": {
"value": 8.254294917680744,
"units": "mi/h"
},
"pressure": {
"value": 29.4165923579616,
"units": "inHg"
},
"visibility": {
"value": 7.456454306848007,
"units": "mi"
},
"dew_point": {
"value": 29.714,
"units": "F"
},
"cloud_cover": 22
}
],
"error": null
}

(Python) Cant scrape data from my targeted site anymore using re, requests, and json

I'm having a problem where I can no longer scrape data from a website by using the JavaScript path. I'm trying to scrape from Rocket League Tracker.
here's my code:
import requests
import re
import json
import math
def rankGetter():
    """Download a Rocket League tracker profile page and dump its state as JSON.

    Extracts the JSON assigned to ``window.__INITIAL_STATE__`` in the page and
    writes it, pretty-printed with sorted keys, to
    ``rocketleague_json_data.txt``. Prints a message and returns without
    writing when the request answers 400 or the variable is not in the page.
    """
    trackerLink = 'https://rocketleague.tracker.network/rocket-league/profile/epic/DirectPanda/overview'
    # now we have the tracker link we're going to scrape the website
    # all the HTML of the site is now in result
    result = requests.get(trackerLink)

    # checker to make sure the user used the correct information
    if result.status_code == 400:
        print('profile not found')
        return

    # Extract everything needed to render the current page. Data is stored as Json in the
    # JavaScript variable: window.__INITIAL_STATE__={"route":{"path":"\u0 ... }};
    # The dot is escaped so it only matches a literal "." in the variable name.
    match = re.search(r"window\.__INITIAL_STATE__\s?=\s?(\{.*?\});", result.text)
    if match is None:
        # Without this guard a page lacking the variable fails with
        # AttributeError on ``None.group``.
        print('window.__INITIAL_STATE__ not found in page')
        return

    # convert text string to structured json data
    rocketleague = json.loads(match.group(1))

    # Save structured json data to a text file that helps you orient yourself and pick
    # the parts you are interested in.
    with open('rocketleague_json_data.txt', 'w') as outfile:
        outfile.write(json.dumps(rocketleague, indent=4, sort_keys=True))
The problem is that the generated text file doesn't contain the ranks I want anymore.
"stats": {
"standardLeaderboardLeaders": {},
"standardLeaderboards": [],
"standardPlayers": {},
"standardTitles": {}
},
**"stats-v2": {
"segments": {},
"standardProfileMatches": {},
"standardProfileSummaries": {},
"standardProfiles": {},
"standardProfilesHistory": {},
"standardSessions": {},
"subscriptions": {}
},**
"titles": {
"currentTitle": {
"name": "Rocket League",
"platforms": [
The ranks should be under stats-v2, but as you can see it's empty now.
What's happening and how do I fix it? I was able to get ranks for a week, but all of a sudden it stopped working today.

It seems that the data is loaded from an external URL:
import json
import requests
# Query the tracker's JSON API endpoint and pretty-print the decoded reply.
url = "https://api.tracker.gg/api/v2/rocket-league/standard/profile/epic/DirectPanda"
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0"
}
response = requests.get(url, headers=headers)
data = response.json()
print(json.dumps(data, indent=4))
Prints:
{
"data": {
"platformInfo": {
"platformSlug": "epic",
"platformUserId": null,
"platformUserHandle": "DirectPanda",
"platformUserIdentifier": "DirectPanda",
"avatarUrl": null,
"additionalParameters": null
},
"userInfo": {
"userId": null,
"isPremium": false,
"isVerified": false,
"isInfluencer": false,
"isPartner": false,
"countryCode": null,
"customAvatarUrl": null,
"customHeroUrl": null,
"socialAccounts": [],
"pageviews": 592,
"isSuspicious": null
},
"metadata": {
"lastUpdated": {
"value": "2021-04-22T17:39:42.277-04:00",
"displayValue": "2021-04-22T21:39:42.2770000+00:00"
},
"playerId": 16603481,
"currentSeason": 17
},
"segments": [
{
"type": "overview",
"attributes": {},
"metadata": {
"name": "Lifetime"
},
"expiryDate": "0001-01-01T00:00:00+00:00",
"stats": {
"wins": {
"rank": 30357,
"percentile": 98.3,
"displayName": "Wins",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 4985,
"displayValue": "4,985",
"displayType": "Number"
},
"goals": {
"rank": 23698,
"percentile": 98.7,
"displayName": "Goals",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 14363,
"displayValue": "14,363",
"displayType": "Number"
},
"mVPs": {
"rank": 35646,
"percentile": 98.0,
"displayName": "MVPs",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 2093,
"displayValue": "2,093",
"displayType": "Number"
},
"saves": {
"rank": 30864,
"percentile": 98.3,
"displayName": "Saves",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 9231,
"displayValue": "9,231",
"displayType": "Number"
},
"assists": {
"rank": 29228,
"percentile": 98.4,
"displayName": "Assists",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 4763,
"displayValue": "4,763",
"displayType": "Number"
},
"shots": {
"rank": 24596,
"percentile": 98.6,
"displayName": "Shots",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 29139,
"displayValue": "29,139",
"displayType": "Number"
},
"goalShotRatio": {
"rank": 1409320,
"percentile": 15.0,
"displayName": "Goal Shot Ratio",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 49.29132777377398,
"displayValue": "49.3",
"displayType": "NumberPrecision1"
},
"score": {
"rank": 28260,
"percentile": 98.4,
"displayName": "TRN Score",
"displayCategory": "General",
"category": "general",
"metadata": {},
"value": 2398222.83,
"displayValue": "2,398,222.8",
"displayType": "NumberPrecision1"
},
"seasonRewardLevel": {
"rank": null,
"percentile": 85.0,
"displayName": "Season Reward Level",
"displayCategory": "General",
"category": "general",
"metadata": {
"iconUrl": "https://trackercdn.com/cdn/tracker.gg/rocket-league/ranks/s4-13.png",
"rankName": "Diamond"
},
"value": 5,
"displayValue": "5",
"displayType": "Number"
},
"seasonRewardWins": {
"rank": null,
"percentile": 95.8,
"displayName": "Season Reward Wins",
"displayCategory": "General",
"category": "general",
"metadata": {},
"value": 9,
"displayValue": "9",
"displayType": "Number"
}
}
},
{
"type": "playlist",
"attributes": {
"playlistId": 0,
"season": 17
},
"metadata": {
"name": "Un-Ranked"
},
"expiryDate": "0001-01-01T00:00:00+00:00",
"stats": {
"tier": {
"rank": null,
"percentile": null,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"iconUrl": "https://trackercdn.com/cdn/tracker.gg/rocket-league/ranks/s4-0.png",
"name": "Unranked"
},
"value": 0,
"displayValue": "0",
"displayType": "Number"
},
"division": {
"rank": null,
"percentile": null,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"name": "Division I"
},
"value": 0,
"displayValue": "0",
"displayType": "Number"
},
"matchesPlayed": {
"rank": null,
"percentile": null,
"displayName": "Matches",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 0,
"displayValue": "0",
"displayType": "Number"
},
"winStreak": {
"rank": null,
"percentile": null,
"displayName": "WinStreak",
"displayCategory": "Performance",
"category": "performance",
"metadata": {
"type": "win"
},
"value": 0,
"displayValue": "0",
"displayType": "Number"
},
"rating": {
"rank": 215152,
"percentile": 90.0,
"displayName": "Rating",
"displayCategory": "Skill",
"category": "skill",
"metadata": {},
"value": 1597,
"displayValue": "1,597",
"displayType": "Number"
}
}
},
{
"type": "playlist",
"attributes": {
"playlistId": 10,
"season": 17
},
"metadata": {
"name": "Ranked Duel 1v1"
},
"expiryDate": "0001-01-01T00:00:00+00:00",
"stats": {
"tier": {
"rank": null,
"percentile": 98.2,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"iconUrl": "https://trackercdn.com/cdn/tracker.gg/rocket-league/ranks/s4-16.png",
"name": "Champion I"
},
"value": 16,
"displayValue": "16",
"displayType": "Number"
},
"division": {
"rank": null,
"percentile": 88.0,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"deltaDown": 13,
"deltaUp": 6,
"name": "Division III"
},
"value": 2,
"displayValue": "2",
"displayType": "Number"
},
"matchesPlayed": {
"rank": null,
"percentile": 57.0,
"displayName": "Matches",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 2,
"displayValue": "2",
"displayType": "Number"
},
"winStreak": {
"rank": null,
"percentile": 60.0,
"displayName": "WinStreak",
"displayCategory": "Performance",
"category": "performance",
"metadata": {
"type": "win"
},
"value": 1,
"displayValue": "1",
"displayType": "Number"
},
"rating": {
"rank": 101541,
"percentile": 96.1,
"displayName": "Rating",
"displayCategory": "Skill",
"category": "skill",
"metadata": {},
"value": 1031,
"displayValue": "1,031",
"displayType": "Number"
}
}
},
{
"type": "playlist",
"attributes": {
"playlistId": 11,
"season": 17
},
"metadata": {
"name": "Ranked Doubles 2v2"
},
"expiryDate": "0001-01-01T00:00:00+00:00",
"stats": {
"tier": {
"rank": null,
"percentile": 87.0,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"iconUrl": "https://trackercdn.com/cdn/tracker.gg/rocket-league/ranks/s4-16.png",
"name": "Champion I"
},
"value": 16,
"displayValue": "16",
"displayType": "Number"
},
"division": {
"rank": null,
"percentile": 90.0,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"deltaDown": 15,
"deltaUp": 3,
"name": "Division IV"
},
"value": 3,
"displayValue": "3",
"displayType": "Number"
},
"matchesPlayed": {
"rank": null,
"percentile": 80.0,
"displayName": "Matches",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 40,
"displayValue": "40",
"displayType": "Number"
},
"winStreak": {
"rank": null,
"percentile": 34.0,
"displayName": "WinStreak",
"displayCategory": "Performance",
"category": "performance",
"metadata": {
"type": "loss"
},
"value": 1,
"displayValue": "-1",
"displayType": "Number"
},
"rating": {
"rank": 311789,
"percentile": 89.0,
"displayName": "Rating",
"displayCategory": "Skill",
"category": "skill",
"metadata": {},
"value": 1177,
"displayValue": "1,177",
"displayType": "Number"
}
}
},
{
"type": "playlist",
"attributes": {
"playlistId": 13,
"season": 17
},
"metadata": {
"name": "Ranked Standard 3v3"
},
"expiryDate": "0001-01-01T00:00:00+00:00",
"stats": {
"tier": {
"rank": null,
"percentile": 96.0,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"iconUrl": "https://trackercdn.com/cdn/tracker.gg/rocket-league/ranks/s4-17.png",
"name": "Champion II"
},
"value": 17,
"displayValue": "17",
"displayType": "Number"
},
"division": {
"rank": null,
"percentile": 79.0,
"displayName": "Matches",
"displayCategory": "General",
"category": "general",
"metadata": {
"deltaDown": 7,
"deltaUp": 27,
"name": "Division III"
},
"value": 2,
"displayValue": "2",
"displayType": "Number"
},
"matchesPlayed": {
"rank": null,
"percentile": 97.8,
"displayName": "Matches",
"displayCategory": "Performance",
"category": "performance",
"metadata": {},
"value": 95,
"displayValue": "95",
"displayType": "Number"
},
"winStreak": {
"rank": null,
"percentile": 16.0,
"displayName": "WinStreak",
"displayCategory": "Performance",
"category": "performance",
"metadata": {
"type": "loss"
},
"value": 2,
"displayValue": "-2",
"displayType": "Number"
},
"rating": {
"rank": 122500,
"percentile": 95.8,
"displayName": "Rating",
"displayCategory": "Skill",
"category": "skill",
"metadata": {},
"value": 1255,
"displayValue": "1,255",
"displayType": "Number"
}
}
},
...

How to join document in search query

{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.2876821,
"hits": [
{
"_index": "product_index",
"_type": "product",
"_id": "1115",
"_score": 0.2876821,
"_source": {
"isactive": true,
"in_use": false,
"brand_name": "Adidas",
"sku_id": "56456487987987",
"long_description": "this is long description",
"key_feature": [
{
"id": 1148,
"key_feature": "sport wear"
},
{
"id": 1147,
"key_feature": "Cotton shirt"
},
{
"id": 1146,
"key_feature": "White and blue"
}
],
"isdeleted": false,
"created_by": null,
"brand_id": 5,
"search_terms": [
{
"label": "white shirt",
"value": 9
}
],
"color_id": 2,
"specific_keywords": "",
"item_list": [
{
"item_id": 1114,
"product_id": 1115,
"isactive": true,
"id": 9,
"isdeleted": false
},
{
"item_id": 1113,
"product_id": 1115,
"isactive": true,
"id": 10,
"isdeleted": false
}
],
"upc_code": "",
"display_size": "L",
"name": "New White shirt",
"updated_by": null,
"id": 1115,
"updated_date": "2020-03-25T08:24:37.644571+00:00",
"color_name": "blue",
"created_date": "2020-03-25T08:11:14.966673+00:00",
"category": [
{
"parent_category_id": 78,
"sub_sub_category": null,
"sub_category": null,
"sub_category_id": null,
"sub_sub_category_id": null,
"parent_category": "new Shirt Cate",
"id": 1151
}
]
}
},
{
"_index": "product_index",
"_type": "product",
"_id": "1113",
"_score": 0.2876821,
"_source": {
"isactive": true,
"in_use": false,
"sku_id": "1456456488",
"brand_name": "Adidas",
"long_description": "",
"key_feature": [
{
"id": 1142,
"key_feature": "Cotton"
},
{
"id": 1141,
"key_feature": "Office Use"
},
{
"id": 1140,
"key_feature": "Black formal"
}
],
"isdeleted": false,
"created_by": null,
"brand_id": 5,
"search_terms": [
],
"color_id": 1,
"specific_keywords": "",
"item_list": [
],
"display_size": "L",
"upc_code": "",
"name": "New Cotton formal shirt black",
"updated_by": null,
"id": 1113,
"updated_date": "2020-03-25T06:48:30.903041+00:00",
"created_date": "2020-03-25T06:48:29.943043+00:00",
"color_name": "black",
"category": [
{
"sub_sub_category": null,
"parent_category_id": 54,
"sub_category": null,
"sub_category_id": null,
"sub_sub_category_id": null,
"parent_category": "MEN'S CLOTHING",
"id": 1149
}
]
}
},
{
"_index": "product_index",
"_type": "product",
"_id": "1114",
"_score": 0.2876821,
"_source": {
"isactive": true,
"in_use": false,
"sku_id": "145645648811",
"brand_name": "Adidas",
"long_description": "",
"key_feature": [
{
"id": 1145,
"key_feature": "Cotton"
},
{
"id": 1144,
"key_feature": "Office Use"
},
{
"id": 1143,
"key_feature": "Black formal"
}
],
"isdeleted": false,
"created_by": null,
"brand_id": 5,
"search_terms": [
],
"color_id": 1,
"specific_keywords": "",
"item_list": [
],
"display_size": "L",
"upc_code": "",
"updated_by": null,
"name": "New Cotton Casual shirt black",
"id": 1114,
"created_date": "2020-03-25T07:13:26.233675+00:00",
"color_name": "black",
"updated_date": "2020-03-25T07:13:27.229363+00:00",
"category": [
{
"sub_sub_category": null,
"parent_category_id": 54,
"sub_category": null,
"sub_category_id": null,
"sub_sub_category_id": null,
"parent_category": "MEN'S CLOTHING",
"id": 1150
}
]
}
}
]
}
}
My requirement is to attach to each document all related documents (with specific key/value fields) that are listed in its item_list, matched by item_id. In the result above, doc id 1115 has an item_list that contains item_id 1114 and 1113, so the selected fields of those documents should be attached to doc 1115.
What should the search query for that be in Elasticsearch?

You can't do a join in Elasticsearch. To achieve your goal, you can do one of two things:
Duplicate the information of item_id 1114 and 1113 in the document of item_id
1115 (and likewise in all other documents).
Do the join at the application level: after this query, extract item_id 1114 and 1113 and run two more queries to get the information about these items. Then join all the JSON at the application level.

Get different values from repeating item JSON

I have this json derived dict:
{
"stats": [
{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
},
{
"name": "- k",
"time": 20,
"uid": "199295228664872961",
"id": 2
},
{
"name": "MAD MARX",
"time": "0",
"uid": "336539711785009153",
"id": 3
},
{
"name": "loli",
"time": 20,
"uid": "366299640976375818",
"id": 4
},
{
"name": "Woona",
"time": 20,
"uid": "246996981178695686",
"id": 5
}
]
}
I want to get the "time" from everybody in the list and use it with sort.
So the result I get has this:
TOP 10:
Jengas: 166
Loli: 20
My first try is to list different values from repeating item.
Right now the code is:
# The file only needs to stay open while it is being parsed.
with open('db.json') as json_data:
    topvjson = json.load(json_data)

print(topvjson)
# Reads the time of the first entry only.
d = topvjson['stats'][0]['time']
print(d)

Extract the stats list, apply sort to it with the appropriate key:
from json import loads

data = loads("""{
"stats": [{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
}, {
"name": "- k",
"time": 20,
"uid": "199295228664872961",
"id": 2
}, {
"name": "MAD MARX",
"time": "0",
"uid": "336539711785009153",
"id": 3
}, {
"name": "loli",
"time": 20,
"uid": "366299640976375818",
"id": 4
}, {
"name": "Woona",
"time": 20,
"uid": "246996981178695686",
"id": 5
}]
}""")

# Sort in place, largest time first. "time" is an int in some entries and a
# string in others, so normalise with int() before comparing.
stats = data['stats']
stats.sort(key=lambda entry: int(entry['time']), reverse=True)

print("TOP 10:")
for entry in stats[:10]:
    print(f"{entry['name']}: {int(entry['time'])}")
This prints:
TOP 10:
Jengas: 166
- k: 20
loli: 20
Woona: 20
MAD MARX: 0
Note that your time is not consistently an integer or a string: the dataset mixes integers (e.g. 20) with strings (e.g. "0"). That's why you need the conversion int(...).

You can sort the list of dict values like:
Code:
# Sort (negated time, name) tuples so the largest time comes first and ties
# fall back to the name, then turn the first three into (name, time) pairs.
ranked = sorted(
    (-int(user['time']), user['name']) for user in stats['stats'])
top_three = [(x[1], -x[0]) for x in ranked[:3]]
This works by taking the time and the name and building a tuple. The tuples can then be sorted, and then the names can be extracted (via: x[1]) after the sort.
Test Code:
stats = {
    "stats": [{
        "name": "Jengas",
        "time": 166,
        "uid": "177098244407558145",
        "id": 1
    }, {
        "name": "- k",
        "time": 20,
        "uid": "199295228664872961",
        "id": 2
    }, {
        "name": "MAD MARX",
        "time": "0",
        "uid": "336539711785009153",
        "id": 3
    }, {
        "name": "loli",
        "time": 20,
        "uid": "366299640976375818",
        "id": 4
    }, {
        "name": "Woona",
        "time": 20,
        "uid": "246996981178695686",
        "id": 5
    }]
}

# Sort (negated time, name) tuples so the largest time comes first, then
# rebuild (name, time) pairs; this matches the tuples shown in the results.
top_three = [(x[1], -x[0]) for x in sorted(
    (-int(user['time']), user['name']) for user in stats['stats'])][:3]
print(top_three)
Results:
[('Jengas', 166), ('- k', 20), ('Woona', 20)]

Here's a way to do it using the built-in sorted() function:
data = {
"stats": [
{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
},
{
etc ...
}
]
}
print('TOP 3')
# Largest time first; int() normalises the "time" values before comparing.
sorted_by_time = sorted(data['stats'], key=lambda d: int(d['time']), reverse=True)
# A slice replaces the manual enumerate/break counter.
for d in sorted_by_time[:3]:
    print('{name}: {time}'.format(**d))
Output:
TOP 3
Jengas: 166
- k: 20
loli: 20

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Transform Json into Pandas Dataframe - python

Related

How to get data from nested list in response.json()

Python - trying to convert time from utc to cst in api response

(Python) Cant scrape data from my targeted site anymore using re, requests, and json

How to join document in search query

Get different values from repeating item JSON

Categories

Resources