Related
I have a JSON feed I need to collect data from, but I am running into some issues with iterating over and parsing the API data.
The function below collects what I need, but some horses get scratched (don't race) and the format of their JSON is different from that of horses whose status is "Starter".
def race_data():
    """Fetch every race from ``races_list()`` and print one line per competitor.

    The fixed win and fixed place odds are taken from fixed positions in each
    competitor's ``prices`` list.
    """
    for race_key in races_list():
        url = f'{Base_url}events/{race_key}.json?app_id={AppID}&app_key={AppKey}'
        response = requests.get(url)
        # parse race data
        data = json.loads(response.text)
        for race_odds in data['competitors']:
            # Runner details.
            horse_name = race_odds['name']
            jockey_name = race_odds['jockey']
            jockey_weight = race_odds['weight']
            trainer_name = race_odds['trainer']
            # Odds are looked up by their position in the prices list.
            fixed_win_odds = race_odds['prices'][2]['price']
            fixed_place_odds = race_odds['prices'][0]['price']
            # Race-level details, re-read for every competitor.
            race_key = data['eventKey']
            race_time = data['eventDateTimeUtc']
            horse_status = race_odds['status']
            print(f'{race_key} {horse_name} {jockey_name} {jockey_weight} {trainer_name} {fixed_win_odds} {fixed_place_odds} {race_time} {horse_status}')


if __name__ == "__main__":
    race_data()
which produces
202210290200.T.AUS.quirindi.1 Ponte Pietra Jai Williams 0 Nikki Pollock 1.1 16.0 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Gundawarra Vad Bolozhinskyi 0 G A O'brien 4.5 35.0 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Cemented Ms Chelsea Hillier 0 Sally Torrens 2.5 35.0 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Jack Duggan Ms Madeline Owen 0 J C Deamer 1.1 2.7 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Northern Borders Ms Zara Lewis 0 W T Martyn 9.6 68.1 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Iffland Ms Amelia Denby 0 K A Lees 1.9 19.4 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 La Brea Benjamin Osmond 0 K A Lees 2.1 14.0 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 My Gem Jacob Golden 0 Ms S Grills 2.8 16.1 2022-10-29T02:55:00Z Starter
202210290200.T.AUS.quirindi.1 Another Super Patrick Scorse 0 M J Dwyer 4.9 87.5 2022-10-29T02:55:00Z Starter
I run into an issue when one of the runners is scratched (not racing).
The problem is with the odds price fields: when a horse is scratched (as indicated by horse_status), its prices list has a different layout, so the following position-based lookups fail:
fixed_win_odds = race_odds['prices'][2]['price']
fixed_place_odds = race_odds['prices'][0]['price']
The JSON below is for a horse that is scratched:
{
"status": "Scratched",
"startPos": 3,
"trainer": "Ms L Selby",
"shortForm": "0608247373",
"sequence": 7,
"weight": "0",
"name": "Bingo Banko",
"jockey": "Unknown",
"prices":
[
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 0
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 0,
"productType": "Current"
},
{
"price": 16,
"productType": "Max"
},
{
"price": 16,
"productType": "Open"
},
{
"price": 15,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
}
],
"price": 0
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.bingo_banko",
"competitorKey": "202210290200.T.AUS.quirindi.1.bingo_banko"
}
and this is what the JSON looks like when the horse is a starter (not scratched)
{
"status": "Starter",
"startPos": 9,
"trainer": "M J Dwyer",
"shortForm": "7626280005",
"sequence": 6,
"weight": "0",
"name": "Another Super",
"jockey": "Patrick Scorse",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 85
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 3.9
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 5.7
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 17,
"productType": "Current"
},
{
"price": 20,
"productType": "Max"
},
{
"price": 19,
"productType": "Open"
},
{
"price": 17,
"productType": "Last"
},
{
"price": 18,
"productType": "Last"
},
{
"price": 17,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 15,
"productType": "Last"
}
],
"price": 17
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.another_super",
"competitorKey": "202210290200.T.AUS.quirindi.1.another_super"
}
I still need to collect the information on the horses that are scratched, so I need some help with restructuring the iteration to collect the data without getting a key error on horses that are scratched.
you can view the full JSON for a race below for reference:
[
{
"status": "Starter",
"startPos": 5,
"trainer": "Nikki Pollock",
"shortForm": "x686623464",
"sequence": 3,
"weight": "0",
"name": "Ponte Pietra",
"jockey": "Jai Williams",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 15.6
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 2.6
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 1.04
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 10,
"productType": "Current"
},
{
"price": 16,
"productType": "Max"
},
{
"price": 16,
"productType": "Open"
},
{
"price": 10,
"productType": "Last"
},
{
"price": 9,
"productType": "Last"
},
{
"price": 9,
"productType": "Last"
},
{
"price": 8,
"productType": "Last"
},
{
"price": 8,
"productType": "Last"
}
],
"price": 10
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.ponte_pietra",
"competitorKey": "202210290200.T.AUS.quirindi.1.ponte_pietra"
},
{
"status": "Starter",
"startPos": 4,
"trainer": "G A O'brien",
"shortForm": "90x4491",
"sequence": 4,
"weight": "0",
"name": "Gundawarra",
"jockey": "Vad Bolozhinskyi",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 34
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 1.45
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 11
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 3,
"productType": "Current"
},
{
"price": 3,
"productType": "Max"
},
{
"price": 3,
"productType": "Open"
},
{
"price": 3,
"productType": "Last"
},
{
"price": 3,
"productType": "Last"
},
{
"price": 3,
"productType": "Last"
},
{
"price": 3,
"productType": "Last"
},
{
"price": 3,
"productType": "Last"
}
],
"price": 3.7
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.gundawarra",
"competitorKey": "202210290200.T.AUS.quirindi.1.gundawarra"
},
{
"status": "Starter",
"startPos": 6,
"trainer": "Sally Torrens",
"shortForm": "7816x64858",
"sequence": 1,
"weight": "0",
"name": "Cemented",
"jockey": "Ms Chelsea Hillier",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 85
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 3.1
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 3
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 13,
"productType": "Current"
},
{
"price": 19,
"productType": "Max"
},
{
"price": 19,
"productType": "Open"
},
{
"price": 13,
"productType": "Last"
},
{
"price": 14,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 15,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
}
],
"price": 13
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.cemented",
"competitorKey": "202210290200.T.AUS.quirindi.1.cemented"
},
{
"status": "Starter",
"startPos": 2,
"trainer": "J C Deamer",
"shortForm": "75x5456x73",
"sequence": 2,
"weight": "0",
"name": "Jack Duggan",
"jockey": "Ms Madeline Owen",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 3
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 1.14
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 1.04
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 1,
"productType": "Current"
},
{
"price": 1,
"productType": "Max"
},
{
"price": 1,
"productType": "Open"
},
{
"price": 1,
"productType": "Last"
},
{
"price": 1,
"productType": "Last"
},
{
"price": 1,
"productType": "Last"
},
{
"price": 1,
"productType": "Last"
},
{
"price": 1,
"productType": "Last"
}
],
"price": 1.8
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.jack_duggan",
"competitorKey": "202210290200.T.AUS.quirindi.1.jack_duggan"
},
{
"status": "Starter",
"startPos": 10,
"trainer": "W T Martyn",
"shortForm": "00409",
"sequence": 8,
"weight": "0",
"name": "Northern Borders",
"jockey": "Ms Zara Lewis",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 116
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 8
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 9
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 41,
"productType": "Current"
},
{
"price": 51,
"productType": "Max"
},
{
"price": 34,
"productType": "Open"
},
{
"price": 41,
"productType": "Last"
},
{
"price": 51,
"productType": "Last"
},
{
"price": 41,
"productType": "Last"
},
{
"price": 51,
"productType": "Last"
},
{
"price": 41,
"productType": "Last"
}
],
"price": 41
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.northern_borders",
"competitorKey": "202210290200.T.AUS.quirindi.1.northern_borders"
},
{
"status": "Starter",
"startPos": 8,
"trainer": "K A Lees",
"shortForm": "6",
"sequence": 9,
"weight": "0",
"name": "Iffland",
"jockey": "Ms Amelia Denby",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 33.5
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 3.3
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 1.7
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 14,
"productType": "Current"
},
{
"price": 20,
"productType": "Max"
},
{
"price": 17,
"productType": "Open"
},
{
"price": 14,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 14,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 14,
"productType": "Last"
}
],
"price": 14
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.iffland",
"competitorKey": "202210290200.T.AUS.quirindi.1.iffland"
},
{
"status": "Starter",
"startPos": 7,
"trainer": "K A Lees",
"shortForm": "55x570x58",
"sequence": 10,
"weight": "0",
"name": "La Brea",
"jockey": "Benjamin Osmond",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 43.3
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 2.6
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 1.9
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 10,
"productType": "Current"
},
{
"price": 13,
"productType": "Max"
},
{
"price": 11,
"productType": "Open"
},
{
"price": 10,
"productType": "Last"
},
{
"price": 9,
"productType": "Last"
},
{
"price": 9,
"productType": "Last"
},
{
"price": 8,
"productType": "Last"
},
{
"price": 9,
"productType": "Last"
}
],
"price": 10
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.la_brea",
"competitorKey": "202210290200.T.AUS.quirindi.1.la_brea"
},
{
"status": "Starter",
"startPos": 1,
"trainer": "Ms S Grills",
"shortForm": "673410x084",
"sequence": 5,
"weight": "0",
"name": "My Gem",
"jockey": "Jacob Golden",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 14.9
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 3.7
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 3.2
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 16,
"productType": "Current"
},
{
"price": 16,
"productType": "Max"
},
{
"price": 13,
"productType": "Open"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 15,
"productType": "Last"
},
{
"price": 14,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 15,
"productType": "Last"
}
],
"price": 16
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.my_gem",
"competitorKey": "202210290200.T.AUS.quirindi.1.my_gem"
},
{
"status": "Starter",
"startPos": 9,
"trainer": "M J Dwyer",
"shortForm": "7626280005",
"sequence": 6,
"weight": "0",
"name": "Another Super",
"jockey": "Patrick Scorse",
"prices":
[
{
"betType": "Win",
"code": "BT3",
"flucs": [],
"price": 85
},
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 3.9
},
{
"betType": "Place",
"code": "BT2P",
"flucs": [],
"price": 5.7
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 17,
"productType": "Current"
},
{
"price": 20,
"productType": "Max"
},
{
"price": 19,
"productType": "Open"
},
{
"price": 17,
"productType": "Last"
},
{
"price": 18,
"productType": "Last"
},
{
"price": 17,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
},
{
"price": 15,
"productType": "Last"
}
],
"price": 17
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.another_super",
"competitorKey": "202210290200.T.AUS.quirindi.1.another_super"
},
{
"status": "Scratched",
"startPos": 3,
"trainer": "Ms L Selby",
"shortForm": "0608247373",
"sequence": 7,
"weight": "0",
"name": "Bingo Banko",
"jockey": "Unknown",
"prices":
[
{
"betType": "FixedPlace",
"code": "FXD",
"flucs": [],
"price": 0
},
{
"betType": "FixedWin",
"code": "FXD",
"flucs":
[
{
"price": 0,
"productType": "Current"
},
{
"price": 16,
"productType": "Max"
},
{
"price": 16,
"productType": "Open"
},
{
"price": 15,
"productType": "Last"
},
{
"price": 16,
"productType": "Last"
}
],
"price": 0
}
],
"id": "competitor:202210290200.T.AUS.quirindi.1.bingo_banko",
"competitorKey": "202210290200.T.AUS.quirindi.1.bingo_banko"
}
]
thanks for your assistance.
Use a for loop to iterate through the prices, and check the betType key to see if it's either FixedWin or FixedPlace, that way the ordering of the different prices doesn't matter.
Also, since the race key and the race time are the same for every competitor in a race, you should read them once per race, before the loop over the competitors, so your implementation doesn't need to keep looking them up in the dictionary.
def race_data():
    """Print one line per competitor for every race returned by ``races_list()``.

    Each line holds the event key, horse, jockey, weight, trainer, fixed win
    odds, fixed place odds, race time and status.  Prices are matched on their
    ``betType`` instead of their position in the ``prices`` list, so the
    ordering and number of price entries does not matter.  A bet type that is
    absent for a competitor is printed as ``None``.
    """
    for race_key in races_list():
        data = requests.get(f'{Base_url}events/{race_key}.json?app_id={AppID}&app_key={AppKey}').text
        # parse race data
        data = json.loads(data)
        # These are the same for every competitor, so read them once per race.
        race_key = data['eventKey']
        race_time = data['eventDateTimeUtc']
        # The runners live under the 'competitors' key; iterating over `data`
        # itself would only yield the top-level key names.
        for race_odds in data['competitors']:
            horse_name = race_odds['name']
            jockey_name = race_odds['jockey']
            jockey_weight = race_odds['weight']
            trainer_name = race_odds['trainer']
            # Reset for each horse so that a missing bet type neither raises
            # NameError nor reuses the previous horse's price.
            fixed_win_odds = None
            fixed_place_odds = None
            for price in race_odds['prices']:
                if price["betType"] == "FixedWin":
                    fixed_win_odds = price["price"]
                elif price["betType"] == "FixedPlace":
                    fixed_place_odds = price["price"]
            horse_status = race_odds['status']
            print(f'{race_key} {horse_name} {jockey_name} {jockey_weight} {trainer_name} {fixed_win_odds} {fixed_place_odds} {race_time} {horse_status}')


if __name__ == "__main__":
    race_data()
I have the following Dataframe with MultiIndex rows in pandas.
time available_slots status
month day
1 1 10:00:00 1 AVAILABLE
1 12:00:00 1 AVAILABLE
1 14:00:00 1 AVAILABLE
1 16:00:00 1 AVAILABLE
1 18:00:00 1 AVAILABLE
2 10:00:00 1 AVAILABLE
... ... ... ...
2 28 12:00:00 1 AVAILABLE
28 14:00:00 1 AVAILABLE
28 16:00:00 1 AVAILABLE
28 18:00:00 1 AVAILABLE
28 20:00:00 1 AVAILABLE
And I need to transform it to a hierarchical nested JSON as this:
[
{
"month": 1,
"days": [
{
"day": 1,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
...
]
},
{
"day": 2,
"slots": [
...
]
}
]
},
{
"month": 2,
"days":[
{
"day": 1,
"slots": [
...
]
}
]
},
...
]
Unfortunately, it is not as easy as doing df.to_json(orient="index").
Does anyone know if there is a method in pandas to perform this kind of transformation, or how I could iterate over the DataFrame to build the final object?
Here's one way. Basically repeated groupby + apply(to_dict) + reset_index until we get the desired shape:
# Build the nested month -> days -> slots JSON by grouping twice.
out = (df.groupby(level=[0,1])
    # one list of row dicts per pair of index levels (month, day)
    .apply(lambda x: x.to_dict('records'))
    # move the index levels back into columns; the lists end up in column 0
    .reset_index()
    .rename(columns={0:'slots'})
    # second pass: per month, collect its day/slots rows as a list of dicts
    .groupby('month')
    .apply(lambda x: x[['day','slots']].to_dict('records'))
    .reset_index()
    .rename(columns={0:'days'})
    # serialise the month/days table as a JSON array of objects
    .to_json(orient='records', indent=True)
)
Output:
[
{
"month":1,
"days":[
{
"day":1,
"slots":[
{
"time":"10:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"12:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"14:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"16:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"18:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
},
{
"day":2,
"slots":[
{
"time":"10:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
}
]
},
{
"month":2,
"days":[
{
"day":28,
"slots":[
{
"time":"12:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"14:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"16:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"18:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"20:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
}
]
}
]
You can use a double loop for each level of your index:
# One entry per month (index level 0); each holds one entry per day
# (index level 1) whose slots are that day's rows as dicts.
data = [
    {
        'month': month,
        'days': [
            {'day': day, 'slots': day_frame.to_dict('records')}
            for day, day_frame in month_frame.groupby(level=1)
        ],
    }
    for month, month_frame in df.groupby(level=0)
]
Output:
import json
# Pretty-print the nested list built by the loop as JSON with a two-space indent.
print(json.dumps(data, indent=2))
[
{
"month": 1,
"days": [
{
"day": 1,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "14:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "16:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "18:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
},
{
"day": 2,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
}
]
},
{
"month": 2,
"days": [
{
"day": 28,
"slots": [
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "14:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "18:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "20:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
}
]
}
]
In a Django application, I want to use a dictionary as elements of a result.html page:
<tbody>
{% for element in products%}
<tr>
<td>{{ element['q0']['Results'][0]['Name'] }}</td>
</tr>
{% endfor %}
</tbody>
But it returns Could not parse the remainder: '['q0']['Results'][0]['Name']' from 'element['q0']['Results'][0]['Name']':
return render(request, 'todo/result.html', {'products': top_products})
File "C:\Python36\lib\site-packages\django\shortcuts.py", line 19, in render
content = loader.render_to_string(template_name, context, request, using=using)
File "C:\Python36\lib\site-packages\django\template\loader.py", line 61, in render_to_string
template = get_template(template_name, using=using)
File "C:\Python36\lib\site-packages\django\template\loader.py", line 15, in get_template
return engine.get_template(template_name)
File "C:\Python36\lib\site-packages\django\template\backends\django.py", line 34, in get_template
return Template(self.engine.get_template(template_name), self)
File "C:\Python36\lib\site-packages\django\template\engine.py", line 143, in get_template
template, origin = self.find_template(template_name)
File "C:\Python36\lib\site-packages\django\template\engine.py", line 125, in find_template
template = loader.get_template(name, skip=skip)
File "C:\Python36\lib\site-packages\django\template\loaders\base.py", line 30, in get_template
contents, origin, origin.template_name, self.engine,
File "C:\Python36\lib\site-packages\django\template\base.py", line 155, in __init__
self.nodelist = self.compile_nodelist()
File "C:\Python36\lib\site-packages\django\template\base.py", line 193, in compile_nodelist
return parser.parse()
File "C:\Python36\lib\site-packages\django\template\base.py", line 478, in parse
raise self.error(token, e)
File "C:\Python36\lib\site-packages\django\template\base.py", line 476, in parse
compiled_result = compile_func(self, token)
File "C:\Python36\lib\site-packages\django\template\defaulttags.py", line 814, in do_for
nodelist_loop = parser.parse(('empty', 'endfor',))
File "C:\Python36\lib\site-packages\django\template\base.py", line 449, in parse
raise self.error(token, e)
File "C:\Python36\lib\site-packages\django\template\base.py", line 447, in parse
filter_expression = self.compile_filter(token.contents)
File "C:\Python36\lib\site-packages\django\template\base.py", line 563, in compile_filter
return FilterExpression(token, self)
File "C:\Python36\lib\site-packages\django\template\base.py", line 663, in __init__
"from '%s'" % (token[upto:], token))
django.template.exceptions.TemplateSyntaxError: Could not parse the remainder: '['q0']['Results'][0]['Name']' from 'element['q0']['Results'][0]['Name']'
It was sent by views.py:
def getmatch(request):
    """Render ``todo/result.html`` with ``top_products`` passed to the template as ``products``."""
    # ... (omitted) code that obtains top_products ...
    # Debug output: show the first product.
    print(top_products[0])
    return render(request, 'todo/result.html', {'products': top_products})
Here is an example of a product top_products[0]:
{
"q1": {
"Id": "q1",
"Limit": 20,
"Offset": 0,
"TotalResults": 0,
"Locale": "fr_FR",
"Results": [],
"Includes": {},
"HasE rrors": false,
"Errors": []
},
"q0": {
"Id": "q0",
"Limit": 10,
"Offset": 0,
"TotalResults": 1,
"Locale": "fr_FR",
"Results": [
{
"EANs": [
"8011003827336"
],
"Description": "L’aur a divine d’une femme habillée d’une essence éblouissante et sensuelle…\nEros pour Femme est le mythe signé Versace, qui déclenche la passion débordante d’Eros au pre mier regard.\n\nMais qui séduit qui ?\nEros pour Femme est une invitation à s’abandonner au désir, en osmose avec les forces de la nature apaisée.\n\nAudacieuse, cré ative et sensuelle, comme seule peut l’être la maison Versace, cette Eau de Toilette révèle une aura radieuse et une séduction irrésistible.",
"ImageUrl": "https://w ww.sephora.fr/dw/image/v2/BCVW_PRD/on/demandware.static/-/Sites-masterCatalog_Sephora/default/dw99b648b2/images/hi-res/SKU/SKU_5/359845_swatch.jpg?sw=250&sh=250&sm=f it",
"Name": "Eros pour Femme - Eau de Toilette",
"Id": "P2615007",
"CategoryId": "parfum_719097",
"BrandExternalId": "versace_c45bfd",
"Brand": {
"Id": "versace_c45b fd",
"Name": "VERSACE"
},
"Active": true,
"ProductPageUrl": "https://www.sephora.fr/p/eros-pour-femme---eau-de-toilette-359845.html",
"Disabled": false,
"ISBNs": [],
"FamilyIds": [],
"UPCs": [],
"StoryIds": [],
"ModelNumbers": [],
"Attributes": {},
"QuestionIds": [],
"AttributesOrder": [],
"ReviewIds": [],
"ManufacturerPartNumber s": [],
"QAStatistics": {
"QuestionHelpfulVoteCount": 0,
"FirstAnswerTime": "None",
"LastQuestionAnswerTime": "None",
"FirstQuestionTime": "None",
"FeaturedAnswerCount": 0,
"LastAnswerTime": "None",
"TagDistribution": {},
"ContextDataDistribution": {},
"TotalAnswerCount": 0,
"FeaturedQuestionCount": 0,
"LastQuestionTime": "None",
"Question NotHelpfulVoteCount": 0,
"BestAnswerCount": 0,
"TagDistributionOrder": [],
"AnswerHelpfulVoteCount": 0,
"HelpfulVoteCount": 0,
"AnswerNotHelpfulVoteCount": 0,
"Total QuestionCount": 0,
"ContextDataDistributionOrder": []
},
"TotalQuestionCount": 0,
"TotalAnswerCount": 0,
"ReviewStatistics": {
"ContextDataDistributionOrder": [
"Gender ",
"Age",
"Eyes",
"Skin",
"loyalty"
],
"ContextDataDistribution": {
"Gender": {
"Id": "Gender",
"Values": [
{
"Count": 7,
"Value": "Female"
}
]
},
"Age": {
"Id": "Age",
"Valu es": [
{
"Count": 1,
"Value": "13to17"
},
{
"Count": 2,
"Value": "18to24"
},
{
"Count": 1,
"Value": "25to34"
},
{
"Count": 1,
"Value": "35to44"
},
{
"Count": 1,
"Value": "45to 54"
},
{
"Count": 1,
"Value": "plus54"
}
]
},
"Eyes": {
"Id": "Eyes",
"Values": [
{
"Count": 2,
"Value": "Marrons"
},
{
"Count": 3,
"Value": "Bleus"
},
{
"Count": 1,
"Value": "N oirs"
}
]
},
"Skin": {
"Id": "Skin",
"Values": [
{
"Count": 1,
"Value": "Normale"
},
{
"Count": 2,
"Value": "Seche"
},
{
"Count": 2,
"Value": "Mixte"
},
{
"Count": 1,
"Value": " Deshydratee"
}
]
},
"loyalty": {
"Id": "loyalty",
"Values": [
{
"Count": 2,
"Value": "Yes--Im-a-VIB"
},
{
"Count": 2,
"Value": "Yes--Im-a-VIB-Rouge"
},
{
"Count": 2,
"Value": "No"
}
]
}
},
"AverageOverallRating": 4.428571428571429,
"NotHelpfulVoteCount": 1,
"FeaturedReviewCount": 0,
"NotRecommendedCount": 1,
"HelpfulVoteCount": 19,
"RatingDis tribution": [
{
"RatingValue": 5,
"Count": 5
},
{
"RatingValue": 2,
"Count": 1
},
{
"RatingValue": 4,
"Count": 1
}
],
"RecommendedCount": 5,
"RatingsOnlyReviewCount": 0,
"To talReviewCount": 7,
"FirstSubmissionTime": "2017-05-28T22:46:00.000+00:00",
"LastSubmissionTime": "2020-03-21T19:01:26.000+00:00",
"SecondaryRatingsAveragesOrder": [],
"SecondaryRatingsAverages": {},
"OverallRatingRange": 5,
"TagDistributionOrder": [],
"TagDistribution": {}
},
"TotalReviewCount": 7,
"FilteredQAStatistics": {
"Ques tionHelpfulVoteCount": 0,
"FirstAnswerTime": "None",
"LastQuestionAnswerTime": "None",
"FirstQuestionTime": "None",
"FeaturedAnswerCount": 0,
"LastAnswerTime": "None",
"TagD istribution": {},
"ContextDataDistribution": {},
"TotalAnswerCount": 0,
"FeaturedQuestionCount": 0,
"LastQuestionTime": "None",
"QuestionNotHelpfulVoteCount": 0,
"Best AnswerCount": 0,
"TagDistributionOrder": [],
"AnswerHelpfulVoteCount": 0,
"HelpfulVoteCount": 0,
"AnswerNotHelpfulVoteCount": 0,
"TotalQuestionCount": 0,
"ContextDat aDistributionOrder": []
},
"FilteredReviewStatistics": {
"ContextDataDistributionOrder": [
"Gender",
"Age",
"Eyes",
"Skin",
"loyalty"
],
"ContextDataDistribution": {
"Gen der": {
"Id": "Gender",
"Values": [
{
"Count": 7,
"Value": "Female"
}
]
},
"Age": {
"Id": "Age",
"Values": [
{
"Count": 1,
"Value": "13to17"
},
{
"Count": 2,
"Value": "18to24"
},
{
"Count": 1,
"Value": "25to34"
},
{
"Count": 1,
"Value": "35to44"
},
{
"Count": 1,
"Value": "45to54"
},
{
"Count": 1,
"Value": "plus54"
}
]
},
"Eyes": {
"Id": "Eyes",
"Value s": [
{
"Count": 2,
"Value": "Marrons"
},
{
"Count": 3,
"Value": "Bleus"
},
{
"Count": 1,
"Value": "Noirs"
}
]
},
"Skin": {
"Id": "Skin",
"Values": [
{
"Count": 1,
"Value": "Nor male"
},
{
"Count": 2,
"Value": "Seche"
},
{
"Count": 2,
"Value": "Mixte"
},
{
"Count": 1,
"Value": "Deshydratee"
}
]
},
"loyalty": {
"Id": "loyalty",
"Values": [
{
"Count": 2,
"Value": "Yes--Im-a-VIB"
},
{
"Count": 2,
"Value": "Yes--Im-a-VIB-Rouge"
},
{
"Count": 2,
"Value": "No"
}
]
}
},
"AverageOverallRating": 4.428571428571429,
"NotHelpfulVoteCo unt": 1,
"FeaturedReviewCount": 0,
"NotRecommendedCount": 1,
"HelpfulVoteCount": 19,
"RatingDistribution": [
{
"RatingValue": 5,
"Count": 5
},
{
"RatingValue": 2,
"Count ": 1
},
{
"RatingValue": 4,
"Count": 1
}
],
"RecommendedCount": 5,
"RatingsOnlyReviewCount": 0,
"TotalReviewCount": 7,
"FirstSubmissionTime": "2017-05-28T22:46:00.000+00 :00",
"LastSubmissionTime": "2020-03-21T19:01:26.000+00:00",
"SecondaryRatingsAveragesOrder": [],
"SecondaryRatingsAverages": {},
"OverallRatingRange": 5,
"TagDistri butionOrder": [],
"TagDistribution": {}
}
}
],
"Includes": {},
"HasErrors": false,
"Errors": []
},
"d": {
"attributs": {
"Doux": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Délicat": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Elegant": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Mature": {
"claimed_benefit": 0,
" perceived_benefit": 0
},
"Sexy": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Féminin": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Frais": {
"claimed_ benefit": 0,
"perceived_benefit": 0.14285714285714285
},
"Classe": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Mou": {
"claimed_benefit": 0,
"perceived_benefit": 0.14285714285714285
},
"Décontracté": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Comme les autres": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Jeu ne femme": {
"claimed_benefit": 1,
"perceived_benefit": 0.14285714285714285
},
"charmant": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Gai": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Propre": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Eté": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Rafraîchissant ": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Chaud": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Masculin": {
"claimed_benefit": 0,
"perceived_benefit ": 0
},
"Fiable": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Mystérieux": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Furtif": {
"claimed_benefit": 0,
"perceived_benefit": 0.14285714285714285
},
"Fort": {
"claimed_benefit": 0,
"perceived_benefit": 0.14285714285714285
},
"Hivernal": {
"claimed_benefit": 0,
"perceived_ benefit": 0
},
"Herbacé": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Plantes": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Big brands": {
"claimed_be nefit": 0,
"perceived_benefit": 0
},
"Luxueux": {
"claimed_benefit": 0,
"perceived_benefit": 0
},
"Connu": {
"claimed_benefit": 0,
"perceived_benefit": 0.2857142857142857
},
"A la mode": {
"claimed_benefit": 0,
"perceived_benefit": 0
}
}
},
"total": 0
}
The Django template language does not support square-bracket notation; it uses dots for dictionary keys, attributes and list indices alike. So the expression should be: {{ element.q0.Results.0.Name }}
I have a big JSON file with a very complex structure
you can look on it here: https://drive.google.com/file/d/1tBVJ2xYSCpTTUGPJegvAz2ZXbeN0bteX/view?usp=sharing
it contains more than 7 millions lines, and I want to extract only the "text" field
I have written Python code to extract all the values of the "text" key (field) in the whole file, but it extracted only 12 values, while when I open the JSON file in Visual Studio I can see more than 19000 values!
you can see the code here:
# Dump the "text" field of conversation turns from frames2.json into a
# one-column CSV file.
# NOTE(review): indentation was lost when this snippet was pasted; the bodies of
# the `with` and `for` statements below are shown flush-left.
import json
import csv
# Load the whole JSON document into memory.
with open("/Users/zahraa-maher/rasa-init-demo/venv/Tickie/external_data/frames2.json") as file:
data = json.load(file)
fname = "outputText8.csv"
with open(fname, "w") as file:
csv_file = csv.writer(file,lineterminator='\n')
# Header row for the single output column.
csv_file.writerow(["text"])
# NOTE(review): `i` is never assigned in this snippet, and data[i] selects a
# single top-level entry, so only that one entry's "turns" are written.
for item in data[i]["turns"]:
csv_file.writerow([item['text']])
Please take a look at the JSON file itself: it is very large and has a complex structure, so I cannot paste it all here because it would not be understandable.
Here is a part of the JSON file:
[
{
"user_id": "U22HTHYNP",
"turns": [
{
"text": "I'd like to book a trip to Atlantis from Caprica on Saturday, August 13, 2016 for 8 adults. I have a tight budget of 1700.",
"labels": {
"acts": [
{
"args": [
{
"val": "book",
"key": "intent"
}
],
"name": "inform"
},
{
"args": [
{
"val": "Atlantis",
"key": "dst_city"
},
{
"val": "Caprica",
"key": "or_city"
},
{
"val": "Saturday, August 13, 2016",
"key": "str_date"
},
{
"val": "8",
"key": "n_adults"
},
{
"val": "1700",
"key": "budget"
}
],
"name": "inform"
}
],
"acts_without_refs": [
{
"args": [
{
"val": "book",
"key": "intent"
}
],
"name": "inform"
},
{
"args": [
{
"val": "Atlantis",
"key": "dst_city"
},
{
"val": "Caprica",
"key": "or_city"
},
{
"val": "Saturday, August 13, 2016",
"key": "str_date"
},
{
"val": "8",
"key": "n_adults"
},
{
"val": "1700",
"key": "budget"
}
],
"name": "inform"
}
],
"active_frame": 1,
"frames": [
{
"info": {
"intent": [
{
"val": "book",
"negated": false
}
],
"budget": [
{
"val": "1700.0",
"negated": false
}
],
"dst_city": [
{
"val": "Atlantis",
"negated": false
}
],
"or_city": [
{
"val": "Caprica",
"negated": false
}
],
"str_date": [
{
"val": "august 13",
"negated": false
}
],
"n_adults": [
{
"val": "8",
"negated": false
}
]
},
"frame_id": 1,
"requests": [],
"frame_parent_id": null,
"binary_questions": [],
"compare_requests": []
}
]
},
"author": "user",
"timestamp": 1471272019730.0
},
{
"db": {
"result": [
[
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 51,
"month": 8
},
"departure": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 9
},
"price": 2118.81,
"hotel": {
"gst_rating": 7.15,
"vicinity": [],
"name": "Scarlet Palms Resort",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_PARKING",
"FREE_WIFI"
],
"dst_city": "Goiania",
"category": "3.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 2,
"min": 37
},
"arrival": {
"hour": 12,
"year": 2016,
"day": 10,
"min": 37,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 10,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 2,
"min": 37
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 4,
"min": 37,
"month": 8
},
"departure": {
"hour": 22,
"year": 2016,
"day": 3,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 7
},
"price": 2369.83,
"hotel": {
"gst_rating": 0,
"vicinity": [],
"name": "Sunway Hostel",
"country": "Argentina",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI"
],
"dst_city": "Rosario",
"category": "2.0 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 24,
"min": 0,
"month": 8
}
},
"seat": "BUSINESS",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 51,
"month": 8
},
"departure": {
"hour": 0,
"year": 2016,
"day": 16,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 9
},
"price": 2375.72,
"hotel": {
"gst_rating": 7.15,
"vicinity": [],
"name": "Scarlet Palms Resort",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_PARKING",
"FREE_WIFI"
],
"dst_city": "Goiania",
"category": "3.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 1,
"min": 30
},
"arrival": {
"hour": 11,
"year": 2016,
"day": 1,
"min": 30,
"month": 9
},
"departure": {
"hour": 10,
"year": 2016,
"day": 1,
"min": 0,
"month": 9
}
},
"seat": "BUSINESS",
"leaving": {
"duration": {
"hours": 1,
"min": 30
},
"arrival": {
"hour": 18,
"year": 2016,
"day": 19,
"min": 30,
"month": 8
},
"departure": {
"hour": 17,
"year": 2016,
"day": 19,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 13
},
"price": 2492.95,
"hotel": {
"gst_rating": 0,
"vicinity": [],
"name": "Hotel Mundo",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI",
"FREE_PARKING"
],
"dst_city": "Manaus",
"category": "2.5 star hotel"
}
},
{
"trip": {
"returning": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 10,
"year": 2016,
"day": 31,
"min": 51,
"month": 8
},
"departure": {
"hour": 10,
"year": 2016,
"day": 31,
"min": 0,
"month": 8
}
},
"seat": "ECONOMY",
"leaving": {
"duration": {
"hours": 0,
"min": 51
},
"arrival": {
"hour": 19,
"year": 2016,
"day": 27,
"min": 51,
"month": 8
},
"departure": {
"hour": 19,
"year": 2016,
"day": 27,
"min": 0,
"month": 8
}
},
"or_city": "Porto Alegre",
"duration_days": 4
},
"price": 2538.0,
"hotel": {
"gst_rating": 8.22,
"vicinity": [],
"name": "The Glee",
"country": "Brazil",
"amenities": [
"FREE_BREAKFAST",
"FREE_WIFI"
],
"dst_city": "Recife",
"category": "4.0 star hotel"
}
}
],
[],
[],
[],
[],
[],
[]
],
"search": [
{
"ORIGIN_CITY": "Porto Alegre",
"PRICE_MIN": "2000",
"NUM_ADULTS": "2",
"timestamp": 1471271949.995,
"PRICE_MAX": "3000",
"ARE_DATES_FLEXIBLE": "true",
"NUM_CHILDREN": "5",
"START_TIME": "1470110400000",
"MAX_DURATION": 2592000000.0,
"DESTINATION_CITY": "Brazil",
"RESULT_LIMIT": "10",
"END_TIME": "1472616000000"
},
{
"ORIGIN_CITY": "Atlantis",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272148.124,
"PRICE_MAX": "1700",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "NaN",
"END_TIME": "NaN"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MAX": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272189.07,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MAX": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272205.436,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272278.72,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1470715200000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272454.542,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1471060800000",
"END_TIME": "1472011200000"
},
{
"ORIGIN_CITY": "Caprica",
"PRICE_MIN": "1700",
"NUM_ADULTS": "8",
"RESULT_LIMIT": "10",
"timestamp": 1471272466.008,
"DESTINATION_CITY": "Atlantis",
"NUM_CHILDREN": "",
"ARE_DATES_FLEXIBLE": "true",
"START_TIME": "1471060800000",
"END_TIME": "1472011200000"
}
]
},
How could it be modified to extract all the "text" values from the JSON file to a CSV file?
This is a potential solution using pandas:
import pandas as pd

# Read the conversations file into a DataFrame and keep the two columns of interest.
frames = pd.read_json("frames2.json")
conversations = frames[["user_id", "turns"]]

# Flatten every conversation's list of turns into a single list of "text" values.
texts = [
    turn["text"]
    for turns in conversations["turns"].values
    for turn in turns
]

# Write the collected texts to CSV as a Series named "text".
text_series = pd.Series(texts, name="text")
text_series.to_csv("text.csv")
It gives the following output.
Try:
import json
import csv

# Load the whole JSON document; per the sample above, its top level is a list
# of conversations, each with a "user_id" and a list of "turns".
with open("/Users/zahraa-maher/rasa-init-demo/venv/Tickie/external_data/frames2.json") as file:
    data = json.load(file)

fname = "outputText8.csv"
# newline="" lets the csv module control line endings itself.
with open(fname, "w", newline="") as file:
    csv_file = csv.writer(file, lineterminator='\n')
    csv_file.writerow(["text"])
    # Walk every conversation and every turn, not just one entry, so that all
    # "text" values in the file are exported.
    for conversation in data:
        for turn in conversation["turns"]:
            # Skip turns without a "text" key rather than failing part-way
            # through the export.
            if "text" in turn:
                csv_file.writerow([turn["text"]])
Now it is up to you which of the fields you want to save; if you use a debugger you can inspect the keys and values.
I have a scenario as depicted in the Python code below.
In it I am trying to explicitly define new york and ny as synonyms, but unfortunately it is not working. Can you please guide me, as I am new to Elasticsearch?
I am also using a custom analyzer.
I also have the file synonyms.txt containing the text:
ny,newyork,nyork
# Set up synonym-aware analyzers for the "test" index and index a handful of
# keywords into it, one document per keyword.
# NOTE(review): indentation was lost when this snippet was pasted; the body of
# the `for` loop below is shown flush-left.
from datetime import datetime
from elasticsearch import Elasticsearch
# Client constructed with no arguments.
es = Elasticsearch()
# Keywords to index; each becomes its own document.
keywords = ['thousand eyes', 'facebook', 'superdoc', 'quora', 'your story', 'Surgery', 'lending club', 'ad roll',
'the honest company', 'Draft kings', 'newyork']
# Running counter used as the document id.
count = 1
# Settings and mapping: two custom analyzers that share one file-based synonym filter.
doc_setting = {
"settings": {
"analysis": {
"analyzer": {
# Analyzer built on the "keyword" tokenizer.
"my_analyzer_keyword": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"asciifolding",
"lowercase",
"synonym"
]
},
# Analyzer built on the "standard" tokenizer, with the same filter chain.
"my_analyzer_shingle": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"asciifolding",
"lowercase",
"synonym"
]
}
},
"filter": {
# Synonym filter that reads its rules from synonyms.txt.
"synonym": {
"type": "synonym",
"synonyms_path": "synonyms.txt",
"ignore_case": "true"
}
}
}
}, "mappings": {
"your_type": {
"properties": {
# NOTE(review): the working mapping shown later on this page uses
# "type": "text" and "analyzer" instead of "string" / "index_analyzer";
# these keys look version-dependent -- verify against the server version.
"keyword": {
"type": "string",
"index_analyzer": "my_analyzer_keyword",
"search_analyzer": "my_analyzer_shingle"
}
}
}
}
}
# NOTE(review): es.index() is given the settings/mapping dict as `body`, i.e. as
# a document to store. Presumably es.indices.create(index='test', body=doc_setting)
# was intended so that the analyzers are actually installed -- confirm against
# the elasticsearch-py documentation.
validate=es.index(index='test', doc_type='your_type', body=doc_setting)
print(validate)
# Index each keyword as a separate document, using the counter as its id.
for keyword in keywords:
doc = {
'id': count,
'keyword': keyword
}
res = es.index(index="test", doc_type='your_type', id=count, body=doc)
print(res['result'])
count = count + 1
#res11 = es.get(index="test", doc_type='your_type', id=1)
#print(res11['_source'])
# Refresh the index before it is searched.
es.indices.refresh(index="test")
# Free-text question whose terms are matched against the indexed keywords.
question = "I saw news on ny news channel of lending club on facebook, your story and quora"
print("Question asked: %s" % question)

# Run a match query on the "keyword" field. A stray "`enter code here`" editor
# placeholder inside this call made it a syntax error and has been removed.
res = es.search(
    index="test",
    doc_type='your_type',
    body={"query": {"match": {"keyword": question}}},
)

# Report the hit count, then print the stored source of every hit.
print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print(hit["_source"])
PUT /test_index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer_keyword": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"asciifolding",
"lowercase",
"synonym"
]
},
"my_analyzer_shingle": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"asciifolding",
"lowercase",
"synonym"
]
}
},
"filter": {
"synonym" : {
"type" : "synonym",
"lenient": true,
"synonyms" : ["ny,newyork,nyork"]
}
}
}
}, "mappings": {
"your_type": {
"properties": {
"keyword": {
"type": "text",
"analyzer": "my_analyzer_keyword",
"search_analyzer": "my_analyzer_shingle"
}
}
}
}
}
Then analyze using:
POST /test_index/_analyze
{
"analyzer" : "my_analyzer_shingle",
"text" : "I saw news on ny news channel of lending club on facebook, your story and quorat"
}
The tokens I get are
{
"tokens": [
{
"token": "i",
"start_offset": 0,
"end_offset": 1,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "saw",
"start_offset": 2,
"end_offset": 5,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "news",
"start_offset": 6,
"end_offset": 10,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "on",
"start_offset": 11,
"end_offset": 13,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "ny",
"start_offset": 14,
"end_offset": 16,
"type": "<ALPHANUM>",
"position": 4
},
{
"token": "newyork",
"start_offset": 14,
"end_offset": 16,
"type": "SYNONYM",
"position": 4
},
{
"token": "nyork",
"start_offset": 14,
"end_offset": 16,
"type": "SYNONYM",
"position": 4
},
{
"token": "news",
"start_offset": 17,
"end_offset": 21,
"type": "<ALPHANUM>",
"position": 5
},
{
"token": "channel",
"start_offset": 22,
"end_offset": 29,
"type": "<ALPHANUM>",
"position": 6
},
{
"token": "of",
"start_offset": 30,
"end_offset": 32,
"type": "<ALPHANUM>",
"position": 7
},
{
"token": "lending",
"start_offset": 33,
"end_offset": 40,
"type": "<ALPHANUM>",
"position": 8
},
{
"token": "club",
"start_offset": 41,
"end_offset": 45,
"type": "<ALPHANUM>",
"position": 9
},
{
"token": "on",
"start_offset": 46,
"end_offset": 48,
"type": "<ALPHANUM>",
"position": 10
},
{
"token": "facebook",
"start_offset": 49,
"end_offset": 57,
"type": "<ALPHANUM>",
"position": 11
},
{
"token": "your",
"start_offset": 59,
"end_offset": 63,
"type": "<ALPHANUM>",
"position": 12
},
{
"token": "story",
"start_offset": 64,
"end_offset": 69,
"type": "<ALPHANUM>",
"position": 13
},
{
"token": "and",
"start_offset": 70,
"end_offset": 73,
"type": "<ALPHANUM>",
"position": 14
},
{
"token": "quorat",
"start_offset": 74,
"end_offset": 80,
"type": "<ALPHANUM>",
"position": 15
}
]
}
and the search produces
POST /test_index/_search
{
"query" : {
"match" : { "keyword" : "I saw news on ny news channel of lending club on facebook, your story and quora" }
}
}
{
"took": 36,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1.6858001,
"hits": [
{
"_index": "test_index",
"_type": "your_type",
"_id": "4",
"_score": 1.6858001,
"_source": {
"keyword": "newyork"
}
},
{
"_index": "test_index",
"_type": "your_type",
"_id": "2",
"_score": 1.1727304,
"_source": {
"keyword": "facebook"
}
},
{
"_index": "test_index",
"_type": "your_type",
"_id": "5",
"_score": 0.6931472,
"_source": {
"keyword": "quora"
}
}
]
}
}