import pandas as pd
# Sample input: one [country, state, city] triple per row.
cols = ['COUNTRY', 'STATE', 'CITY']
data = [
    ['INDIA', 'UP', 'BANARAS'],
    ['INDIA', 'UP', 'KANPUR'],
    ['INDIA', 'TN', 'CHENNAI'],
    ['US', 'TEXAS', 'HUSTON'],
]
df = pd.DataFrame(data, columns=cols)
I want result like this...
[
{
"COUNTRY": "INDIA",
"STATE": "TN",
"CITIES": [
{
"CITY": "CHENNAI"
}
]
},
{
"COUNTRY": "INDIA",
"STATE": "UP",
"CITIES": [
{
"CITY": "BANARAS"
},
{
"CITY": "KANPUR"
}
]
},
{
"COUNTRY": "US",
"STATE": "TEXAS",
"CITITES": [
{
"CITY": "HUSTON"
}
]
}
]
You can try
from pprint import pprint  # pprint() is not a builtin; it has to be imported

# Collapse the rows of every (COUNTRY, STATE) pair into a single record whose
# CITIES entry is a list of {"CITY": ...} dicts.  Selecting [['CITY']] before
# apply() means the callback only ever sees the CITY column, so it does not
# rely on the grouping columns being passed to it.
out = (df.groupby(['COUNTRY', 'STATE'])[['CITY']]
         .apply(lambda g: g.to_dict(orient='records'))
         .to_frame('CITIES')
         .reset_index()
         .to_dict(orient='records'))
pprint(out)
[{'CITIES': [{'CITY': 'CHENNAI'}], 'COUNTRY': 'INDIA', 'STATE': 'TN'},
{'CITIES': [{'CITY': 'BANARAS'}, {'CITY': 'KANPUR'}],
'COUNTRY': 'INDIA',
'STATE': 'UP'},
{'CITIES': [{'CITY': 'HUSTON'}], 'COUNTRY': 'US', 'STATE': 'TEXAS'}]
add this to the above code:
import json

# Write the list of records to disk; the `with` block closes the file even if
# json.dump() raises part-way through.
with open("sample.json", "w") as outfile:
    json.dump(out, outfile)
Or you could remove the final .to_dict(orient='records') call, so that out stays a DataFrame, and add this line afterwards:
# `out` must still be a DataFrame here.  orient="records" writes a JSON array
# with one object per row; DataFrame.to_json's default orient would write a
# {column -> {index -> value}} mapping instead.
out.to_json("filename.json", orient="records")
Related
I have a DataFrame like:
id  country  city   amount  duplicated
1   France   Paris  200     1
2   France   Paris  200     1
3   France   Lyon   50      2
4   France   Lyon   50      2
5   France   Lyon   50      2
And I would like to store a list per distinct value in duplicated, like:
list 1
[
{
"id": 1,
"country": "France",
"city": "Paris",
"amount": 200,
},
{
"id": 2,
"country": "France",
"city": "Paris",
"amount": 200,
}
]
list 2
[
{
"id": 3,
"country": "France",
"city": "Lyon",
"amount": 50,
},
{
"id": 4,
"country": "France",
"city": "Lyon",
"amount": 50,
},
{
"id": 5,
"country": "France",
"city": "Lyon",
"amount": 50,
}
]
I tried filtering duplicates with
# keep=False marks every member of a duplicate group as True, so this mask
# selects all rows that share (country, city, amount, duplicated) with at
# least one other row.  In the sample data every row has such a twin
# (ids 1-2 and ids 3-5), so the filtered frame contains all of the rows.
df[df.duplicated(['country','city','amount', 'duplicated'], keep = False)]
but it just returns the same df.
You can use groupby:
# Build one list of row-dicts per group.  Iterating the GroupBy yields the
# complete sub-frame for each key (grouping columns included), whereas
# GroupBy.apply operating on the grouping columns is deprecated in recent
# pandas and would leave 'country', 'city' and 'amount' out of the records.
# Grouping by 'duplicated' alone -- df.groupby('duplicated') -- works as well.
lst = [rows.to_dict('records')
       for _, rows in df.groupby(['country', 'city', 'amount'])]
Output:
>>> lst
[[{'id': 3,
'country': 'France',
'city': 'Lyon',
'amount': 50,
'duplicated': 2},
{'id': 4,
'country': 'France',
'city': 'Lyon',
'amount': 50,
'duplicated': 2},
{'id': 5,
'country': 'France',
'city': 'Lyon',
'amount': 50,
'duplicated': 2}],
[{'id': 1,
'country': 'France',
'city': 'Paris',
'amount': 200,
'duplicated': 1},
{'id': 2,
'country': 'France',
'city': 'Paris',
'amount': 200,
'duplicated': 1}]]
Another solution if you want a dict indexed by duplicated key:
# Map each distinct 'duplicated' value to the list of its rows (as dicts).
# Moving 'duplicated' into the index keeps it out of the generated records.
indexed = df.set_index('duplicated')
data = {}
for key, rows in indexed.groupby(level=0):
    data[key] = rows.to_dict('records')
>>> data[1]
[{'id': 1, 'country': 'France', 'city': 'Paris', 'amount': 200},
{'id': 2, 'country': 'France', 'city': 'Paris', 'amount': 200}]
>>> data[2]
[{'id': 3, 'country': 'France', 'city': 'Lyon', 'amount': 50},
{'id': 4, 'country': 'France', 'city': 'Lyon', 'amount': 50},
{'id': 5, 'country': 'France', 'city': 'Lyon', 'amount': 50}]
If I understand you correctly, you can use DataFrame.to_dict('records') to make your lists:
# One list of row-dicts per value of 'duplicated'.  The second result needs
# its own name, otherwise it overwrites the first.
list_1 = df[df['duplicated'] == 1].to_dict('records')
list_2 = df[df['duplicated'] == 2].to_dict('records')
Or for an arbitrary number of values in the column, you can make a dict:
# One entry per distinct value found in 'duplicated'; each entry holds the
# matching rows as a list of dicts.
result = {
    value: df[df['duplicated'] == value].to_dict('records')
    for value in df['duplicated'].unique()
}
I have this code:
dotteds = ["apple.orange.banana", "a.b.c", "a.b.d"]
name = "name"
avtype = "type"
fields = "fields"

# Build one independent nested record chain per dotted path.  Paths that share
# a prefix are NOT merged here: every path produces its own top-level entry.
main_dictionary_list = []
for x in dotteds:
    split_name = x.split('.')
    # The last component is always a plain string field.
    leaf = {name: split_name[-1], avtype: 'string'}
    if len(split_name) > 1:
        # Every component before the leaf becomes a record with empty fields.
        dicts = [
            {name: y, avtype: {name: y, avtype: "record", fields: []}}
            for y in split_name[:-1]
        ]
        dicts.append(leaf)
        # Hang each element on its predecessor's field list; the elements are
        # linked by reference, so the linking order does not matter.
        for parent, child in zip(dicts, dicts[1:]):
            parent[avtype][fields].append(child)
        main_dictionary_list.append(dicts[0])
    else:
        main_dictionary_list.append(leaf)
print(main_dictionary_list)
Which gives me an output like this:
[{
'name': 'apple',
'type': {
'name': 'apple',
'type': 'record',
'fields': [{
'name': 'orange',
'type': {
'name': 'orange',
'type': 'record',
'fields': [{
'name': 'banana',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'c',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'd',
'type': 'string'
}
]
}
}
]
}
}
]
Ideally I need:
[{
'name': 'apple',
'type': {
'name': 'apple',
'type': 'record',
'fields': [{
'name': 'orange',
'type': {
'name': 'orange',
'type': 'record',
'fields': [{
'name': 'banana',
'type': 'string'
}
]
}
}
]
}
}, {
'name': 'a',
'type': {
'name': 'a',
'type': 'record',
'fields': [{
'name': 'b',
'type': {
'name': 'b',
'type': 'record',
'fields': [{
'name': 'c',
'type': 'string'
},
{
'name': 'd',
'type': 'string'
}
]
}
}
]
}
}
]
I need to be able to do this with any number of combinations:
apple.orange.banana, a.b.c, a.b.d, a.b.q.e.a.s.d, etc.
I cannot figure out how to combine the similar 'name: key' combinations. It's intended to be avro format.
I have also tried making the dotted values into a dictionary which is a bit of trouble on its own.
You can use recursion with collections.defaultdict:
from collections import defaultdict
import json


def group(vals, last=None):
    """Merge dotted paths into a nested list of Avro-style field dicts.

    Args:
        vals: iterable of paths, each already split into its components,
            e.g. ``[['a', 'b', 'c'], ['a', 'b', 'd']]``.
        last: unused; accepted only so that existing ``group(vals, last=...)``
            calls keep working.

    Returns:
        A list with one dict per distinct first component, in first-seen
        order.  A component with nothing below it becomes
        ``{'name': n, 'type': 'string'}``; otherwise it becomes
        ``{'name': n, 'type': {'name': n, 'type': 'record', 'fields': [...]}}``
        with ``fields`` built recursively from the remaining components.
        When a name is used both as a leaf and as a prefix of longer paths,
        the record form wins.
    """
    # Bucket the remainder of every path under its first component.
    children = defaultdict(list)
    for parts in vals:
        if parts:  # an empty path contributes nothing
            children[parts[0]].append(parts[1:])

    merged = []
    for field_name, tails in children.items():
        deeper = [tail for tail in tails if tail]
        if deeper:
            field_type = {
                'name': field_name,
                'type': 'record',
                'fields': group(deeper),
            }
        else:
            field_type = 'string'
        merged.append({'name': field_name, 'type': field_type})
    return merged


vals = ['apple.orange.banana', 'a.b.c', 'a.b.d']
print(json.dumps(group([i.split('.') for i in vals]), indent=4))
# Output:
# [
#     {
#         "name": "apple",
#         "type": {
#             "name": "apple",
#             "type": "record",
#             "fields": [
#                 {
#                     "name": "orange",
#                     "type": {
#                         "name": "orange",
#                         "type": "record",
#                         "fields": [
#                             {
#                                 "name": "banana",
#                                 "type": "string"
#                             }
#                         ]
#                     }
#                 }
#             ]
#         }
#     },
#     {
#         "name": "a",
#         "type": {
#             "name": "a",
#             "type": "record",
#             "fields": [
#                 {
#                     "name": "b",
#                     "type": {
#                         "name": "b",
#                         "type": "record",
#                         "fields": [
#                             {
#                                 "name": "c",
#                                 "type": "string"
#                             },
#                             {
#                                 "name": "d",
#                                 "type": "string"
#                             }
#                         ]
#                     }
#                 }
#             ]
#         }
#     }
# ]

vals = ['asd.2', 'asd.3', 'asd.5.3.4']
print(json.dumps(group([i.split('.') for i in vals]), indent=4))
# Output:
# [
#     {
#         "name": "asd",
#         "type": {
#             "name": "asd",
#             "type": "record",
#             "fields": [
#                 {
#                     "name": "2",
#                     "type": "string"
#                 },
#                 {
#                     "name": "3",
#                     "type": "string"
#                 },
#                 {
#                     "name": "5",
#                     "type": {
#                         "name": "5",
#                         "type": "record",
#                         "fields": [
#                             {
#                                 "name": "3",
#                                 "type": {
#                                     "name": "3",
#                                     "type": "record",
#                                     "fields": [
#                                         {
#                                             "name": "4",
#                                             "type": "string"
#                                         }
#                                     ]
#                                 }
#                             }
#                         ]
#                     }
#                 }
#             ]
#         }
#     }
# ]
I have a dataframe with goal scorers and I would like to extract the top scoring group into an array. This group can contain more than one items (in the example below there are two players with 8 goals).
So in the example below it would result in an array like this:
[{'goals': 8, 'name': 'Sergio Agüero', 'team': 'Manchester City'}, {'goals': 8, 'name': 'Tammy Abraham', 'team': 'Chelsea'}]
import pandas as pd
# Raw scorer records.  Note that the goal counts are stored as strings.
data = [
    {"name": "Sergio Ag\u00fcero", "team": "Manchester City", "goals": "8"},
    {"name": "Tammy Abraham", "team": "Chelsea", "goals": "8"},
    {"name": "Pierre-Emerick Aubameyang", "team": "Arsenal", "goals": "7"},
    {"name": "Raheem Sterling", "team": "Manchester City", "goals": "6"},
    {"name": "Teemu Pukki", "team": "Norwich", "goals": "6"},
]
column_order = ["name", "team", "goals"]
top_scorers = pd.DataFrame(data, columns=column_order)
# Group the scorers by their goal count.
top_scoring_group = top_scorers.groupby("goals")
If I understand correctly, you want the rows whose goal count equals the maximum:
# The goal counts are stored as strings, so compare them numerically: a plain
# string max() is lexicographic and would rank "8" above "10".  'records' is
# the documented orient name; abbreviated or alias spellings such as 'rows'
# are no longer accepted by pandas 2.x.
goal_counts = pd.to_numeric(top_scorers['goals'])
(top_scorers[goal_counts.eq(goal_counts.max())]
 .to_dict('records')
)
Output:
[{'name': 'Sergio Agüero', 'team': 'Manchester City', 'goals': '8'},
{'name': 'Tammy Abraham', 'team': 'Chelsea', 'goals': '8'}]
# Total goals per team, keeping every team tied for first place.  'goals' is
# converted to numbers first: summing the raw strings would concatenate them
# ("8" + "6" -> "86") and nlargest() does not accept an object column.
top_scoring_group = (
    top_scorers.assign(goals=pd.to_numeric(top_scorers['goals']))
    .groupby("team", as_index=False)['goals'].sum()
    .nlargest(1, 'goals', keep='all')['team']
)
This will get the teams with most goals, and keep them all if there are more than one.
I have two lists of dicts
# The opening bracket has to sit on the same line as the assignment, and every
# key has to be the quoted string "id" (a bare id would be the builtin).
list1 = [
    {"name": "Maria", "id": "16a"},
    {"name": "Tania", "id": "13b"},
    {"name": "Steve", "id": "5a"},
]
list2 = [
    {"name": "Eric", "id": "16a"},
    {"name": "Mike", "id": "7b"},
    {"name": "Steve", "id": "57a"},
]
I want to be able to return a list of dicts from list2, if the same id is not found in list1
For example, it should return
[
{"name": "Mike",
"id": "7b",
},
{"name": "Steve",
"id": "57a",
}
]
I tried a few suggestions here on stack overflow but haven't been able to get it right.
Use a list-comprehension that iterates through list2 checking the id with ids in list1:
list1 = [
    {'name': "Maria", 'id': "16a"},
    {'name': "Tania", 'id': "13b"},
    {'name': "Steve", 'id': "5a"},
]
list2 = [
    {'name': "Eric", 'id': "16a"},
    {'name': "Mike", 'id': "7b"},
    {'name': "Steve", 'id': "57a"},
]
# Collect the ids of list1 once, in a set, so each lookup below is a hash
# probe rather than a scan over all of list1.
list1_ids = {y['id'] for y in list1}
# Keep the entries of list2 whose id never occurs in list1 (order preserved).
result = [x for x in list2 if x['id'] not in list1_ids]
# [{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
This should do:
# Build the lookup once; nesting the id list inside the comprehension would
# rebuild it for every element of list2.
list1_id_set = {d1['id'] for d1 in list1}
[d2 for d2 in list2 if d2['id'] not in list1_id_set]
Output:
[{'id': '7b', 'name': 'Mike'}, {'id': '57a', 'name': 'Steve'}]
You can also do it using filter function:
list1 = [
    {"name": "Maria", "id": "16a"},
    {"name": "Tania", "id": "13b"},
    {"name": "Steve", "id": "5a"},
]
list2 = [
    {"name": "Eric", "id": "16a"},
    {"name": "Mike", "id": "7b"},
    {"name": "Steve", "id": "57a"},
]

# Every id that appears in list1.
IDs = {entry["id"] for entry in list1}


def _id_is_new(elem):
    """Return True when elem's id does not occur in list1."""
    return elem["id"] not in IDs


# filter() keeps the list2 entries accepted by the predicate, in order.
output = list(filter(_id_is_new, list2))
print(output)
Output:
[{'name': 'Mike', 'id': '7b'}, {'name': 'Steve', 'id': '57a'}]
I've got an array of dictionaries that looks like this:
[
{ 'country': 'UK', 'city': 'Manchester' },
{ 'country': 'UK', 'city': 'Liverpool' },
{ 'country': 'France', 'city': 'Paris' } ...
]
And I want to end up with a dictionary like this:
{ 'Liverpool': 'UK', 'Manchester': 'UK', ... }
Obviously I can do this:
# Map each city name to its country.  A later entry with the same city
# overwrites an earlier one.
d = {}
for c in cities:
    d[c['city']] = c['country']
But is there any way I could do it with a single-line map?
You can use a dict comprehension :
>>> li = [
... { 'country': 'UK', 'city': 'Manchester' },
... { 'country': 'UK', 'city': 'Liverpool' },
... { 'country': 'France', 'city': 'Paris' }
... ]
>>> {d['city']: d['country'] for d in li}
{'Paris': 'France', 'Liverpool': 'UK', 'Manchester': 'UK'}
Or use operator.itemgetter and the map function:
>>> import operator
>>> dict(map(operator.itemgetter('city','country'),li))
{'Paris': 'France', 'Liverpool': 'UK', 'Manchester': 'UK'}