Flatten nested JSON inside arrays with Python - python
I want to convert this:
{} Json
{} 0
[] variants
{} 0
fileName
id
{} mediaType
baseFilePath
id
name
sortOrder
{} 1
fileName
id
{} mediaType
baseFilePath
id
name
sortOrder
Into this:
{} Json
{} 0
[] variants
{} 0
fileName
id
mediaType_baseFilePath
mediaType_id
mediaType_name
sortOrder
{} 1
fileName
id
mediaType_baseFilePath
mediaType_id
mediaType_name
sortOrder
Basically, each {} nested directly inside another {} should be merged into its parent, but the row numbers (the array indices) should not be merged.
This is the code I wrote:
def flatten_json(y):
    """Flatten the nested dict ``y`` into a single-level dict.

    Keys of nested dicts are joined with '_'. Lists are stored unchanged
    under their joined key, so dicts inside a list are not flattened.

    NOTE: Python 2 code (uses ``print`` statements).
    """
    out = {}

    def flatten(x, name=''):
        # ``name`` is the key path so far; each component is followed by '_'.
        if type(x) is dict:
            print type(x), name
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            print type(x), name
            # The list is stored as-is: its elements are never visited.
            out[name[:-1]] = x
        else:
            # name[:-1] drops the trailing '_' appended by the caller.
            out[name[:-1]] = x

    flatten(y)
    return out
def generatejson(response2):
    """Return a JSON string of every data set in ``response2``, flattened.

    ``response2`` is a sequence ``[first data set, second data set, ...]``.
    Each data set is flattened with ``flatten_json`` (merging sub-objects)
    and stored under its position, giving ``{0: ..., 1: ...}``, which is
    then serialized with sorted keys.
    """
    flattened_by_position = {}
    for position, record in enumerate(response2):
        flattened_by_position[position] = flatten_json(record)
    return json.dumps(flattened_by_position, sort_keys=True)
This is the result of the code on my sample data:
As you can see manufacturer was merged but mediaType was not.
The code prints:
<type 'dict'>
<type 'list'> additionalLocaleInfos_
<type 'list'> variants_
<type 'dict'> manufacturer_
My aim was for values of type list to be examined further by the recursion. The code is supposed to detect that inside the variants list there is also a mediaType dict, but it doesn't.
Data sample for generatejson(response2) - is a list of this structure:
[{"additionalLocaleInfos": [], "approved": false, "approvedBy": null, "approvedOn": null, "catalogId": 4, "code": "611",
"createdOn": "2018-03-24 09:39", "customsCode": null, "deletedOn": null, "id": 1, "invariantName": "Leisure Ali Baba Trousers", "isPermanent": false, "locale": null, "madeIn": null,
"manufacturer": {"createdOn": "2018-02-23 18:20", "deletedOn": null, "id": 1, "invariantName": "Unknown", "updatedOn": "2018-02-23 18:20"},
"onNoStockShowComingSoon": false, "season": "", "updatedOn": "2018-03-24 09:39",
"variants": [{"assets": [{"fileName": "mu/2016/05/16/leisure-ali-baba-trousers-32956-0.jpg", "id": 1,
"mediaType": {"baseFilePath": "Catalog", "id": 7, "name": "Product Main Image"}, "sortOrder": 0}]} ]}]
Full example can be found here (but not mandatory for the question)
http://www.filedropper.com/file_389
How can I make it look inside the list to check if it's made of more objects?
This code works only when there are no arrays. For some reason it doesn't look inside an array to see what objects are in it.
Something like this will flatten a dict structure containing dicts, lists and tuples into a flat dict.
The json_data blob is an excerpt from the data you posted.
import json
import collections
# Excerpt of the posted sample data (a single product object).
# Line breaks appear only BETWEEN JSON tokens: json.loads() rejects a raw
# newline inside a JSON string, so no string value may be wrapped.
json_data = """
{"additionalLocaleInfos":[],"approved":false,"approvedBy":null,"approvedOn":null,"catalogId":4,"code":"611",
"createdOn":"2018-03-24 09:39","customsCode":null,"deletedOn":null,"id":1,"invariantName":"Leisure Ali Baba Trousers",
"isPermanent":false,"locale":null,"madeIn":null,
"manufacturer":{"createdOn":"2018-02-23 18:20","deletedOn":null,"id":1,"invariantName":"Unknown","updatedOn":"2018-02-23 18:20"},
"onNoStockShowComingSoon":false,"season":"","updatedOn":"2018-03-24 09:39",
"variants":[{
"assets":[
{"fileName":"mu/2016/05/16/leisure-ali-baba-trousers-32956-0.jpg","id":1,"mediaType":{"baseFilePath":"Catalog","id":7,"name":"Product Main Image"},"sortOrder":0},
{"fileName":"080113/3638.jpg","id":2,"mediaType":{"baseFilePath":"Catalog","id":8,"name":"Product Additional Image"},"sortOrder":0},
{"fileName":"mu/2016/05/16/leisure-ali-baba-trousers-32956-1.jpg","id":3,"mediaType":{"baseFilePath":"Catalog","id":8,"name":"Product Additional Image"},"sortOrder":0},
{"fileName":"mu/2015/07/21/leisure-ali-baba-trousers-13730-0.jpg","id":4,"mediaType":{"baseFilePath":"Catalog","id":8,"name":"Product Additional Image"},"sortOrder":0},
{"fileName":"mu/2016/05/16/leisure-ali-baba-trousers-32956-2.jpg","id":5,"mediaType":{"baseFilePath":"Catalog","id":8,"name":"Product Additional Image"},"sortOrder":0},
{"fileName":"mu/2015/07/29/leisure-ali-baba-trousers-13853-0.jpg","id":6,"mediaType":{"baseFilePath":"Catalog","id":8,"name":"Product Additional Image"},"sortOrder":0}],
"attributes":[
{"attribute":{"code":"COL","cultureNeutralName":"Color","id":1,"useAsFilter":false},"code":"BLACK","groupId":0,"id":3,"invariantValue":"BLACK","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"SZ","cultureNeutralName":"Size","id":2,"useAsFilter":false},"code":"ONE SIZE","groupId":0,"id":7,"invariantValue":"ONE SIZE","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"WEIGHT","cultureNeutralName":"WEIGHT","id":14,"useAsFilter":false},"code":"0.30","groupId":0,"id":2,"invariantValue":"0.30","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"STLPTND","cultureNeutralName":"OsStyleOptionId","id":25,"useAsFilter":false},"code":"2","groupId":0,"id":6,"invariantValue":"2","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"STLNMBR","cultureNeutralName":"OsStyleNumber","id":26,"useAsFilter":false},"code":"611-1412","groupId":0,"id":1,"invariantValue":"611-1412","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"SZFCTEN","cultureNeutralName":"SizeFacetEn","id":35,"useAsFilter":true},"code":"S","groupId":0,"id":8,"invariantValue":"S","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"SZFCTEN","cultureNeutralName":"SizeFacetEn","id":35,"useAsFilter":true},"code":"M","groupId":0,"id":9,"invariantValue":"M","locale":null,"sortOrder":0,"valueLocale":null},
{"attribute":{"code":"SZFCTEN","cultureNeutralName":"SizeFacetEn","id":35,"useAsFilter":true},"code":"L","groupId":0,"id":10,"invariantValue":"L","locale":null,"sortOrder":0,"valueLocale":null}],
"cost":0,"createdOn":"2018-03-24 09:39","deletedOn":null,"eaN1":"2500002822528","eaN2":null,"eaN3":null,"id":1,"isDefault":false,"locale":null,"sku":"611-1412-28","sortOrder":0,"upC1":null,"upC2":null,"upC3":null,"updatedOn":"2018-03-24 09:39",
"variantInventories":[{"defectiveQty":0,"id":1,"lastUpdate":"2018-03-24 09:39","orderLevelQty":0,"preorderQty":0,"qtyInStock":0,"reorderQty":0,"reservedQty":100,"transferredQty":0,"variantId":1,"warehouseId":1}],
"variantPrices":[{"id":1,"price":5,"priceListId":1,"priceType":{"code":"Base price","id":1,"remarks":null},"validFrom":"2018-03-24 09:39","validUntil":"2068-03-24 09:39","variantId":1}]}]}
""".strip()
# Parsed form of the excerpt: a dict whose "variants" entry is a list of dicts.
data = json.loads(json_data)
def flatten_object(d, out=None, name_path=()):
    """Flatten nested dicts, lists and tuples into one flat mapping.

    Args:
        d: The dict, list or tuple to flatten.
        out: Mapping that receives the results. A new
            ``collections.OrderedDict`` is created when omitted; recursive
            calls pass the shared accumulator here.
        name_path: Tuple of keys/indices leading from the root to ``d``.

    Returns:
        ``out``, mapping each leaf's path (a tuple of dict keys and sequence
        indices) to the leaf value. Empty containers contribute no entries.
    """
    # Compare against None explicitly: an empty accumulator is falsy, so
    # ``out or OrderedDict()`` would swap it for a fresh dict inside the
    # recursive call and silently drop everything found there.
    if out is None:
        out = collections.OrderedDict()
    iterator = d.items() if isinstance(d, dict) else enumerate(d)
    for index, value in iterator:
        i_path = name_path + (index,)
        if isinstance(value, (list, dict, tuple)):
            # Containers are descended into; only leaves are recorded.
            flatten_object(value, out, i_path)
        else:
            out[i_path] = value
    return out
# Print every flattened leaf as "<path atoms joined with '_'> <value>".
flattened = flatten_object(data)
for path, leaf in flattened.items():
    print('_'.join(map(str, path)), leaf)
The output here will be something like
approved False
approvedBy None
approvedOn None
[...]
variants_0_cost 0
variants_0_createdOn 2018-03-24 09:39
variants_0_deletedOn None
variants_0_eaN1 2500002822528
variants_0_eaN2 None
variants_0_eaN3 None
variants_0_assets_0_fileName mu/2016/05/16/leisure-ali-baba-trousers-32956-0.jpg
variants_0_assets_0_id 1
variants_0_assets_0_mediaType_baseFilePath Catalog
variants_0_assets_0_mediaType_id 7
variants_0_assets_0_mediaType_name Product Main Image
variants_0_assets_0_sortOrder 0
variants_0_assets_1_fileName 080113/3638.jpg
variants_0_assets_1_id 2
variants_0_assets_1_mediaType_baseFilePath Catalog
variants_0_assets_1_mediaType_id 8
variants_0_assets_1_mediaType_name Product Additional Image
variants_0_assets_1_sortOrder 0
variants_0_assets_2_fileName mu/2016/05/16/leisure-ali-baba-trousers-32956-1.jpg
[...]
variants_0_attributes_0_attribute_code COL
variants_0_attributes_0_attribute_cultureNeutralName Color
variants_0_attributes_0_attribute_id 1
variants_0_attributes_0_attribute_useAsFilter False
variants_0_attributes_0_code BLACK
variants_0_attributes_0_groupId 0
variants_0_attributes_0_id 3
variants_0_attributes_0_invariantValue BLACK
variants_0_attributes_0_locale None
variants_0_attributes_0_sortOrder 0
variants_0_attributes_0_valueLocale None
variants_0_attributes_1_attribute_code SZ
variants_0_attributes_1_attribute_cultureNeutralName Size
variants_0_attributes_1_attribute_id 2
variants_0_attributes_1_attribute_useAsFilter False
variants_0_attributes_1_code ONE SIZE
variants_0_attributes_1_groupId 0
variants_0_attributes_1_id 7
variants_0_attributes_1_invariantValue ONE SIZE
variants_0_attributes_1_locale None
variants_0_attributes_1_sortOrder 0
variants_0_attributes_1_valueLocale None
variants_0_attributes_2_attribute_code WEIGHT
variants_0_attributes_2_attribute_cultureNeutralName WEIGHT
variants_0_attributes_2_attribute_id 14
variants_0_attributes_2_attribute_useAsFilter False
variants_0_attributes_2_code 0.30
variants_0_attributes_2_groupId 0
[...]
but you'll probably only want to run this on a single object within variants, or a list of attributes.
# Flatten each asset of the first variant and merge the results into one
# dict. dict.update() overwrites existing keys, so for keys shared between
# assets only the value from the last asset remains.
variant = data['variants'][0]
merged_flattened_assets = dict()
for asset in variant['assets']:
    merged_flattened_assets.update({
        # str() every path atom: list indices are ints, and str.join()
        # raises TypeError on non-string items.
        '_'.join(str(atom) for atom in key): value
        for (key, value)
        in flatten_object(asset).items()
    })
for key, value in merged_flattened_assets.items():
    print(key, value)
outputs
fileName mu/2015/07/29/leisure-ali-baba-trousers-13853-0.jpg
id 6
mediaType_baseFilePath Catalog
mediaType_id 8
mediaType_name Product Additional Image
sortOrder 0
Related
Dynamic list creation and append values - python
I have a input data that is parsed from a json and printing the output like this from keys like tablename,columnname,columnlength data = ('tablename', 'abc.xyz'),('tablename','abc.xyz'),('columnname', 'xxx'),('columnname', 'yyy'),('columnlen', 55) data[0] = abc.xyz abc.xyz abc.xyz data[1] = xxx yyy zzz data[2] = 20 30 60 data[0] represents tablename data[1] represents columnname data[2] represents column length I have code below that does creating the empty list manually TableName_list = [] ColumnName_list = [] ColumnLen_list = [] for x in data: if x[0] == 'tablename': TableName_list.append(data[0]]) elif x[0] == 'columnname': ColumnName_list.append(data[1]) elif x[0] == 'columnlen': ColumnLen_list.append(data[2]) I need to create a dynamic empty list respectively for each fields(tablename,column,columnlength) and append the data to that empty list in the dictionary and my output is needed like this in a dictionary dict = {'TableName':TableName_list,'ColumnName':ColumnName_list,'ColumnLen':columnLength_list }
This is probably most easily done with a defaultdict: from collections import defaultdict dd = defaultdict(list) data = [ ('tablename', 'abc.xyz'),('tablename','abc.xyz'), ('columnname', 'xxx'),('columnname', 'yyy'), ('columnlen', 55),('columnlen', 30) ] for d in data: dd[d[0]].append(d[1]) Output: defaultdict(<class 'list'>, { 'tablename': ['abc.xyz', 'abc.xyz'], 'columnname': ['xxx', 'yyy'], 'columnlen': [55, 30] }) If the case of the names in the result is important, you could use a dictionary to translate the incoming names: aliases = { 'tablename' : 'TableName', 'columnname' : 'ColumnName', 'columnlen' : 'ColumnLen' } for d in data: dd[aliases[d[0]]].append(d[1]) Output: defaultdict(<class 'list'>, { 'TableName': ['abc.xyz', 'abc.xyz'], 'ColumnName': ['xxx', 'yyy'], 'ColumnLen': [55, 30] })
I suggest to make a dictionary directly, something look like this: out_dict = {} for x in data: key = x[0] if key in out_dict.keys(): out_dict[key] = out_dict[key].append(x[1]) else: out_dict[key] = [x[1]]
using pandas: import pandas as pd >>> pd.DataFrame(data).groupby(0)[1].apply(list).to_dict() ''' {'columnlen': [55, 30], 'columnname': ['xxx', 'yyy'], 'tablename': ['abc.xyz', 'abc.xyz']}
PYTHON - Fastest Way of Flattening/Exploding multiple large JSON files with nested arrays, have more than 100000 json files
I have written an efficient JSON flattening logic that explodes and join nested JSON arrays it works faster for on JSON with more than 100s of nested arrays and nested dict but problem is now I have 100000 JSON files to handle. is there a way to either merge multiple JSONs to one big ad run this code or something else, any help will be great.... I know there are some duplicate question but this is mainly regarding the efficiently handling large number of large JSON files # let's say I have this json and flattening/exploding code: from collections import defaultdict, MutableMapping from copy import deepcopy import pandas as pd sample = { "rss": { "overview": { "id": { "data": [ { "stuff": [ { "onetype": [ {"id": '1', "name": "John Doe"}, {"id": '2', "name": "Don Joeh"}, ] }, {"othertype": [{"id": '2', "company": "ACME"}]}, ] }, {"otherstuff": [{"thing": [['1', '42'], ['2', '2']]}]}, ] } } } } # Flattening with exploding Logic: def cross_join(left, right): new_rows = [] if right else left for left_row in left: for right_row in right: temp_row = deepcopy(left_row) for key, value in right_row.items(): temp_row[key] = value new_rows.append(deepcopy(temp_row)) return new_rows def dict_maker(dic_list): dd = defaultdict(list) for d in dic_list: for key, value in d.items(): dd[key].append(value) return dd def flatten_list(data): for elem in data: if isinstance(elem, list): yield from flatten_list(elem) else: yield elem def flatten_struct(data, prev_heading=""): if isinstance(data, dict): rows = [{}] for key, value in data.items(): rows = cross_join(rows, flatten_struct(value, prev_heading + "_" + key)) elif isinstance(data, list): rows = [] for i in range(len(data)): [ rows.append(elem) for elem in flatten_list(flatten_struct(data[i], prev_heading)) ] else: rows = [{prev_heading[1:]: data}] return rows def flatten(d, parent_key="", sep="_"): items = [] if isinstance(d, dict): for k, v in d.items(): new_key = parent_key + sep + k if parent_key else k if isinstance(v, 
MutableMapping): items.extend(flatten(v, new_key, sep=sep).items()) else: items.append((new_key, v)) else: {} return dict(items) def get_section_df(section, section_grp, id=None): df_lst = [] finalMap = {} for elem in section: d = flatten(elem) flat = [ {k + "_" + key: val for key, val in dict_maker(flatten_struct(v)).items()} if isinstance(v, list) else {k: v} for k, v in d.items() ] for new_d in flat: finalMap.update(new_d) # finalMap.update({k:v for k,v in id}) if len(finalMap) > 0: df = pd.concat( { str(section_grp) + "_" + k.replace("#", "").replace("#", ""): pd.Series(v) for k, v in finalMap.items() }, axis=1, ) df_lst.append(df) return df_lst def process(json_sample): df_list = [] master_d = flatten(json_sample) master_keys = [k for k in master_d.keys() if type(master_d.get(k)) == list] grouped_path_dict = {x: x.split("_")[2] for x in master_keys} master_id = '' for flatted in master_keys: lst = master_d.get(flatted) path_group = grouped_path_dict.get(flatted) # if isinstance(lst, list): if len(get_section_df(section=lst, id=master_id, section_grp=path_group)) > 0: pdf = pd.concat( get_section_df(section=lst, id=master_id, section_grp=path_group) ) df_list.append(pdf) df = pd.concat(df_list) return df print(process(json_sample=sample)) id_stuff_onetype_id id_stuff_onetype_name id_stuff_othertype_id id_stuff_othertype_company id_otherstuff_thing 0 1 John Doe 2 ACME NaN 1 2 Don Joeh NaN NaN NaN 0 1 John Doe 2 ACME 1 1 2 Don Joeh NaN NaN 42 2 NaN NaN NaN NaN 2 3 NaN NaN NaN NaN 2
Create a DataFrame by iterating up to the nth level of values in a nested dictionary
I have a json file downloaded from this link/website human diseased icd-11 classification, this data have a upto 8 level of nesting e.g: "name":"br08403", "children":[ { "name":"01 Certain infectious or parasitic diseases", "children":[ { "name":"Gastroenteritis or colitis of infectious origin", "children":[ { "name":"Bacterial intestinal infections", "children":[ { "name":"1A00 Cholera", "children":[ { "name":"H00110 Cholera" } I tried with this code: def flatten_json(nested_json): """ Flatten json object with nested keys into a single level. Args: nested_json: A nested json object. Returns: The flattened json object if successful, None otherwise. """ out = {} def flatten(x, name=''): if type(x) is dict: for a in x: flatten(x[a], name + a + '_') elif type(x) is list: i = 0 for a in x: flatten(a, name + str(i) + '_') i += 1 else: out[name[:-1]] = x flatten(nested_json) return out df2 = pd.Series(flatten_json(dictionary)).to_frame() output i'm getting is: name br08403 children_0_name 01 Certain infectious or parasitic diseases children_0_children_0_name Gastroenteritis or colitis of infectious origin children_0_children_0_children_0_name Bacterial intestinal infections children_0_children_0_children_0_children_0_name 1A00 Cholera ... ... children_21_children_17_children_10_name NF0A Certain early complications of trauma, n... children_21_children_17_children_11_name NF0Y Other specified effects of external causes children_21_children_17_children_12_name NF0Z Unspecified effects of external causes children_21_children_18_name NF2Y Other specified injury, poisoning or cer... children_21_children_19_name NF2Z Unspecified injury, poisoning or certain.. but the desired output is a dataframe with 8 columns which can accommodate the last depth of the nested name key e.g. 
something like this: I would really appreciate any help code tried for extracting the 'name' property by created a dataframe as follows: with open('br08403.json') as f: d = json.load(f) df2 = pd.DataFrame(d) data = [] for a in range(len(df2)): # print(df2['children'][a]['name']) data.append(df2['children'][a]['name']) for b in range(len(df2['children'][a]['children'])): # print(df2['children'][a]['children'][b]['name']) data.append(df2['children'][a]['children'][b]['name']) if len(df2['children'][a]['children'][b]) < 2: print(df2['children'][a]['children'][b]['name']) else: for c in range(len(df2['children'][a]['children'][b]['children'])): # print(df2['children'][a]['children'][b]['children'][c]['name']) data.append(df2['children'][a]['children'][b]['children'][c]['name']) if len(df2['children'][a]['children'][b]['children'][c]) < 2: print(df2['children'][a]['children'][b]['children'][c]['name']) else: for d in range(len(df2['children'][a]['children'][b]['children'][c]['children'])): # print(df2['children'][a]['children'][b]['children'][c]['children'][d]['name']) data.append(df2['children'][a]['children'][b]['children'][c]['children'][d]['name']) but i'm getting a plain list as follows: ['01 Certain infectious or parasitic diseases', 'Gastroenteritis or colitis of infectious origin', 'Bacterial intestinal infections', '1A00 Cholera', '1A01 Intestinal infection due to other Vibrio', '1A02 Intestinal infections due to Shigella', '1A03 Intestinal infections due to Escherichia coli', '1A04 Enterocolitis due to Clostridium difficile', '1A05 Intestinal infections due to Yersinia enterocolitica', '1A06 Gastroenteritis due to Campylobacter', '1A07 Typhoid fever', '1A08 Paratyphoid Fever', '1A09 Infections due to other Salmonella',....
A simple pandas only iterative approach. res = requests.get("https://www.genome.jp/kegg-bin/download_htext?htext=br08403.keg&format=json&filedir=") js = res.json() df = pd.json_normalize(js) for i in range(20): df = pd.json_normalize(df.explode("children").to_dict(orient="records")) if "children" in df.columns: df.drop(columns="children", inplace=True) df = df.rename(columns={"children.name":f"level{i}","children.children":"children"}) if df[f"level{i}"].isna().all() or "children" not in df.columns: break
Duplicate values in a dictionary
I am trying to read through a csv file in the following format: number,alphabet 1,a 2,b 3,c 2,b 1,a My code to create a dictionary: alpha = open('alpha.csv','r') csv_alpha = csv.reader(alpha) alpha_file = {row[0]:row[1] for row in csv_alpha} OUTPUT: alpha_file = { 1:'a', 2:'b', 3:'c' } By looking at the file, 1 and 2 have duplicate values. How can i possibly change my output to : alpha_file = { 1:'a', 1:'a', 2:'b', 2:'b', 3:'c' } LNG - PYTHON
use a list to hold key's value alpha = open('alpha.csv','r') csv_alpha = csv.reader(alpha) alpha_file = dict() for row in csv_alpha: if row[0] in alpha_file: alpha_file[row[0]].append(row[1]) else: alpha_file[row[0]] = [row[1]] the output will be like: { 1:['a','a'],2:['b','b'], 3:['c'] } to output the number of key occurrences, use a for loop d = { 1:['a','a'],2:['b','b'], 3:['c'] } amount = [] for key, value in d.iteritems(): amount += [key] * len(value) print amount output looks like: [1, 1, 2, 2, 3]
Python - Convert JSON key/values into key/value where value is an array
I have a JSON file with numerous entries like this: { "area1": "California", "area2": "Sierra Eastside", "area3": "Bishop Area", "area4": "Volcanic Tablelands (Happy/Sad Boulders)", "area5": "Fish Slough Boulders", "grade": "V6 ", "route": "The Orgasm", "type1": "Boulder", "type2": "NONE", "type3": "NONE", "type4": "NONE", }, I want to take the area and type entries and turn them into arrays: { "area": ["California","Sierra Eastside","Bishop Area","Volcanic Tablelands (Happy/Sad Boulders)","Fish Slough Boulders"] "grade": "V6 ", "route": "The Orgasm", "type": ["Boulder","NONE","NONE","NONE"] }, I have this code which almost works: json_data=open('../json/routes_test.json') datas = json.load(json_data) datas_arrays = [] area_keys = ['area1','area2','area3','area4','area5'] type_keys = ['type1','type2','type3','type4'] for data in datas: areaArray = [] typeArray = [] deleteArray = [] for k, v in data.iteritems(): for area_key in area_keys: if (k == area_key): areaArray.append(v) deleteArray.append(k) for type_key in type_keys: if (k == type_key): typeArray.append(v) deleteArray.append(k) for k in deleteArray: del data[k] data['area'] = areaArray data['type'] = typeArray datas_arrays.append(data) print datas_arrays print "********" out = json.dumps(datas_arrays, sort_keys=True,indent=4, separators=(',', ': ')) print out f_out= open('../json/toues_test_intoarrays.json', 'w') f_out.write(out) f_out.close() The problem is that the area array is all out of order and the type array is backwards, which I can't have. I find it strange that one is unordered and one is ordered but backwards. To me it seems like the iteration should assure they're placed in order.
Python dictionaries have an arbitrary ordering, they are not sorted. You want to use your prebuilt lists of keys instead: with open('../json/routes_test.json') as json_data: datas = json.load(json_data) area_keys = ['area1','area2','area3','area4','area5'] type_keys = ['type1','type2','type3','type4'] for data in datas: data['area'] = [data[k] for k in area_keys] data['type'] = [data[k] for k in type_keys] for k in area_keys + type_keys: del data[k] out = json.dumps(datas, sort_keys=True, indent=4, separators=(',', ': ')) print out with open('../json/toues_test_intoarrays.json', 'w') as f_out: f_out.write(out) which changes the dictionaries in-place. You could even determine the area and type keys from each entry: for data in datas: keys = sorted(data.keys()) area_keys = [k for k in keys if k.startswith('area')] data['area'] = [data[k] for k in area_keys] type_keys = [k for k in keys if k.startswith('type')] data['type'] = [data[k] for k in type_keys] for k in area_keys + type_keys: del data[k] and omit the list literals with the 'area1', 'area2' etc. hardcoded lists altogether.
Iterate the keys in order. for k, v in sorted(data.iteritems()): This will fail once you get past 9, but it will do for now.