Duplicate values in a dictionary - python

I am trying to read through a csv file in the following format:
number,alphabet
1,a
2,b
3,c
2,b
1,a
My code to create a dictionary:
alpha = open('alpha.csv','r')
csv_alpha = csv.reader(alpha)
alpha_file = {row[0]:row[1] for row in csv_alpha}
OUTPUT:
alpha_file = { 1:'a', 2:'b', 3:'c' }
By looking at the file, 1 and 2 have duplicate values.
How can i possibly change my output to :
alpha_file = { 1:'a', 1:'a', 2:'b', 2:'b', 3:'c' }
LNG - PYTHON

use a list to hold key's value
alpha = open('alpha.csv','r')
csv_alpha = csv.reader(alpha)
alpha_file = dict()
for row in csv_alpha:
if row[0] in alpha_file:
alpha_file[row[0]].append(row[1])
else:
alpha_file[row[0]] = [row[1]]
the output will be like:
{ 1:['a','a'],2:['b','b'], 3:['c'] }
to output the number of key occurrences, use a for loop
d = { 1:['a','a'],2:['b','b'], 3:['c'] }
amount = []
for key, value in d.iteritems():
amount += [key] * len(value)
print amount
output looks like:
[1, 1, 2, 2, 3]

Related

in python, read csv data and. convert unique values into python's dictionary

How do I convert below comma delimited records -
COL1,COL2,COL3,COL4
A101,P501,U901,US_A
A101,P501,U902,US_B
A101,P502,U901,US_A
A102,P501,U901,US_A
A102,P502,U902,US_B
into python dictionary -
result = {
"A101": {
"P501": {"U901": "US_A", "U902": "US_B"},
"P502": {"U901": "US_A"}
},
"A102": {
"P501": {"U901": "US_A"},
"P502": {"U902": "US_B"}
}
}
Thank you for your help!
Approach
We can process the rows of the CSV file as follows:
Convert each row in CSV file from a list to a nested dictionary using Convert a list to nested dictionary i.e. line reduce(lambda x, y: {y: x}, reversed(row))) in code below.
Merge the nested dictionaries using
Merge nested dictionaries in Python using merge_dict function below
Code
import csv
def csv_to_nested_dict(filenm):
' CSV file to nested dictionary '
with open(filenm, 'r') as csvfile:
csv_reader = csv.reader(csvfile, delimiter=',')
next(csv_reader) # skip header row
result = {}
for row in csv_reader:
# Convert row to nested dictionary and
# Merge into result
result = merge_dict(result,
reduce(lambda x, y: {y: x}, reversed(row))) # row to nested dictionary
return result
def merge_dict(dict1, dict2):
' Merges nested dictionaries '
for key, val in dict1.items():
if type(val) == dict:
if key in dict2 and type(dict2[key] == dict):
merge_dict(dict1[key], dict2[key])
else:
if key in dict2:
dict1[key] = dict2[key]
for key, val in dict2.items():
if not key in dict1:
dict1[key] = val
return dict1
Test
Usage:
res = csv_to_nested_dict('test.txt') # result
# Use json to pretty print nested dictionary res
import json
print(json.dumps(res, indent = 4))
Input File test.txt
COL1,COL2,COL3,COL4
A101,P501,U901,US_A
A101,P501,U902,US_B
A101,P502,U901,US_A
A102,P501,U901,US_A
A102,P502,U902,US_B
Output
{
"A101": {
"P501": {
"U901": "US_A",
"U902": "US_B"
},
"P502": {
"U901": "US_A"
}
},
"A102": {
"P501": {
"U901": "US_A"
},
"P502": {
"U902": "US_B"
}
}
}
here is simple version of solution -
def dict_reader(file_name):
with open(file_name, 'r') as csvfile:
reader = csv.DictReader(csvfile)
try:
data = dict()
for row in reader:
col1, col2, col3, col4 = (row["col1"], row["col2"], row["col3"], row["col4"])
if col1 in data:
if col2 in data[col1]:
data[col1][col2].update({col3: col4})
else:
data[col1][col2] = {col3: col4}
else:
data[col1] = {col2: {col3: col4}}
except csv.Error as e:
sys.exit('file {}, line {}: {}'.format(file_name, reader.line_num, e))
finally:
return data
it is not very elegant solution but works.

Dynamic list creation and append values - python

I have a input data that is parsed from a json and printing the output like this from keys like tablename,columnname,columnlength
data = ('tablename', 'abc.xyz'),('tablename','abc.xyz'),('columnname', 'xxx'),('columnname', 'yyy'),('columnlen', 55)
data[0] =
abc.xyz
abc.xyz
abc.xyz
data[1] =
xxx
yyy
zzz
data[2] =
20
30
60
data[0] represents tablename
data[1] represents columnname
data[2] represents column length
I have code below that does creating the empty list manually
TableName_list = []
ColumnName_list = []
ColumnLen_list = []
for x in data:
if x[0] == 'tablename':
TableName_list.append(data[0]])
elif x[0] == 'columnname':
ColumnName_list.append(data[1])
elif x[0] == 'columnlen':
ColumnLen_list.append(data[2])
I need to create a dynamic empty list respectively for each fields(tablename,column,columnlength) and append the data to that empty list in the dictionary
and my output is needed like this in a dictionary
dict = {'TableName':TableName_list,'ColumnName':ColumnName_list,'ColumnLen':columnLength_list }
This is probably most easily done with a defaultdict:
from collections import defaultdict
dd = defaultdict(list)
data = [
('tablename', 'abc.xyz'),('tablename','abc.xyz'),
('columnname', 'xxx'),('columnname', 'yyy'),
('columnlen', 55),('columnlen', 30)
]
for d in data:
dd[d[0]].append(d[1])
Output:
defaultdict(<class 'list'>, {
'tablename': ['abc.xyz', 'abc.xyz'],
'columnname': ['xxx', 'yyy'],
'columnlen': [55, 30]
})
If the case of the names in the result is important, you could use a dictionary to translate the incoming names:
aliases = { 'tablename' : 'TableName', 'columnname' : 'ColumnName', 'columnlen' : 'ColumnLen' }
for d in data:
dd[aliases[d[0]]].append(d[1])
Output:
defaultdict(<class 'list'>, {
'TableName': ['abc.xyz', 'abc.xyz'],
'ColumnName': ['xxx', 'yyy'],
'ColumnLen': [55, 30]
})
I suggest to make a dictionary directly, something look like this:
out_dict = {}
for x in data:
key = x[0]
if key in out_dict.keys():
out_dict[key] = out_dict[key].append(x[1])
else:
out_dict[key] = [x[1]]
using pandas:
import pandas as pd
>>> pd.DataFrame(data).groupby(0)[1].apply(list).to_dict()
'''
{'columnlen': [55, 30],
'columnname': ['xxx', 'yyy'],
'tablename': ['abc.xyz', 'abc.xyz']}

Hierarchical grouping in key value pair with python

I have a list like this:
data = [
{'date':'2017-01-02', 'model': 'iphone5', 'feature':'feature1'},
{'date':'2017-01-02', 'model': 'iphone7', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone6', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone6', 'feature':'feature2'},
{'date':'2017-01-03', 'model': 'iphone7', 'feature':'feature3'},
{'date':'2017-01-10', 'model': 'iphone7', 'feature':'feature2'},
{'date':'2017-01-10', 'model': 'iphone7', 'feature':'feature1'},
]
I want to achieve this:
[
{
'2017-01-02':[{'iphone5':['feature1']}, {'iphone7':['feature2']}]
},
{
'2017-01-03': [{'iphone6':['feature2']}, {'iphone7':['feature3']}]
},
{
'2017-01-10':[{'iphone7':['feature2', 'feature1']}]
}
]
I need an efficient way, since it could be much data.
I was trying this:
data = sorted(data, key=itemgetter('date'))
date = itertools.groupby(data, key=itemgetter('date'))
But I'm getting nothing for the value of the 'date' key.
Later I will iterate over this structure for building an HTML.
You can do this pretty efficiently and cleanly using defaultdict. Unfortunately it's a pretty advanced use and it gets hard to read.
from collections import defaultdict
from pprint import pprint
# create a dictionary whose elements are automatically dictionaries of sets
result_dict = defaultdict(lambda: defaultdict(set))
# Construct a dictionary with one key for each date and another dict ('model_dict')
# as the value.
# The model_dict has one key for each model and a set of features as the value.
for d in data:
result_dict[d["date"]][d["model"]].add(d["feature"])
# more explicit version:
# for d in data:
# model_dict = result_dict[d["date"]] # created automatically if needed
# feature_set = model_dict[d["model"]] # created automatically if needed
# feature_set.add(d["feature"])
# convert the result_dict into the required form
result_list = [
{
date: [
{phone: list(feature_set)}
for phone, feature_set in sorted(model_dict.items())
]
} for date, model_dict in sorted(result_dict.items())
]
pprint(result_list)
# [{'2017-01-02': [{'iphone5': ['feature1']}, {'iphone7': ['feature2']}]},
# {'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
# {'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}]
You can try this, here is my way, td is a dict to store { iphone : index } to check if the new item exist in the list of dict:
from itertools import groupby
from operator import itemgetter
r = []
for i in groupby(sorted(data, key=itemgetter('date')), key=itemgetter('date')):
td, tl = {}, []
for j in i[1]:
if j["model"] not in td:
tl.append({j["model"]: [j["feature"]]})
td[j["model"]] = len(tl) - 1
elif j["feature"] not in tl[td[j["model"]]][j["model"]]:
tl[td[j["model"]]][j["model"]].append(j["feature"])
r.append({i[0]: tl})
Result:
[
{'2017-01-02': [{'iphone5': ['feature1']}, {'iphone7': ['feature2']}]},
{'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
{'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}
]
As matter of fact, I think the data structure can be simplified, maybe you don't need so many nesting.
total_result = list()
result = dict()
inner_value = dict()
for d in data:
if d["date"] not in result:
if result:
total_result.append(result)
result = dict()
result[d["date"]] = set()
inner_value = dict()
if d["model"] not in inner_value:
inner_value[d["model"]] = set()
inner_value[d["model"]].add(d["feature"])
tmp_v = [{key: list(inner_value[key])} for key in inner_value]
result[d["date"]] = tmp_v
total_result.append(result)
total_result
[{'2017-01-02': [{'iphone7': ['feature2']}, {'iphone5': ['feature1']}]},
{'2017-01-03': [{'iphone6': ['feature2']}, {'iphone7': ['feature3']}]},
{'2017-01-10': [{'iphone7': ['feature2', 'feature1']}]}]

python generating nested dictionary key error

I am trying to create a nested dictionary from a mysql query but I am getting a key error
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" also to be another dictionary you need to tell Python by an assingment
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data']['i'] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
def __missing__(self, k):
self[k] = AutoDict()
return self[k]
result = AutoDict()
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
result['data'] does exist. So you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data'][i] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email

Python - Convert JSON key/values into key/value where value is an array

I have a JSON file with numerous entries like this:
{
"area1": "California",
"area2": "Sierra Eastside",
"area3": "Bishop Area",
"area4": "Volcanic Tablelands (Happy/Sad Boulders)",
"area5": "Fish Slough Boulders",
"grade": "V6 ",
"route": "The Orgasm",
"type1": "Boulder",
"type2": "NONE",
"type3": "NONE",
"type4": "NONE",
},
I want to take the area and type entries and turn them into arrays:
{
"area": ["California","Sierra Eastside","Bishop Area","Volcanic Tablelands (Happy/Sad Boulders)","Fish Slough Boulders"]
"grade": "V6 ",
"route": "The Orgasm",
"type": ["Boulder","NONE","NONE","NONE"]
},
I have this code which almost works:
json_data=open('../json/routes_test.json')
datas = json.load(json_data)
datas_arrays = []
area_keys = ['area1','area2','area3','area4','area5']
type_keys = ['type1','type2','type3','type4']
for data in datas:
areaArray = []
typeArray = []
deleteArray = []
for k, v in data.iteritems():
for area_key in area_keys:
if (k == area_key):
areaArray.append(v)
deleteArray.append(k)
for type_key in type_keys:
if (k == type_key):
typeArray.append(v)
deleteArray.append(k)
for k in deleteArray:
del data[k]
data['area'] = areaArray
data['type'] = typeArray
datas_arrays.append(data)
print datas_arrays
print "********"
out = json.dumps(datas_arrays, sort_keys=True,indent=4, separators=(',', ': '))
print out
f_out= open('../json/toues_test_intoarrays.json', 'w')
f_out.write(out)
f_out.close()
The problem is that the area array is all out of order and the type array is backwards, which I can't have. I find it strange that one is unordered and one is ordered but backwards. To me it seems like the iteration should assure they're placed in order.
Python dictionaries have an arbitrary ordering, they are not sorted. You want to use your prebuilt lists of keys instead:
with open('../json/routes_test.json') as json_data:
datas = json.load(json_data)
area_keys = ['area1','area2','area3','area4','area5']
type_keys = ['type1','type2','type3','type4']
for data in datas:
data['area'] = [data[k] for k in area_keys]
data['type'] = [data[k] for k in type_keys]
for k in area_keys + type_keys:
del data[k]
out = json.dumps(datas, sort_keys=True, indent=4, separators=(',', ': '))
print out
with open('../json/toues_test_intoarrays.json', 'w') as f_out:
f_out.write(out)
which changes the dictionaries in-place.
You could even determine the area and type keys from each entry:
for data in datas:
keys = sorted(data.keys())
area_keys = [k for k in keys if k.startswith('area')]
data['area'] = [data[k] for k in area_keys]
type_keys = [k for k in keys if k.startswith('type')]
data['type'] = [data[k] for k in type_keys]
for k in area_keys + type_keys:
del data[k]
and omit the list literals with the 'area1', 'area2' etc. hardcoded lists altogether.
Iterate the keys in order.
for k, v in sorted(data.iteritems()):
This will fail once you get past 9, but it will do for now.

Categories