I need a defaultdict that gives me the final dict (finaldict), given a list of query words from the first file.
The final dict maps a pair of words, one from each file, to the IDs they share. E.g. foo and oof share the IDs 1243 and 1453. It is meant to facilitate word-pair searches later: when I search for ('foo','oof'), it should return ['1243','1453'].
If I search the finaldict for ('foo','duh'), it should return nothing, as that word pair doesn't share any ID.
query = ['foo','barbar']
finaldict = defaultdict(list)
finaldict = {('foo','oof'):['1243','1453'],
('foo','rabrab'):['2323'],
('barbar','duh'):['6452']}
I've been doing it as below but is there a simpler way of achieving the finaldict?
from collections import defaultdict

query = ['foo', 'barbar']

# Word from the first file -> IDs it is listed under.
dict1 = defaultdict(list)
# ID -> words from the second file listed under that ID.
dict2 = defaultdict(list)

# NOTE: '1234' has no entry in dict2 (possibly a typo for '1243'), so it
# contributes no pair to finaldict below.
dict1['foo'] = ['1234', '1453', '2323']
dict1['bar'] = ['5230']
dict1['barbar'] = ['6452']
dict2['1243'] = ['oof']
dict2['1453'] = ['oof']
dict2['4239'] = ['rba']
dict2['2323'] = ['rabrab']
dict2['6452'] = ['duh']

# (query word, second-file word) -> set of IDs the two words share.
finaldict = defaultdict(set)
for src in query:
    # .get() rather than [] so that a lookup never inserts an empty entry
    # into dict1/dict2 as a side effect of the defaultdict.
    for shared_id in dict1.get(src, ()):
        for trg in dict2.get(shared_id, ()):
            finaldict[(src, trg)].add(shared_id)

print(finaldict[('foo', 'oof')])
The above code outputs:
>>> print finaldict[('foo','oof')]
set(['1453'])
>>> for i in finaldict:
... print i, finaldict[i]
...
('foo', 'rabrab') set(['2323'])
('barbar', 'duh') set(['6452'])
('foo', 'oof') set(['1453'])
{(k1,v):k2 for k1 in dict1 for k2 in dict2
for v in dict2[k2] if k2 in dict1[k1]}
{('barbar', 'duh'): '6452', ('foo', 'oof'): '1453', ('foo', 'rabrab'): '2323'}
Related
I have a dict:
my_dict = {'some.key' : 'value'}
and i want to change it like this:
result = {'some' : {'key' : 'value'}}
How can I do this?
I need this to create nested classes using dicts:
example:
my_dict = {'nested.key' : 'value'}
class Nested:
key : str
class MyDict:
nested : Nested
if you need this for real use, and not as a coding exercise, you can install extradict and use extradict.NestedData:
In [1]: from extradict import NestedData
In [2]: a = NestedData({'some.key' : 'value'})
In [3]: a["some"]
Out[3]: {'key': <str>}
In [4]: a["some"]["key"]
Out[4]: 'value'
In [5]: a.data
Out[5]: {'some': {'key': 'value'}}
(disclaimer: I am the package author)
Not quite sure if I understand your question, but would
result = {key.split('.')[0]: {key.split('.')[1]: value} for key, value in my_dict.items()}
do the trick?
I hope this function will help you
def foo(obj):
    """Expand a flat dict with dotted keys into nested dicts.

    Each key of ``obj`` is split on ``'.'``. Every part except the last
    names a nested dict (created the first time it is needed), and the
    last part is the key that receives the value.

    >>> foo({'some.key': 'value'})
    {'some': {'key': 'value'}}
    """
    result = {}
    for dotted_key, value in obj.items():
        parts = dotted_key.split('.')
        parents, leaf = parts[:-1], parts[-1]
        node = result
        for part in parents:
            # Reuse the branch if an earlier key already created it.
            node = node.setdefault(part, {})
        node[leaf] = value
    return result
With recursion it could look like this (having all keys unique is essential):
my_dict = {'key0' : 'value0',
'nested.key' : 'value',
'nested1.nested1.key1' : 'value1',
'nested2.nested2.nested2.key2' : 'value2'}
def func(k,v):
    """Wrap ``v`` in one single-key dict per dot-separated part of ``k``.

    ``func('a.b.c', 1)`` gives ``{'a': {'b': {'c': 1}}}``; a key without a
    dot gives ``{k: v}``.
    """
    parts = k.split('.')
    # Build from the innermost key outwards.
    nested = {parts[-1]: v}
    for part in reversed(parts[:-1]):
        nested = {part: nested}
    return nested
res = {}
for k,v in my_dict.items():
res.update(func(k,v))
>>> res
'''
{'key0': 'value0',
'nested': {'key': 'value'},
'nested1': {'nested1': {'key1': 'value1'}},
'nested2': {'nested2': {'nested2': {'key2': 'value2'}}}}
I have the below list of tuples:
p = [("01","Master"),("02","Node"),("03","Node"),("04","Server")]
I want my output to look like:
y = {
"Master":{"number":["01"]},
"Node":{"number":["02", "03"]},
"Server":{"number":["04"]}
}
I have tried the below code:
y = {}
for line in p:
if line[1] in y:
y[line[1]] = {}
y[line[1]]["number"].append(line[0])
else:
y[line[1]] = {}
y[line[1]]["number"] = [line[0]]
And I get the below error:
Traceback (most recent call last):
File "<stdin>", line 4, in <module>
KeyError: 'number'
How do I solve this?
from collections import defaultdict
d = defaultdict(lambda: defaultdict(list))
for v, k in p:
d[k]["number"].append(v)
print(d)
defaultdict(<function <lambda> at 0x7f8005097578>, {'Node': defaultdict(<type 'list'>, {'number': ['02', '03']}), 'Master': defaultdict(<type 'list'>, {'number': ['01']}), 'Server': defaultdict(<type 'list'>, {'number': ['04']})})
without defaultdict:
d = {}
from pprint import pprint as pp
for v, k in p:
d.setdefault(k,{"number":[]})
d[k]["number"].append(v)
pp(d)
{'Master': {'number': ['01']},
'Node': {'number': ['02', '03']},
'Server': {'number': ['04']}}
It's because you don't initialize your dictionary when needed, and you reset it when not needed.
Try this:
p = [("01","Master"),("02","Node"),("03","Node"),("04","Server")]
y = {}
for number, category in p:
    # Create the sub-dictionary the first time a category shows up,
    # then add the number to it.
    y.setdefault(category, {"number": []})["number"].append(number)
Note that using a tuple unpacking for (number, category) in p allows your code to be more readable inside your loop.
You are resetting the dictionary!
for line in p:
if line[1] in y:
#y[line[1]] = {} -- RESET! ["number"] will now disappear.
#.. which leads to error in the next line.
y[line[1]]["number"].append(line[0])
else:
y[line[1]] = {}
y[line[1]]["number"] = [line[0]]
A more pythonic way of achieving the same thing would be by using a defaultdict as demonstrated in other answers.
Do not assign {} to key when key is already present in y.
y = {}
for line in p:
    number, category = line[0], line[1]
    try:
        y[category]["number"].append(number)
    except KeyError:
        # First time this category is seen: create its sub-dictionary.
        # Only KeyError is caught so that unrelated errors are not hidden.
        y[category] = {"number": [number]}
OR
Use a defaultdict:
>>> from collections import defaultdict
>>> p = [("01","Master"),("02","Node"),("03","Node"),("04","Server")]
>>> d = defaultdict(list)
>>> for k, v in p:
... d[v].append(k)
...
>>> d
defaultdict(<type 'list'>, {'Node': ['02', '03'], 'Master': ['01'], 'Server': ['04']})
I receive data from the Loggly service in dot notation, but to put data back in, it must be in JSON.
Hence, I need to convert:
{'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100}
Into:
{'message': {'code': {'response': 80}, 'status': {'time': 50}}, 'time': 100}
I have put together a function to do so, but I wonder if there is a more direct and simpler way to accomplish the same result.
def dot_to_json(a):
    """Convert a flat dict with dot-notation keys into a nested dict.

    Every occurrence of ``json.`` that starts at a word boundary and is
    followed by a word character is removed from the key. The rest is
    split on ``'.'``: each part but the last names a nested dict, and the
    last part is the key that receives the value.

    Raises TypeError if a key needs to descend through a part that already
    holds a non-dict value (e.g. ``'a'`` followed by ``'a.b'``).
    """
    # Create root for JSON tree structure
    resp = {}
    for dotted_key, value in a.items():
        # eliminate json. (if metric comes from another type, it will keep its root)
        # The dot is escaped so that only a literal "json." is stripped.
        dotted_key = re.sub(r'\bjson\.\b', '', dotted_key)
        path = dotted_key.split('.')
        branch = resp
        for part in path[:-1]:
            # Look the part up in the *current* level and reuse it if it
            # is already there, so sibling branches are never overwritten.
            branch = branch.setdefault(part, {})
        # Assign value to the last branch
        branch[path[-1]] = value
    return resp
You can turn the path into dictionary access with:
def dot_to_json(a):
    """Convert a flat dict with dot-notation keys into a nested dict.

    Each key is split on ``'.'`` and treated as a path. A leading ``json``
    part is dropped when something follows it. All parts but the last name
    nested dicts (created on demand), and the last part is the key that
    receives the value.
    """
    # functools.reduce is available on Python 2.6+ and 3.x; the builtin
    # reduce() only exists on Python 2.
    from functools import reduce

    output = {}
    # items() works on both Python 2 and 3, unlike iteritems().
    for key, value in a.items():
        path = key.split('.')
        # Keep a bare 'json' key as-is; stripping it would leave an empty
        # path and nothing to use as the final key.
        if path[0] == 'json' and len(path) > 1:
            path = path[1:]
        # Walk (and create) the nested dicts for every part but the last.
        target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
        target[path[-1]] = value
    return output
This takes the key as a path, ignoring the first json part. With reduce() you can walk the elements of path (except for the last one) and fetch the nested dictionary with it.
Essentially you start at output and for each element in path fetch the value and use that value as the input for the next iteration. Here dict.setdefault() is used to default to a new empty dictionary each time a key doesn't yet exist. For a path ['foo', 'bar', 'baz'] this comes down to the call output.setdefault('foo', {}).setdefault('bar', {}).setdefault('baz', {}), only more compact and supporting arbitrary length paths.
The innermost dictionary is then used to set the value with the last element of the path as the key.
Demo:
>>> def dot_to_json(a):
... output = {}
... for key, value in a.iteritems():
... path = key.split('.')[1:] # ignore the json. prefix
... target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
... target[path[-1]] = value
... return output
...
>>> dot_to_json({'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100})
{'message': {'status': {'time': 50}, 'code': {'response': 80}}, 'time': 100}
I am trying to create a nested dictionary from a mysql query but I am getting a key error
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" also to be another dictionary, you need to tell Python by an assignment
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {'data': {}}
for i, q in enumerate(query):
    # Key the row by the loop index itself, not by the string 'i';
    # otherwise result['data'][i] does not exist when the fields are set.
    result['data'][i] = {
        'firstName': q.first_name,
        'lastName': q.last_name,
        'email': q.email,
    }
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
    """dict that fills in a nested AutoDict whenever a missing key is read."""

    def __missing__(self, k):
        # Called by dict item lookup when k is absent: store a fresh
        # AutoDict under k and hand it back so lookups can be chained.
        child = AutoDict()
        self[k] = child
        return child
result = AutoDict()
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
result['data'] does not exist yet, so you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data'][i] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
I have a question related to python code.
I need to aggregate if the key = kv1, how can I do that?
input='num=123-456-7890&kv=1&kv2=12&kv3=0'
result={}
for pair in input.split('&'):
(key,value) = pair.split('=')
if key in 'kv1':
print value
result[key] += int(value)
print result['kv1']
Thanks a lot!!
I'm assuming you meant key == 'kv1' and also the kv within input was meant to be kv1 and that result is an empty dict that doesn't need result[key] += int(value) just result[key] = int(value)
input = 'num=123-456-7890&kv1=1&kv2=12&kv3=0'
keys = {k: v for k, v in [i.split('=') for i in input.split('&')]}
print keys # {'num': '123-456-7890', 'kv2': '12', 'kv1': '1', 'kv3': '0'}
result = {}
for key, value in keys.items():
if key == 'kv1':
# if you need to increase result['kv1']
_value = result[key] + int(value) if key in result else int(value)
result[key] = _value
# if you need to set result['kv1']
result[key] = int(value)
print result # {'kv1': 1}
Assuming you have multiple lines with data like:
num=123-456-7890&kv1=2&kv2=12&kv3=0
num=123-456-7891&kv1=1&kv2=12&kv3=0
num=123-456-7892&kv1=4&kv2=12&kv3=0
Reading line-by-line in a file:
def get_key(data, key):
    """Return the integer value of ``key`` in an ``a=1&b=2`` style string.

    ``data`` is split on ``'&'`` into ``name=value`` pairs. Pairs without
    an ``'='`` (such as an empty line) are skipped. If ``key`` occurs more
    than once, the last occurrence wins.

    Returns None when ``key`` is not present. Raises ValueError when the
    matching value is not an integer literal.
    """
    found = None
    for pair in data.split('&'):
        # partition() splits on the first '=' only and never fails to unpack.
        name, sep, raw = pair.partition('=')
        if sep and name == key:
            found = raw
    if found is None:
        return None
    return int(found)
results = []
# The with-block closes the file once every line has been read.
with open('filename', 'r') as handle:
    for line in handle:
        value = get_key(line.strip(), 'kv1')
        # Compare against None so that a legitimate value of 0 is kept.
        if value is not None:
            results.append({'kv1': value})
print(results)  # could be [{'kv1': 2}, {'kv1': 1}, {'kv1': 4}]
Or just one string:
with open('filename', 'r') as f: data = f.read()
keys = {k: v for k, v in [i.split('=') for i in data.split('&')]}
result = {}
for key, value in keys.items():
if key == 'kv1':
result[key] = int(value)
Console i/o:
c:\nathan\python\bnutils>python
Python 2.7.5 (default, May 15 2013, 22:44:16) [MSC v.1500 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> def get_key(data, key):
... keys = {k: v for k, v in [i.split('=') for i in data.split('&')]}
... for k, v in keys.items():
... if k == key: return int(v)
... return None
...
>>> results = []
>>> for line in [line.strip() for line in open('test.txt', 'r')]:
... value = get_key(line, 'kv1')
... if value:
... results.append({'kv1': value})
...
>>> print results
[{'kv1': 2}, {'kv1': 1}, {'kv1': 4}]
>>>
test.txt:
num=123-456-7890&kv1=2&kv2=12&kv3=0
num=123-456-7891&kv1=1&kv2=12&kv3=0
num=123-456-7892&kv1=4&kv2=12&kv3=0
import urlparse
urlparse.parse_qs(input)
results in: {'num': ['123-456-7890'], 'kv2': ['12'], 'kv': ['1'], 'kv3': ['0']}
The keys are aggregated for you.
You could do it this way, so basically just add an extra if else block dealing with the empty case for key
# The key is spelled 'kv1' here so that there is something to aggregate;
# with the original 'kv' the final lookup of result['kv1'] raises KeyError.
input = 'num=123-456-7890&kv1=1&kv2=12&kv3=0'
result = {}
for pair in input.split('&'):
    # Split on the first '=' only; value is the text after it.
    key, _, value = pair.partition('=')
    # Exact match: "key in 'kv1'" would be a substring test.
    if key == 'kv1':
        print(value)  # do you really want to output this?
        # Start from 0 the first time the key is seen, then keep adding.
        result[key] = result.get(key, 0) + int(value)
print(result['kv1'])