I receive data from the Loggly service in dot notation, but to put data back in, it must be in JSON.
Hence, I need to convert:
{'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100}
Into:
{'message': {'code': {'response': 80}, 'status': {'time': 50}}, 'time': 100}
I have put together a function to do so, but I wonder if there is a more direct and simpler way to accomplish the same result.
def dot_to_json(a):
# Create root for JSON tree structure
resp = {}
for k,v in a.items():
# eliminate json. (if metric comes from another type, it will keep its root)
k = re.sub(r'\bjson.\b','',k)
if '.' in k:
# Field has a dot
r = resp
s = ''
k2 = k.split('.')
l = len(k2)
count = 0
t = {}
for f in k2:
count += 1
if f not in resp.keys():
r[f]={}
r = r[f]
if count < l:
s += "['" + f + "']"
else:
s = "resp%s" % s
t = eval(s)
# Assign value to the last branch
t[f] = v
else:
r2 = resp
if k not in resp.keys():
r2[k] = {}
r2[k] = v
return resp
You can turn the path into dictionary access with:
def dot_to_json(a):
output = {}
for key, value in a.iteritems():
path = key.split('.')
if path[0] == 'json':
path = path[1:]
target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
target[path[-1]] = value
return output
This takes the key as a path, ignoring the first json part. With reduce() you can walk the elements of path (except for the last one) and fetch the nested dictionary with it.
Essentially you start at output and for each element in path fetch the value and use that value as the input for the next iteration. Here dict.setdefault() is used to default to a new empty dictionary each time a key doesn't yet exist. For a path ['foo', 'bar', 'baz'] this comes down to the call output.setdefault('foo', {}).setdefault('bar', {}).setdefault('baz', {}), only more compact and supporting arbitrary length paths.
The innermost dictionary is then used to set the value with the last element of the path as the key.
Demo:
>>> def dot_to_json(a):
... output = {}
... for key, value in a.iteritems():
... path = key.split('.')[1:] # ignore the json. prefix
... target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
... target[path[-1]] = value
... return output
...
>>> dot_to_json({'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100}))
{'message': {'status': {'time': 50}, 'code': {'response': 80}}, 'time': 100}
Related
I am extracting from the log file and print using the below code
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
print (g)
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'GET')]
[('1.1.1.1', 'PUT')]
[('2.2.2.2', 'POST')]
How to add to the output
output
1.1.1.1: PUT = 2
2.2.2.2: GET = 1,POST=1
You could use a dictionary to count:
# initialize the count dict
count_dict= dict()
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
for tup in g:
# get the counts for tuple tup if we don't have it yet
# use 0 (second argument to .get)
num= count_dict.get(tup, 0)
# increase the count and write it back
count_dict[tup]= num+1
# now iterate over the key (tuple) - value (counts)-pairs
# and print the result
for tup, count in count_dict.items():
print(tup, count)
Ok, I have to admit this doesn't give the exact output, you want, but from this you can do in a similar manner:
out_dict= dict()
for (comma_string, request_type), count in count_dict.items():
out_str= out_dict.get(comma_string, '')
sep='' if out_str == '' else ', '
out_str= f'{out_str}{sep}{request_type} = {count}'
out_dict[comma_string]= out_str
for tup, out_str in out_dict.items():
print(tup, out_str)
From your data that outputs:
1.1.1.1 PUT = 2
2.2.2.2 GET = 1, POST = 1
I would look towards Counter.
from collections import Counter
results = []
for line in data:
g = re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line)
results.append(g[0])
ip_list = set(result[0] for result in results)
for ip in ip_list:
print(ip, Counter(result[1] for result in results if result[0] == ip ))
You can use collection.defaultdict
Ex:
from collections import defaultdict
result = defaultdict(list)
for line in data:
for ip, method in re.findall(r'([\d.]+).*?(GET|POST|PUT|DELETE)', line):
result[ip].append(method)
for k, v in result.items():
temp = ""
for i in set(v):
temp += " {} = {}".format(i, v.count(i))
print("{}{}".format(k, temp))
from collections import Counter
x = [[('1.1.1.1', 'PUT')],[('2.2.2.2', 'GET')],[('1.1.1.1', 'PUT')],[('2.2.2.2', 'POST')]]
# step 1: convert x into a dict.
m = {}
for i in x:
a, b = i[0]
if a not in m.keys():
m[a] = [b]
else:
x = m[a]
x.append(b)
m[a] = x
print('new dict is {}'.format(m))
# step 2 count frequency
m_values = list(m.values())
yy = []
for i in m_values:
x = []
k = list(Counter(i).keys())
v = list(Counter(i).values())
for i in range(len(k)):
x.append(k[i] + '=' + str(v[i]))
yy.append(x)
# step 3, update the value of the dict
m_keys = list(m.keys())
n = len(m_keys)
for i in range(n):
m[m_keys[i]] = yy[i]
print("final dict is{}".format(m))
Output is
new dict is {'1.1.1.1': ['PUT', 'PUT'], '2.2.2.2': ['GET', 'POST']}
final dict is{'1.1.1.1': ['PUT=2'], '2.2.2.2': ['GET=1', 'POST=1']}
Without dependencies and using a dict for counting, in a very basic way. Given the data_set:
data_set = [[('1.1.1.1', 'PUT')],
[('2.2.2.2', 'GET')],
[('2.2.2.2', 'POST')],
[('1.1.1.1', 'PUT')]]
Initialize the variables (manually, just few verbs) then iterate over the data:
counter = {'PUT': 0, 'GET': 0, 'POST': 0, 'DELETE': 0}
res = {}
for data in data_set:
ip, verb = data[0]
if not ip in res:
res[ip] = counter
else:
res[ip][verb] += 1
print(res)
#=> {'1.1.1.1': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}, '2.2.2.2': {'PUT': 1, 'GET': 0, 'POST': 1, 'DELETE': 0}}
It's required to format the output to better fits your needs.
In my form post request I'm grabbing all form info, but it seems as though there are some empty keys and values. I'd like to remove all instances of empty keys and values. This what I have so far, and it's obviously not working.
post_dict = dict(request.POST)
item_data = {}
for key, value in post_dict.items():
if value is None:
del post_dict[key]
field = key.split('[')[1].replace(']', '')
item_data[field] = ''.join(value)
print(item_data)
What the print item_data looks like:
{'': '', 'username': 'johndoe', 'email': 'johndoe#gmail.com', ...
If you delete the key, will it delete its respective value? How can I get rid of empty keys and values?
Try this:
new_item_data={k:item_data[k] for k in item_data if item_data[k]}
Any keys that do not have values will be removed.
Maybe you can do what you want using one of these:
dict_1 = {'': '', 'username': 'johndoe', 'email':'', }
dict_2 = dict(x for x in dict_1.iteritems() if any(x))
print dict_2 # {'username': 'johndoe', 'email': ''}
dict_3 = dict(x for x in dict_1.iteritems() if all(x))
print dict_3 # {'username': 'johndoe'}
for key, value in post_dict.items():
In your code you are iterating on post_dict.
However, in the line del post_dict[key] you are modifying the iterator, so it will provide an inconsistent view of the dictionary to for. It is not good to add or delete keys to the dictionary that you are iterating on.
This may give the result you wanted
post_dict = dict(request.POST)
item_data = {}
for key, value in post_dict.items():
if value == "":
continue
if key == "":
continue
field = key.split('[')[1].replace(']', '')
item_data[field] = ''.join(value)
print(item_data)
Try this,
post_dict = {'': '', 'item_data[username]': ['johndoe'], 'item_data[email]': ['johndoe#gmail.com'], 'item_data[campus]': ['madison']}
item_data = {}
for key, value in post_dict.items():
strlist = key.split('[')
if len(strlist) == 1:
continue
new_key = strlist[1].replace(']', '')
new_value = ''.join(value)
# add to the dict if both new_key and new_value are non-empty
if all([new_key, new_value]):
item_data[new_key] = new_value
print(item_data)
# Output
{'username': 'johndoe', 'campus': 'madison', 'email': 'johndoe#gmail.com'}
Previous answer: Delete those items from a dict whose key or value is empty.
d = {'': '', 'username': 'johndoe', 'email': 'johndoe#gmail.com'}
for k, v in d.items():
if not any([k, v]):
del d[k]
print(d)
{'username': 'johndoe', 'email': 'johndoe#gmail.com'}
I am trying to create a nested dictionary from a mysql query but I am getting a key error
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" also to be another dictionary you need to tell Python by an assingment
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data']['i'] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
def __missing__(self, k):
self[k] = AutoDict()
return self[k]
result = AutoDict()
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
result['data'] does exist. So you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data'][i] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
I'm attempting to find out how many "usernames" exist. Currently there are two, and I can loop over users to get this, but that feels clunky. Is there a way to get how many usernames exist in user?
open('file.yaml', 'r') as f:
file = yaml.safe_load(f)
# count number of usernames in user...?
file.yaml:
host: "example.com"
timeout: 60
work:
-
processes: 1
users:
-
username: "me"
-
username: "notme"
If you want to get counts from your specific structure:
sum([len(x["users"]) for x in d["work"]])
For a general solution, you could do something like:
f = open("test.yaml")
d = yaml.safe_load(f)
# d is now a dict - {'host': 'example.com', 'work': [{'processes': 1, 'users': [{'username': 'me'}, {'username': 'notme'}]}], 'timeout': 60}
def yaml_count(d, s):
c = 0
if isinstance(d, dict):
for k, v in d.iteritems():
if k == s: c += 1
c += yaml_count(v, s)
elif isinstance(d, list):
for l in d:
c += yaml_count(l, s)
return c
yaml_count(d, "username") # returns 2
I need a defaultdict that can do get the finaldict given a list of query words from the first file.
The final dict is a dictionary of a pair of words from both files that shares the same ID. e.g. foo, oof shares the same 1243 and 1453 ID. It is to facilitate word-pair search later, when i try to search ('foo','oof'), it will return ['1243','1453'].
If i search the finaldict for ('foo','duh'), it will return nothing as the wordpair don't share any same ID.
query = ['foo','barbar']
finaldict = defaultdict(list)
finaldict = {('foo','oof'):['1243','1453']
('foo','rabrab'):['2323']
('barbar','duh'):['6452']}
I've been doing it as below but is there a simpler way of achieving the finaldict?
query = ['foo','barbar']
from collections import defaultdict
dict1 = defaultdict(list)
dict2 = defaultdict(list)
dict1['foo'] = ['1234','1453','2323'];
dict1['bar'] =['5230']; dict1['barbar'] =['6452']
dict2['1243']=['oof']
dict2['1453']=['oof']
dict2['4239']=['rba']
dict2['2323']=['rabrab']
dict2['6452']=['duh']
tt = defaultdict(defaultdict)
for p in sorted(query):
for ss in sorted(dict1[p]):
if len(dict2[ss]) != 0 and dict2[ss] != None:
tt[p][ss] = dict2[ss]
finaldict = defaultdict(set)
for src in tt:
for ss in tt[src]:
for trg in tt[src][ss]:
finaldict[(src, trg)].add(ss)
print finaldict[('foo','oof')]
The above code outputs:
>>> print finaldict[('foo','oof')]
set(['1453'])
>>> for i in finaldict:
... print i, finaldict[i]
...
('foo', 'rabrab') set(['2323'])
('barbar', 'duh') set(['6452'])
('foo', 'oof') set(['1453'])
{(k1,v):k2 for k1 in dict1 for k2 in dict2
for v in dict2[k2] if k2 in dict1[k1]}
{('barbar', 'duh'): '6452', ('foo', 'oof'): '1453', ('foo', 'rabrab'): '2323'}