This question already has answers here:
Recursive function does not return specified value
(2 answers)
Closed 1 year ago.
I have the orders of a platform in a csv file and I want to organize it to put in a database (probably mongodb). So the final data structure may be:
{'user1':
{'meals': {'hamburger': 2}, {'pizza': 1},
{'weekdays': {'monday': 1}},{'tuesday':1}, {'friday': 1}
}
So, I am trying to do a very simple code that organize the number of ocurrencies, but before I want to structure the dictionary keys dinamically. The data is in a csv file
import csv
import pprint
def addKeysNestedDict(dictionary, keys):
# print(dictionary)
if len(keys) > 1:
if keys[0] not in dictionary.keys():
dictionary[keys[0]] = {'meals': '', 'weekdays': ''}
print('inside addKeys IF: ')
print(dictionary)
addKeysNestedDict(dictionary, keys[1:])
else:
dictionary[keys[0]] = {'meals': '', 'weekdays': ''}
print('inside addKeys: ')
print(dictionary) # PRINTS EXPECTED VALUES
return dictionary
def organize_orders(file):
pp = pprint.PrettyPrinter(indent=4)
dict_read = csv.DictReader(file, fieldnames=['name', 'food', 'day'])
list_dict = list(dict_read)
set_names = set()
set_food = set()
set_day = set()
# people = {} # DOESN'T MATTER IF IT IS DECLARED BOFORE
for n in list_dict:
set_names.add(n['name'])
set_food.add(n['food'])
set_day.add(n['day'])
print(addKeysNestedDict({}, list(set_names))) # PRINTS NONE
people = (addKeysNestedDict({}, list(set_names)))
print('people: ')
print(people) # PRINTS NONE
# for s_n in set_names:
# for s_f in set_food:
# for s_d in set_day:
# people[s_n][s_f] = list_dict.count(s_f)
# people[s_n][s_d] = list_dict.count(s_d)
def analyze_log(path_to_file):
with open(path_to_file) as csvfile:
return(organize_orders(csvfile))
analyze_log('some csv file with path')
I culd not figure out why I get none returned from addKeysNestedDict() method, since it prints exactly what I want one line before method return
Change the function to:
def addKeysNestedDict(dictionary, keys):
# print(dictionary)
if len(keys) > 1:
if keys[0] not in dictionary.keys():
dictionary[keys[0]] = {'meals': '', 'weekdays': ''}
print('inside addKeys IF: ')
print(dictionary)
addKeysNestedDict(dictionary, keys[1:])
else:
dictionary[keys[0]] = {'meals': '', 'weekdays': ''}
print('inside addKeys: ')
print(dictionary) # PRINTS EXPECTED VALUES
return dictionary
Notice that I moved the return dictionary statement out of the specific branch, which causes both of your branches to return the dictionary you're building in your function.
Related
This is my code:
I'm trying to use the following code to insert data into an array of dictionaries but unable to insert properly.
Code:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if re.match("module_name", k):
module['name'] = v
if re.match("module_serial", k):
module['serial'] = v
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Expected output:
[{'name':'CH1.FM5', serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Actual output:
['serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Run it here: https://repl.it/repls/WetSteelblueRange
Please note that the order of the data test_list cannot be altered as it comes via an external API so I used regex. Any ideas would be appreciated.
Your code relies on the wrong assumption that keys are ordered and that the serial will always follow the name. The proper solution here is to use a dict (actually a collections.defaultdict to make things easier) to collect and regroup the values you're interested in based on the module number (the final '-N' in the key). Note that you don't need regexps here - Python string already provide the necessary operations for this task:
from collections import defaultdict
def parse_subdevice_modules(row):
modules = defaultdict(dict)
for k, v in row.items():
# first get rid of what we're not interested in
if not v:
continue
if not k.startswith("module_"):
continue
# retrieve the key number (last char) with
# negative string indexing:
key_num = k[-1]
# retrieve the useful part of the key ("name" or "serial")
# by splitting the string:
key_name = k.split("_")[1].split("-")[0]
# and now we just have to store this in our defaultdict
modules[key_num][key_name] = v
# and return only the values.
# NB: in py2.x you don't need the call to `list`,
# you can just return `modules.values()` directly
modules = list(modules.values())
return modules
test_list = {
'profile': '', 'chassis_name': '123', 'supplier_order_num': '',
'device_type': 'mass_storage', 'device_subtype': 'flashblade',
'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5',
'module_name-2': 'CH1.FM6', 'rack_total_pos': '',
'asset_tag': '002000027493', 'module_serial-2': 'PSUXA12345681',
'purchase_order': '0004530869', 'build': 'Test_Build_for_SNOW',
'po_line_num': '00190', 'mac_address': '', 'position': '7',
'model': 'FB-528TB-10X52.8TB', 'manufacturer': 'PureStorage',
'rack': 'Test_Rack_2', 'serial': 'PMPAM1842147D', 'name': 'FB02'
}
print(parse_subdevice_modules(test_list))
You can do somthing like this also.
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6',
'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules_list = []
for key, value in row.items():
if not value or key.startswith('module_name'):
continue
if key.startswith('module_serial'):
module_name_key = f'module_name-{key.split("-")[-1]}'
modules_list.append({'serial': value, 'name': row[module_name_key]})
return modules_list
print(parse_subdevice_modules(test_list))
Output:
[{'serial': 'PSUXA12345680', 'name': 'CH1.FM5'}, {'serial': 'PSUXA12345681', 'name': 'CH1.FM6'}]
You would need to check if module contains 2 elements and append it to modules:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if k.startswith('module_name'):
module['name'] = v
elif k.startswith("module_serial"):
module['serial'] = v
if len(module) == 2:
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Returns:
[{'serial': 'PSUXA12345680'}, {'name': 'CH1.FM5'}, {'name': 'CH1.FM6'}, {'serial': 'PSUXA12345681'}]
I'm trying to parse the item names and it's corresponding values from the below snippet. dt tag holds names and dd containing values. There are few dt tags which do not have corresponding values. So, all the names do not have values. What I wish to do is keep the values blank against any name if the latter doesn't have any values.
These are the elements I would like to scrape data from:
content="""
<div class="movie_middle">
<dl>
<dt>Genres:</dt>
<dt>Resolution:</dt>
<dd>1920*1080</dd>
<dt>Size:</dt>
<dd>1.60G</dd>
<dt>Quality:</dt>
<dd>1080p</dd>
<dt>Frame Rate:</dt>
<dd>23.976 fps</dd>
<dt>Language:</dt>
</dl>
</div>
"""
I've tried like below:
soup = BeautifulSoup(content,"lxml")
title = [item.text for item in soup.select(".movie_middle dt")]
result = [item.text for item in soup.select(".movie_middle dd")]
vault = dict(zip(title,result))
print(vault)
It gives me messy results (wrong pairs):
{'Genres:': '1920*1080', 'Resolution:': '1.60G', 'Size:': '1080p', 'Quality:': '23.976 fps'}
My expected result:
{'Genres:': '', 'Resolution:': '1920*1080', 'Size:': '1.60G', 'Quality:': '1080p','Frame Rate:':'23.976 fps','Language:':''}
Any help on fixing the issue will be highly appreciated.
You can loop through the elements inside dl. If the current element is dt and the next element is dd, then store the value as the next element, else set the value as empty string.
dl = soup.select('.movie_middle dl')[0]
elems = dl.find_all() # Returns the list of dt and dd
data = {}
for i, el in enumerate(elems):
if el.name == 'dt':
key = el.text.replace(':', '')
# check if the next element is a `dd`
if i < len(elems) - 1 and elems[i+1].name == 'dd':
data[key] = elems[i+1].text
else:
data[key] = ''
You can use BeautifulSoup to parse the dl structure, and then write a function to create the dictionary:
from bs4 import BeautifulSoup as soup
import re
def parse_result(d):
while d:
a, *_d = d
if _d:
if re.findall('\<dt', a) and re.findall('\<dd', _d[0]):
yield [a[4:-5], _d[0][4:-5]]
d = _d[1:]
else:
yield [a[4:-5], '']
d = _d
else:
yield [a[4:-5], '']
d = []
print(dict(parse_result(list(filter(None, str(soup(content, 'html.parser').find('dl')).split('\n')))[1:-1])))
Output:
{'Genres:': '', 'Resolution:': '1920*1080', 'Size:': '1.60G', 'Quality:': '1080p', 'Frame Rate:': '23.976 fps', 'Language:': ''}
For a slightly longer, although cleaner solution, you can create a decorator to strip the HTML tags of the output, thus removing the need for the extra string slicing in the main parse_result function:
def strip_tags(f):
def wrapper(data):
return {a[4:-5]:b[4:-5] for a, b in f(data)}
return wrapper
#strip_tags
def parse_result(d):
while d:
a, *_d = d
if _d:
if re.findall('\<dt', a) and re.findall('\<dd', _d[0]):
yield [a, _d[0]]
d = _d[1:]
else:
yield [a, '']
d = _d
else:
yield [a, '']
d = []
print(parse_result(list(filter(None, str(soup(content, 'html.parser').find('dl')).split('\n')))[1:-1]))
Output:
{'Genres:': '', 'Resolution:': '1920*1080', 'Size:': '1.60G', 'Quality:': '1080p', 'Frame Rate:': '23.976 fps', 'Language:': ''}
from collections import defaultdict
test = soup.text.split('\n')
d = defaultdict(list)
for i in range(len(test)):
if (':' in test[i]) and (':' not in test[i+1]):
d[test[i]] = test[i+1]
elif ':' in test[i]:
d[test[i]] = ''
d
defaultdict(list,
{'Frame Rate:': '23.976 fps',
'Genres:': '',
'Language:': '',
'Quality:': '1080p',
'Resolution:': '1920*1080',
'Size:': '1.60G'})
The logic here is that you know that every key will have a colon. Knowing this, you can write an if else statement to capture the unique combinations, whether that is key followed by key or key followed by value
Edit:
In case you wanted to clean your keys, below replaces the : in each one:
d1 = { x.replace(':', ''): d[x] for x in d.keys() }
d1
{'Frame Rate': '23.976 fps',
'Genres': '',
'Language': '',
'Quality': '1080p',
'Resolution': '1920*1080',
'Size': '1.60G'}
The problem is that empty elements are not present. Since there is no hierarchy between the <dt> and the <dd>, I'm afraid you'll have to craft the dictionary yourself.
vault = {}
category = ""
for item in soup.find("dl").findChildren():
if item.name == "dt":
if category == "":
category = item.text
else:
vault[category] = ""
category = ""
elif item.name == "dd":
vault[category] = item.text
category = ""
Basically this code iterates over the child elements of the <dl> and fills the vault dictionary with the values.
i use this code to store items of dictionaries in doc variable.
This code works fine but I miss the first element of time because of the if statement.
def convert(old):
time_key = 'Time '
# Save the time
time_item = (time_key, old[time_key])
# Add remove it
del old[time_key]
# Copy remaining items to new dicts and save them in a list
return [dict([time_item, item]) for item in old.items()]
row = {
'Time ': '2017-12-01T13:54:04',
'Energy [kWh]': '0.01',
'Voltage [V]': '221.64',
'Current [A]': '0.08',
}
new_data = convert(row)
#print(new_data)
Zeitvalue= ""
Device=""
Value=""
for d in new_data:
#print(d)
for key, value in d.items():
if key == 'Time ':
Zeitvalue = value
#print(value)
continue
else:
Device = key
Value = value
doc = {'Time ':Zeitvalue,'Device':Device, 'Measure':Value}
print("This is doc variable:",doc) # doc vaiable with missed time element
SO when i print doc i got this
Output:
doc: {'Device': 'Voltage [V]', 'Measure': '221.64', 'Time ': ''} # **ISSUE: variable time is missed here, How to fix it ?**
doc: {'Device': 'Current [A]', 'Measure': '0.08', 'Time ': '2017-12-01T13:54:04'}
doc: {'Device': 'Energy [kWh]', 'Measure': '0.01', 'Time ': '2017-12-01T13:54:04'}
See the below changes in the code. Remove continue statement. Also assign value to doc after the inner loop for dictionary is over as you need all three values.
for d in new_data:
#print(d)
for key, value in d.items():
if key == 'Time ':
Zeitvalue = value
#print(value)
else:
Device = key
Value = value
doc = {'Time ':Zeitvalue,'Device':Device, 'Measure':Value}
print(doc)
if you are just setting values then place the doc assignment outside the for loop
for d in new_data:
for key, value in d.items():
if key == 'Time ':
Zeitvalue = value
continue
else:
Device = key
Value = value
doc = {'Time ':Zeitvalue,'Device':Device, 'Measure':Value}
you have problem in this line:
doc = {'Time ':Zeitvalue,'Device':Device, 'Measure':Value} when you use it inside the for loop! , each iteration overrides the previous assignment, furthermore - you cause unexpected behavior , since dictionary is not order data structure - meaning : if you encountered "tine" key first - it will work fine , but if you did not encountered 'time' first - the value of it is still == "" , since you initiate it to that value and you did not updated it since.
move the doc = {'Time ':Zeitvalue,'Device':Device, 'Measure':Value} to the outer loop , and not the one going over each key and value and you will be fine.
please help
value = 'http://localhost:8001/issues/load?project_name=react&since=2016-03-24&until=2017-03-25&state=closed&sort=created&direction=asc&per_page=100&labels=Type:%20Bug'
hashing = hash(value)
words = value.split('&')
for data in words:
words2 = data.split('=')
print(words2)
Since words2 has each split into two like:
['http://localhost:8001/issues/load?project_name', 'react']
['since', '2016-03-24']
['until', '2017-03-25']
Use that to add values to a dictionary:
>>> key_vals = {}
>>> for data in words:
... words2 = data.split('=')
... key_vals[words2[0]] = words2[1]
...
>>> pprint.pprint(key_vals)
{'direction': 'asc',
'http://localhost:8001/issues/load?project_name': 'react',
'labels': 'Type:%20Bug',
'per_page': '100',
'since': '2016-03-24',
'sort': 'created',
'state': 'closed',
'until': '2017-03-25'}
And the assignment to key_vals can be reduced to:
key_vals = {key: val for (key, val) in [data.split('=') for data in words]}
I'm struggling with a recursive merge problem.
Let's say I have:
a=[{'name':"bob",
'age':10,
'email':"bob#bla",
'profile':{'id':1, 'role':"admin"}},
{'name':"bob",
'age':10,
'email':"other mail",
'profile':{'id':2, 'role':"dba"},
'home':"/home/bob"
}]
and I need something to recursively merge entries. If value for an existing given key on the same level is different it appends the value to an array.
b = merge(a)
print b
{'name':"bob",
'age':10,
'email':["bob#bla","other mail"],
'profile':{'id':[1,2], 'role'=["admin", "dba"], 'home':"/home/bob"}
I wrote this code:
def merge(items):
merged = {}
for item in items:
for key in item.keys():
if key in merged.keys():
if item[key] != merged[key]:
if not isinstance(merged[key], list):
merged[key] = [merged[key]]
if item[key] not in merged[key]:
merged[key].append(item[key])
else:
merged[key] = item[key]
return merged
The output is:
{'age': 10,
'email': ['bob#bla', 'other mail'],
'home': '/home/bob',
'name': 'bob',
'profile': [{'id': 1, 'role': 'admin'}, {'id': 2, 'role': 'dba'}]}
Which is not what I want.
I can't figure out how to deal with recursion.
Thanks :)
As you iterate over each dictionary in the arguments, then each key and value in each dictionary, you want the following rules:
If there is nothing against that key in the output, add the new key and value to the output;
If there is a value for that key, and it's the same as the new value, do nothing;
If there is a value for that key, and it's a list, append the new value to the list;
If there is a value for that key, and it's a dictionary, recursively merge the new value with the existing dictionary;
If there is a value for that key, and it's neither a list nor a dictionary, make the value in the output a list of the current value and the new value.
In code:
def merge(*dicts):
"""Recursively merge the argument dictionaries."""
out = {}
for dct in dicts:
for key, val in dct.items():
try:
out[key].append(val) # 3.
except AttributeError:
if out[key] == val:
pass # 2.
elif isinstance(out[key], dict):
out[key] = merge(out[key], val) # 4.
else:
out[key] = [out[key], val] # 5.
except KeyError:
out[key] = val # 1.
return out
In use:
>>> import pprint
>>> pprint.pprint(merge(*a))
{'age': 10,
'email': ['bob#bla', 'other mail'],
'home': '/home/bob',
'name': 'bob',
'profile': {'id': [1, 2], 'role': ['admin', 'dba']}}