Related
I want to iterate through a dictionary array like the following to only copy the 'symbol' and 'product_progress' keys and their corresponding values to new dictionary array.
[{'coin_name': 'Bitcoin', 'coin_id': 'bitcoin', 'symbol': 'btc', 'rank': 1, 'product_progress': 93, 'team': 100, 'token_fundamentals': 100, 'github_activity': 95, 'marketing': 5, 'partnership': 5, 'uniqueness': 5, 'total_score': 96, 'exchange_name': 'Bitfinex', 'exchange_link': 'https://www.bitfinex.com/t/BTCUSD', 'website': 'https://bitcoin.org/en/', 'twitter': 'https://twitter.com/Bitcoin', 'telegram': None, 'whitepaper': 'https://bitcoin.org/en/bitcoin-paper'}, {'coin_name': 'Ethereum', 'coin_id': 'ethereum', 'symbol': 'eth', 'rank': 2, 'product_progress': 87, 'team': 98, 'token_fundamentals': 97, 'github_activity': 100, 'marketing': 5, 'partnership': 5, 'uniqueness': 5, 'total_score': 94, 'exchange_name': 'Gemini', 'exchange_link': 'https://gemini.com/', 'website': 'https://www.ethereum.org/', 'twitter': 'https://twitter.com/ethereum', 'telegram': None, 'whitepaper': 'https://ethereum.org/en/whitepaper/'}] ...
The code I have so far is:
# need to iterate through list of dictionaries
for index in range(len(projectlist3)):
for key in projectlist3[index]:
d['symbol'] = projectlist3[index]['symbol']
d['token_fundamentals'] = projectlist3[index]['token_fundamentals']
print(d)
It's just saving the last entry rather than all of the entries {'symbol': 'eth', 'token_fundamentals': 97}
Given your data:
l = [{
'coin_name': 'Bitcoin',
'coin_id': 'bitcoin',
'symbol': 'btc',
'rank': 1,
'product_progress': 93,
'team': 100,
'token_fundamentals': 100,
'github_activity': 95,
'marketing': 5,
'partnership': 5,
'uniqueness': 5,
'total_score': 96,
'exchange_name': 'Bitfinex',
'exchange_link': 'https://www.bitfinex.com/t/BTCUSD',
'website': 'https://bitcoin.org/en/',
'twitter': 'https://twitter.com/Bitcoin',
'telegram': None,
'whitepaper': 'https://bitcoin.org/en/bitcoin-paper'
}, {
'coin_name': 'Ethereum',
'coin_id': 'ethereum',
'symbol': 'eth',
'rank': 2,
'product_progress': 87,
'team': 98,
'token_fundamentals': 97,
'github_activity': 100,
'marketing': 5,
'partnership': 5,
'uniqueness': 5,
'total_score': 94,
'exchange_name': 'Gemini',
'exchange_link': 'https://gemini.com/',
'website': 'https://www.ethereum.org/',
'twitter': 'https://twitter.com/ethereum',
'telegram': None,
'whitepaper': 'https://ethereum.org/en/whitepaper/'
}]
You can use listcomp
# One trimmed dict per source dict, keeping only the two wanted keys.
new_l = [
    {key: row[key] for key in ['symbol', 'token_fundamentals']}
    for row in l
]
which is better equivalent of this:
new_l = []
for d in l:
    # Build one trimmed-down dict per source dict, copying only the wanted fields.
    new_d = {}
    for field in ['symbol', 'token_fundamentals']:
        new_d[field] = d[field]
    new_l.append(new_d)
Judging by what you're writing into d, you want to save a list of objects, so this would work:
# Iterate the source list (projectlist3), not the output dict d: iterating a
# dict yields its string keys, which cannot be indexed with 'symbol'.
[{"symbol": i['symbol'], "token_fundamentals": i['token_fundamentals']} for i in projectlist3]
Result:
[{'symbol': 'btc', 'token_fundamentals': 100}, {'symbol': 'eth', 'token_fundamentals': 97}]
I need your expertise to simplify the formatting of a nested dictionary. I have a list of input signals which need to be grouped by u_id and by the timestamp field with minute precision, and then converted to the respective output format. I have posted the formatting code I have tried. I need to format and process the data as fast as possible, because time complexity matters. Help is highly appreciated.
Code snippet
final_output = []
sorted_signals = sorted(signals, key=lambda x: (x['u_id'], str(x['start_ts'])[0:8]))
data = itertools.groupby(sorted_signals, key=lambda x: (x['u_id'], calendar.timegm(time.strptime(datetime.utcfromtimestamp(x['start_ts']).strftime('%Y-%m-%d-%H:%M'),'%Y-%m-%d-%H:%M'))))
def format_signals(v):
result =[]
for i in v:
temp_dict = {}
temp_dict.update({'timestamp_utc': i['start_ts']})
for data in i['sign']:
temp_dict.update({data['name'].split('.')[0]: data['val']})
result.append(temp_dict)
return result
for k, v in data:
output_format = {'ui_id': k[0], 'minute_utc': datetime.fromtimestamp(int(k[1])), 'data': format_signals(v),
'processing_timestamp_utc': datetime.strptime(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),"%Y-%m-%d %H:%M:%S")}
final_output.append(output_format)
print(final_output)
Input
signals = [
{'c_id': '1234', 'u_id': 288, 'f_id': 331,
'sign': [{'name': 'speed', 'val': 9},
{'name': 'pwr', 'val': 1415}], 'start_ts': 1598440244,
'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
'msg_cnt': 2, 'window': 'na', 'type': 'na'},
{'c_id': '1234', 'u_id': 288, 'f_id': 331,
'sign': [{'name': 'speed', 'val': 10},
{'name': 'pwr', 'val': 1416}], 'start_ts': 1598440243,
'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
'msg_cnt': 2, 'window': 'na', 'type': 'na'},
{'c_id': '1234', 'u_id': 287, 'f_id': 331,
'sign': [{'name': 'speed', 'val': 10},
{'name': 'pwr', 'val': 1417}], 'start_ts': 1598440344,
'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT123', 'c_n': 'demo',
'msg_cnt': 2, 'window': 'na', 'type': 'na'},
{'c_id': '1234', 'u_id': 288, 'f_id': 331,
'sign': [{'name': 'speed.', 'val': 8.2},
{'name': 'pwr', 'val': 925}], 'start_ts': 1598440345,
'crt_ts': 1598440349, 'map_crt_ts': 1598440351, 'ca_id': 'AT172', 'c_n': 'demo',
'msg_cnt': 2, 'window': 'na', 'type': 'na'}
]
Current output
[{
'ui_id': 287,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
'data': [{
'timestamp_utc': 1598440344,
'speed': 10,
'pwr': 1417
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
'ui_id': 288,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
'data': [{
'timestamp_utc': 1598440244,
'speed': 9,
'pwr': 1415
}, {
'timestamp_utc': 1598440243,
'speed': 10,
'pwr': 1416
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
'ui_id': 288,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
'data': [{
'timestamp_utc': 1598440345,
'speed': 8.2,
'pwr': 925
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
Required Output
[{
'ui_id': 287,
'f_id': 311,
'c_id': 1234,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
'data': [{
'timestamp_utc': 1598440344,
'speed': 10,
'pwr': 1417
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
'ui_id': 288,
'f_id': 311,
'c_id': 1234,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 40),
'data': [{
'timestamp_utc': 1598440244,
'speed': 9,
'pwr': 1415
}, {
'timestamp_utc': 1598440243,
'speed': 10,
'pwr': 1416
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}, {
'ui_id': 288,
'f_id': 311,
'c_id': 1234,
'minute_utc': datetime.datetime(2020, 8, 26, 16, 42),
'data': [{
'timestamp_utc': 1598440345,
'speed': 8.2,
'pwr': 925
}],
'processing_timestamp_utc': datetime.datetime(2020, 8, 29, 19, 35, 46)
}]
So, let's define simple function which will extract from each object keys which required for grouping:
def extract(obj):
    """Return the grouping key for one signal dict.

    The key is the tuple ``(u_id, f_id, c_id, minute_ts)``, where
    ``minute_ts`` is ``obj['start_ts']`` rounded down to a multiple of 60.
    """
    # Floor-divide by 60 to drop the seconds, then scale back to a timestamp.
    return obj['u_id'], obj['f_id'], obj['c_id'], obj['start_ts'] // 60 * 60
Note: to implement "minute precision" I floor-divide the timestamp by 60 to drop the seconds and then multiply by 60 to get a valid timestamp back.
Then let's group objects and form final list:
from itertools import groupby
from datetime import datetime
...
final_output = []
# groupby only merges adjacent items, so the input is sorted by the same key
# function that is used for grouping.
for (uid, fid, cid, ts), ss in groupby(sorted(signals, key=extract), extract):
    final_output.append({
        'ui_id': uid,
        'f_id': fid,
        'c_id': int(cid),
        # NOTE: utcfromtimestamp()/utcnow() return naive datetimes and are
        # deprecated since Python 3.12; kept so the output matches the
        # naive datetimes shown in the required output.
        'minute_utc': datetime.utcfromtimestamp(ts),
        'data': [
            {
                'timestamp_utc': s['start_ts'],
                # Keep only the part of the name before the first '.', so a
                # name such as 'speed.' is stored under the key 'speed'.
                **{i['name'].split('.')[0]: i['val'] for i in s['sign']},
            }
            for s in ss
        ],
        'processing_timestamp_utc': datetime.utcnow(),
    })
To print final_output in readable form we could use pprint:
from pprint import pprint
...
pprint(final_output, sort_dicts=False)
Maybe this helps you to write the code in a more straightforward way. If you can just go through the signals and organize them in one loop, maybe you don't need the sort and groupby which may be heavier.
As you want to gather the signals based on the u_id, a dictionary is handy to get a single entry per u_id. This does that much, you just need to add creating the output based on this organized dict of signals:
# Map each u_id to the list of signals that carry it, in input order.
organized = {}
for s in signals:
    # setdefault creates the list the first time a u_id is seen and returns
    # the existing list on every later occurrence.
    organized.setdefault(s['u_id'], []).append(s)
pprint.pprint(organized)
Is executable there, and output pasted below, https://repl.it/repls/ShallowQuintessentialInteger
{287: [{'c_id': '1234',
'c_n': 'demo',
'ca_id': 'AT123',
'crt_ts': 1598440349,
'f_id': 331,
'map_crt_ts': 1598440351,
'msg_cnt': 2,
'sign': [{'name': 'speed', 'val': 10}, {'name': 'pwr', 'val': 1417}],
'start_ts': 1598440344,
'type': 'na',
'u_id': 287,
'window': 'na'}],
288: [{'c_id': '1234',
'c_n': 'demo',
'ca_id': 'AT123',
'crt_ts': 1598440349,
'f_id': 331,
'map_crt_ts': 1598440351,
'msg_cnt': 2,
'sign': [{'name': 'speed', 'val': 9}, {'name': 'pwr', 'val': 1415}],
'start_ts': 1598440244,
'type': 'na',
'u_id': 288,
'window': 'na'},
{'c_id': '1234',
'c_n': 'demo',
'ca_id': 'AT123',
'crt_ts': 1598440349,
'f_id': 331,
'map_crt_ts': 1598440351,
'msg_cnt': 2,
'sign': [{'name': 'speed', 'val': 10}, {'name': 'pwr', 'val': 1416}],
'start_ts': 1598440243,
'type': 'na',
'u_id': 288,
'window': 'na'},
{'c_id': '1234',
'c_n': 'demo',
'ca_id': 'AT172',
'crt_ts': 1598440349,
'f_id': 331,
'map_crt_ts': 1598440351,
'msg_cnt': 2,
'sign': [{'name': 'speed.', 'val': 8.2}, {'name': 'pwr', 'val': 925}],
'start_ts': 1598440345,
'type': 'na',
'u_id': 288,
'window': 'na'}]}
Have a dictionary:
data = {'Common': {'height': 165, 'weight': 70, 'measure': ['cm', 'kg']},
'Man': 'handsome',
'Woman': {'feature': 'pretty', 'weight': 50},
'Dog': {'feature': 'barks', 'height': 10, 'weight': 20}}
Would like to convert only dictionary keys to UPPERCASE.
Tried the following code:
d = {}
d1 = {}
for k, v in data.items():
if isinstance(v, dict):
for i, j in v.items():
d1[i.upper()] = j
d[k.upper()] = d1
else:
d[k.upper()] = v
print(d)
...which produces the output with unnecessary keys and height and weight rationalization as follows:
{'COMMON': {'HEIGHT': 10, 'WEIGHT': 20, 'MEASURE': ['cm', 'kg'], 'FEATURE': 'barks'},
'MAN': 'handsome',
'WOMAN': {'HEIGHT': 10, 'WEIGHT': 20, 'MEASURE': ['cm', 'kg'], 'FEATURE': 'barks'},
'DOG': {'HEIGHT': 10, 'WEIGHT': 20, 'MEASURE': ['cm', 'kg'], 'FEATURE': 'barks'}}
My expected output is:
{'COMMON': {'HEIGHT': 165, 'WEIGHT': 70, 'MEASURE': ['cm', 'kg']},
'MAN': 'handsome',
'WOMAN': {'FEATURE': 'pretty', 'WEIGHT': 50},
'DOG': {'FEATURE': 'barks', 'HEIGHT': 10, 'WEIGHT': 20}}
Where am I going wrong?
What is the correct dictionary comprehension like {{i.upper(): j} if isinstance(j, dict) else {k.upper(): v} for k, v in data.items() for i, j in v.items()}?
You can do something like that, copy to other dict with required keys:
data = {'Common': {'height': 165, 'weight': 70, 'measure': ['cm', 'kg']},
        'Man': 'handsome',
        'Woman': {'feature': 'pretty', 'weight': 50},
        'Dog': {'feature': 'barks', 'height': 10, 'weight': 20}}
# Upper-case only the top-level keys; the values (including nested dicts)
# are reused unchanged.
data2 = {k.upper(): v for k, v in data.items()}
UPDATE:
If you want to change not only keys on Level 1, you should use the recursive function:
data = {'Common': {'height': 165, 'weight': 70, 'measure': ['cm', 'kg']},
'Man': 'handsome',
'Woman': {'feature': 'pretty', 'weight': 50},
'Dog': {'feature': 'barks', 'height': 10, 'weight': 20}}
def keys_to_upper(dict1):
    """Return a new dict with every key of *dict1* upper-cased.

    Values that are themselves dicts are converted recursively; every other
    value (including lists) is placed in the result unchanged. The input
    dict is not modified.
    """
    return {
        k.upper(): keys_to_upper(v) if isinstance(v, dict) else v
        for k, v in dict1.items()
    }
d2 = keys_to_upper(data)
The issue with your code is that the same d1 dictionary object is shared by every nested entry of d (d1 is mutated, never replaced by a new dictionary). You can solve this by storing a copy of it, using copy.deepcopy():
Code:
from copy import deepcopy
d = {}
for k, v in data.items():
    if isinstance(v, dict):
        # Start from an empty dict for every nested dictionary; otherwise
        # keys from earlier entries would leak into later ones.
        d1 = {}
        for i, j in v.items():
            d1[i.upper()] = j
        # deepcopy also detaches nested mutable values (e.g. lists) from data.
        d[k.upper()] = deepcopy(d1)
    else:
        d[k.upper()] = v
Output:
>>> d
{'COMMON': {'HEIGHT': 165, 'WEIGHT': 70, 'MEASURE': ['cm', 'kg']},
'MAN': 'handsome',
'WOMAN': {'FEATURE': 'pretty', 'WEIGHT': 50},
'DOG': {'FEATURE': 'barks', 'HEIGHT': 10, 'WEIGHT': 20}}
Alternatively, as a dictionary comprehension:
>>> {k.upper(): {i.upper(): j for i, j in v.items()} if isinstance(v, dict) else v for k, v in data.items()}
{'COMMON': {'HEIGHT': 165, 'WEIGHT': 70, 'MEASURE': ['cm', 'kg']},
'MAN': 'handsome',
'WOMAN': {'FEATURE': 'pretty', 'WEIGHT': 50},
'DOG': {'FEATURE': 'barks', 'HEIGHT': 10, 'WEIGHT': 20}}
List comprehension is faster because it is optimized for the Python interpreter to spot a predictable pattern during looping. Besides the syntactic benefit of list comprehensions, they are often as fast or faster than equivalent use of map .
data = {'Common': {'height': 165, 'weight': 70, 'measure': ['cm', 'kg']},
        'Man': 'handsome',
        'Woman': {'feature': 'pretty', 'weight': 50},
        'Dog': {'feature': 'barks', 'height': 10, 'weight': 20}}
# Upper-case the keys whose values are dicts. The dict is rebuilt rather than
# popped/re-inserted in place, because adding or removing keys while iterating
# over data.items() is unsafe; rebuilding also keeps the original key order.
data = {k.upper() if isinstance(v, dict) else k: v for k, v in data.items()}
output>
data = {'COMMON': {'height': 165, 'weight': 70, 'measure': ['cm', 'kg']},
'Man': 'handsome',
'WOMAN': {'feature': 'pretty', 'weight': 50},
'DOG': {'feature': 'barks', 'height': 10, 'weight': 20}}
I have a DataFrame that has a nested dict within a column. I am removing the nested values and creating a column for each associated key. When using the pop function on pricings it removes values that are wanted. I wish to keep the '1 color', '2 color', '3 color', '4 color', '5 color', '6 color'.
The nested dict looks like this, with column name variations
{'name': 'printing on a DARK shirt',
'pricings': {'1 color': [{'max': 47, 'min': 1, 'price': 100.0},
{'max': 71, 'min': 48, 'price': 40.25},
{'max': 143, 'min': 72, 'price': 2.8},
{'max': 287, 'min': 144, 'price': 2.5}],
'2 color': [{'max': 47, 'min': 1, 'price': 200.0},
{'max': 71, 'min': 48, 'price': 4.25},
{'max': 143, 'min': 72, 'price': 3.8},
{'max': 287, 'min': 144, 'price': 3.5}],
'3 color': [{'max': 47, 'min': 1, 'price': 300.0},
{'max': 71, 'min': 48, 'price': 5.25},
{'max': 143, 'min': 72, 'price': 4.8},
{'max': 287, 'min': 144, 'price': 4.5}],
'4 color': [{'max': 47, 'min': 1, 'price': 400.0},
{'max': 71, 'min': 48, 'price': 6.25},
{'max': 143, 'min': 72, 'price': 5.8},
{'max': 287, 'min': 144, 'price': 5.5}],
'5 color': [{'max': 47, 'min': 1, 'price': 500.0},
{'max': 71, 'min': 48, 'price': 7.5},
{'max': 143, 'min': 72, 'price': 7.0},
{'max': 287, 'min': 144, 'price': 6.6}],
'6 color': [{'max': 47, 'min': 1, 'price': 600.0},
{'max': 71, 'min': 48, 'price': 8.5},
{'max': 143, 'min': 72, 'price': 8.0},
{'max': 287, 'min': 144, 'price': 7.6}]}}
The code I'm using looks like this
df2 = (pd.concat({i: pd.DataFrame(x) for i, x in df1.pop('variations').items()})
.reset_index(level=1, drop=True)
.join(df1 , how='left', lsuffix='_left', rsuffix='_right')
.reset_index(drop=True))
The output is as follows, with the new column name pricing added.
[{'max': 47, 'min': 1, 'price': 20.0},
{'max': 71, 'min': 48, 'price': 4.25},
{'max': 143, 'min': 72, 'price': 3.8},
{'max': 287, 'min': 144, 'price': 3.5}]
In case it's not clear: in the DataFrame, the actual list of color keys ('1 color', '2 color', '3 color', '4 color', '5 color', '6 color') has fallen off. This is important, and it is the portion I want most. To be clear, the colors have not created their own columns.
I have a dictionary of lists and the lists contain dictionaries like so:
my_dict = {
'list1': [{'catch': 100, 'id': '1'}, {'catch': 101, 'id': '2'},
{'catch': 50, 'id': '1'}],
'list2': [{'catch': 189, 'id': '1'}, {'catch': 120, 'id': '12'}],
'list3': [{'catch': 140, 'id': '1'}, {'catch': 10, 'id': '100'}]
}
What is the most pythonic way of removing the list items with common 'id' values and storing them in a separate list? So the output would be something like this:
my_dict = {
'list1': [{'catch': 101, 'id': '2'}],
'list2': [{'catch': 120, 'id': '12'}],
'list3': [ {'catch': 10, 'id': '100'}],
'list4': [{'catch': 100, 'id': '1'}, {'catch': 50, 'id': '1'},
{'catch': 189, 'id': '1'}, {'catch': 140, 'id': '1'}]
}
In my program I have 7 lists similar to this, and if an 'id' appears in two or more of these lists, I want to store all appearances of an item with that 'id' in the 8th list for further processing
with regards,
finnurtorfa
Consider restructuring your data into something like this:
>>> import itertools
>>> { k: [d['catch'] for d in v] for k, v in itertools.groupby(sorted(itertools.chain(*my_dict.itervalues()), key=lambda d: d['id']), lambda d: d['id']) }
{'1': [100, 50, 140, 189], '2': [101], '100': [10], '12': [120]}
You haven't described what your data represents, so this may not be appropriate for you. But the tools used (chain and groupby from itertools) should at least give you some ideas.
Edit: I used the sample answer from the question in my testing by accident. Fixed by adding sorting to the input to groupby.
>>> get_id = operator.itemgetter("id")
>>> flattened_dict = itertools.chain.from_iterable(my_dict.values())
>>> groups = itertools.groupby(sorted(flattened_dict, key=get_id), get_id)
>>> {k: list(v) for k, v in groups}
{'1': [{'catch': 100, 'id': '1'},
{'catch': 50, 'id': '1'},
{'catch': 140, 'id': '1'},
{'catch': 189, 'id': '1'}],
'100': [{'catch': 10, 'id': '100'}],
'12': [{'catch': 120, 'id': '12'}],
'2': [{'catch': 101, 'id': '2'}]}
Explanation:
get_id is a function that takes an object x and returns x["id"].
flattened_dict is just an iterable over all the lists (i.e. concatenating all the .values() of my_dict).
Now we sort flattened_dict with the key function get_id -- that is, sort by ID -- and group the result by id.
This basically works because itertools.groupby is awesome.
Something along the following line:
my_dict = {
    'list1': [{'catch': 100, 'id': '1'}, {'catch': 101, 'id': '2'},
              {'catch': 50, 'id': '1'}],
    'list2': [{'catch': 189, 'id': '1'}, {'catch': 120, 'id': '12'}],
    'list3': [{'catch': 140, 'id': '1'}, {'catch': 10, 'id': '100'}]
}
from itertools import groupby
# Map every 'id' to all items carrying it, across all lists.
sub = {}
for items in my_dict.values():
    # The lists are not sorted by id, so groupby may yield the same id more
    # than once per list; extending the existing entry merges those runs.
    for item_id, group in groupby(items, lambda v: v["id"]):
        sub.setdefault(item_id, []).extend(group)
print(sub)
{'1': [{'catch': 100, 'id': '1'}, {'catch': 50, 'id': '1'}, {'catch': 140, 'id': '1'}, {'catch': 189, 'id': '1'}], '12': [{'catch': 120, 'id': '12'}], '100': [{'catch': 10, 'id': '100'}], '2': [{'catch': 101, 'id': '2'}]}