flat delimited list to nested multilevel list - python

I am trying to make a nested list from a flat delimited list.
As an example:
L1=[
'YYYYY', 'OPEN', ' 111', ' 222', 'CLOSE',
'XXXX','OPEN', ' 333', ' 444', 'OPEN', ' 555', ' 666', 'CLOSE','CLOSE'
]
How to get nested multilevel list:
L2=
[
['YYYYY',
' 111',
' 222',
],
['XXXX',
' 333',
[' 444',
' 555',
' 666',
]
]
]

Use a stack:
def build_multilevel(entries):
    """Build a nested list from a flat list delimited by 'OPEN'/'CLOSE'.

    An 'OPEN' marker turns the entry that immediately precedes it into the
    first element of a new nested list; every following entry is appended
    to that nested list until the matching 'CLOSE' marker is reached.

    :param entries: iterable of entries, including 'OPEN'/'CLOSE' markers.
    :return: the nested list structure.
    :raises IndexError: if 'OPEN' appears while the current list is empty,
        or if an entry follows more 'CLOSE' markers than were opened.
    """
    result = []
    # stack[-1] is always the list that is currently being filled.
    stack = [result]
    for entry in entries:
        if entry == 'OPEN':
            # convert last element of the top-most list on the stack
            # to a new, nested list, and push that new list on top
            nested = [stack[-1][-1]]
            stack[-1][-1] = nested
            stack.append(nested)
        elif entry == 'CLOSE':
            stack.pop()
        else:
            stack[-1].append(entry)
    return result
Demo:
>>> L1=[
... 'YYYYY', 'OPEN', ' 111', ' 222', 'CLOSE',
... 'XXXX','OPEN', ' 333', ' 444', 'OPEN', ' 555', ' 666', 'CLOSE','CLOSE'
... ]
>>> def build_multilevel(entries):
... result = []
... stack = [result]
... for i, entry in enumerate(entries):
... if entry == 'OPEN':
... # convert last element of the top-most list on the stack
... # to a new, nested list, and push that new list on top
... stack[-1][-1] = [stack[-1][-1]]
... stack.append(stack[-1][-1])
... elif entry == 'CLOSE':
... stack.pop()
... else:
... stack[-1].append(entry)
... return result
...
>>> build_multilevel(L1)
[['YYYYY', ' 111', ' 222'], ['XXXX', ' 333', [' 444', ' 555', ' 666']]]

def flat_list(_list):
    """
    Flatten an arbitrarily nested list into a single flat list.

    :param _list: a (possibly nested) list, or any single non-list value.
    :return: a new flat list of the non-list leaves in their original order;
        a non-list argument is returned wrapped in a one-element list.
    """
    if not isinstance(_list, list):
        return [_list]
    res = []
    for item in _list:
        if isinstance(item, list):
            # Recurse into the sub-list and splice its leaves in place.
            res.extend(flat_list(item))
        else:
            res.append(item)
    return res

Related

Function to remove specific word from lists

I have several lists that contain the word 'Data' at varying locations in the list. I'd like to build a function that removes just that word (not the whole element) from each list.
example_list = ['Client 1 Data','Client 2','Client 3 Data']
output_list = ['Client 1','Client 2','Client 3']
I've tried this but it is not working:
def my_func(column):
for i in range(len(column)):
column.replace('Data', '')
Consider using a list comprehension:
def remove_word(words: list[str], word_to_remove: str) -> list[str]:
    """Return a new list with *word_to_remove* deleted from every string.

    Each occurrence of the word is removed and the remaining text is stripped
    of leading/trailing whitespace. The input list is not modified.
    """
    return [w.replace(word_to_remove, '').strip() for w in words]


def main() -> None:
    """Demonstrate remove_word on the example list from the question."""
    example_list = ['Client 1 Data', 'Client 2', 'Client 3 Data']
    print(f'{example_list = }')
    output_list = remove_word(words=example_list, word_to_remove='Data')
    print(f'{output_list = }')


if __name__ == '__main__':
    main()
Output:
example_list = ['Client 1 Data', 'Client 2', 'Client 3 Data']
output_list = ['Client 1', 'Client 2', 'Client 3']
You can use map on the list, but if you want to correct your own code, you need to specify which index you want to change:
>>> list(map(lambda x: x.replace('Data', ''), example_list))
['Client 1 ', 'Client 2', 'Client 3 ']
Your Code:
def my_func(column):
    """Remove the substring 'Data' from every string of *column*, in place.

    str.replace returns a new string, so each result is written back to the
    same index of the list. Nothing is returned; the caller's list is mutated.
    """
    for i, entry in enumerate(column):
        column[i] = entry.replace('Data', '')
Output:
my_func(example_list)
print(example_list)
# ['Client 1 ', 'Client 2', 'Client 3 ']
Calling replace returns a modified string, it doesn't modify the original string. If you want to modify the list, you need to call replace on each individual string and assign the modified strings to the indices in the original list:
>>> def my_func(column):
... for i in range(len(column)):
... column[i] = column[i].replace('Data', '')
...
>>> example_list = ['Client 1 Data','Client 2','Client 3 Data']
>>> my_func(example_list)
>>> example_list
['Client 1 ', 'Client 2', 'Client 3 ']
If modifying the original list inside the function isn't a requirement, it's actually easier to do this by building and returning a new list:
>>> def my_func(column):
... return [entry.replace('Data', '') for entry in column]
...
>>> my_func(['Client 1 Data','Client 2','Client 3 Data'])
['Client 1 ', 'Client 2', 'Client 3 ']
Another option is to build the new list inside the function (e.g. via comprehension) and then assign it into the original list via a slice assignment:
>>> def my_func(column):
... column[:] = [entry.replace('Data', '') for entry in column]
...
>>> example_list = ['Client 1 Data','Client 2','Client 3 Data']
>>> my_func(example_list)
>>> example_list
['Client 1 ', 'Client 2', 'Client 3 ']

Matching a list's item with another list in python

I have list1 let's say:
items=['SETTLEMENT DATE:', 'CASH ACCOUNT:', 'ISIN:', 'TRADE DATE:', 'PRICE CFA', 'CASH ACCOUNT:', 'SECURITY NAME:']
I have a list2 let's say:
split_t=['{1:F01SCBLMUMUXSSU0438794344}{2:O5991054200218SCBLGHACXSSU04387943442002181454N}{3:{108:2175129}}{4:', ':20:EPACK', 'SALE', 'CDI', ':21:EPACK', 'SALE', 'CDI', ':79:ATTN:MU', 'TEAM', 'KINDLY', 'ACCEPT', 'THIS', 'AS', 'AUTHORISATION', 'TO', 'SETTLE', 'TRADE', 'WITH', 'DETAILS', 'BELOW', 'MARKET:', 'COTE', 'DIVOIRE', 'CLIENT', 'NAME:', 'EPACK', 'OFFSHORE', 'ACCOUNT', 'NAME:', 'STANDARD', 'CHARTERED', 'GHANA', 'NOMINEE', 'RE', 'DATABANK', 'EPACK', 'INVESTMENT', 'FUND', 'LTD', 'IVORY', 'COAST', 'TRADE', 'TYPE:', 'DELIVER', 'AGAINST', 'PAYMENT', 'SCA:', '2CEPACKIVO', 'CASH', 'ACCOUNT:', '420551901501', 'TRADE', 'DETAILS:', 'TRADE', 'DATE:', '17.02.2020', 'SETTLEMENT', 'DATE:', '20.02.2020', 'SECURITY', 'NAME:', 'SONATEL', 'ISIN:', 'SN0000000019', 'CLEARING', 'BIC:', 'SCBLCIABSSUXXX', 'QUANTITY:', '10,500', 'PRICE', 'CFA', '14,500.4667', 'CONSIDERATION', 'CFA', '152,254,900.00', 'TOTAL', 'FEES', '1,796,608.00', 'SETTLEMENT', 'AMOUNT', 'CFA', '150,458,292.35', 'CURRENCY:', 'CFA', 'AC:', 'CI0000010373', 'REGARDS', 'STANDARD', 'CHARTERED', 'BANK', '-}']
I want to search contiguously the items of list1 in list2 and return the immediate next element of list2 when there's a match.
As you can see, one item of list1 is probably two contiguous item in list2.
For example, the 1st element of list1, 'SETTLEMENT DATE:', There's a match in list2 and I want to return the next element of the match in list2, '20.02.2020'.
I have written my python function accordingly:
def test(items, split_t):
phrases = [w for w in items]
for i, t in enumerate(split_t):
to_match = split_t[i+1: i+1+len(phrases)]
if to_match and all(p == m for p,m in zip(phrases, to_match)):
return [*map(lambda x:split_t[i])]
Which is returning None even when it has matches as you can see. I might be wrong in implementing the *map in the return statement which I'm failing to understand from debugging. Any help is highly appreciated.
One way is:
>>> import re
>>> def test(items, split_t):
... split_t_str = ' '.join(split_t)
... res = {}
... for i in items:
... m = re.search(rf'(?<={i})\s(.*?)\s', split_t_str)
... res[i] = m.group(1)
... return res
...
>>> test(items, split_t)
{'SETTLEMENT DATE:': '20.02.2020', 'CASH ACCOUNT:': '420551901501', 'ISIN:': 'SN0000000019', 'TRADE DATE:': '17.02.2020', 'PRICE CFA': '14,500.4667', 'SECURITY NAME:': 'SONATEL'}
The above:
creates a str from split_t, i.e., split_t_str,
iterates over items using each element to construct a regex for performing a positive lookbehind assertion (see re's docs) against split_t_str,
stores each element as key in a dict, called res, and the corresponding match as value, and
returns the dict
If there are no spaces inside the items of "list 2", you can do it this way:
def match(l1, l2):
    """Return, for each phrase of *l1* found in *l2*, the token that follows it.

    The tokens of *l2* are joined with single spaces so that a phrase made of
    several words (e.g. 'SETTLEMENT DATE:') can match contiguous tokens. Only
    the first occurrence of each phrase is used; phrases that are not found
    are skipped.

    :param l1: phrases to look for.
    :param l2: list of tokens (the tokens themselves must not contain spaces).
    :return: list with the token following each matched phrase, in *l1* order.
    """
    result = []
    # The trailing space guarantees that the last token is also terminated.
    joined = ' '.join(l2) + ' '
    for phrase in l1:
        index = joined.find(phrase)
        if index == -1:
            continue
        # Skip the phrase and the single space that separates it from the
        # next token, then read up to the following space.
        start = index + len(phrase) + 1
        result.append(joined[start:joined.find(' ', start)])
    return result
print(match(items, split_t))
Output:
['20.02.2020', '420551901501', 'SN0000000019', '17.02.2020', '14,500.4667', '420551901501', 'SONATEL']

Convert a list of tab prefixed strings to a dictionary

Text mining attempts here, I would like to turn the below:
a=['Colors.of.the universe:\n',
' Black: 111\n',
' Grey: 222\n',
' White: 11\n'
'Movies of the week:\n',
' Mission Impossible: 121\n',
' Die_Hard: 123\n',
' Jurassic Park: 33\n',
'Lands.categories.said:\n',
' Desert: 33212\n',
' forest: 4532\n',
' grassland : 431\n',
' tundra : 243451\n']
to this:
{'Colors.of.the universe':{Black:111,Grey:222,White:11},
'Movies of the week':{Mission Impossible:121,Die_Hard:123,Jurassic Park:33},
'Lands.categories.said': {Desert:33212,forest:4532,grassland:431,tundra:243451}}
Tried this code below but it was not good:
{words[1]:words[1:] for words in a}
which gives
{'o': 'olors.of.the universe:\n',
' ': ' tundra : 243451\n',
'a': 'ands.categories.said:\n'}
It only takes the first word as the key which is not what's needed.
A dict comprehension is an interesting approach.
a = ['Colors.of.the universe:\n',
     ' Black: 111\n',
     ' Grey: 222\n',
     ' White: 11\n',
     'Movies of the week:\n',
     ' Mission Impossible: 121\n',
     ' Die_Hard: 123\n',
     ' Jurassic Park: 33\n',
     'Lands.categories.said:\n',
     ' Desert: 33212\n',
     ' forest: 4532\n',
     ' grassland : 431\n',
     ' tundra : 243451\n']

# Maps each category name to a dict of {item name: integer value}.
result = {}
current_key = None
for w in a:
    # A line with leading whitespace is an item of the current category
    if w.startswith(' '):
        # Splitting item (i.e. ' Desert: 33212\n' -> [' Desert', ' 33212\n'])
        splitted = w.split(':')
        # Setting the key and the value of the item
        # Removing redundant spaces and '\n'
        # Converting value to number
        k, v = splitted[0].strip(), int(splitted[1].replace('\n', ''))
        result[current_key][k] = v
    # Else, it's a category
    else:
        # Removing ':' and '\n' from category name
        current_key = w.replace(':', '').replace('\n', '')
        # If category does not exist yet - create a dictionary for it
        result.setdefault(current_key, {})
# {'Colors.of.the universe': {'Black': 111, 'Grey': 222, 'White': 11}, 'Movies of the week': {'Mission Impossible': 121, 'Die_Hard': 123, 'Jurassic Park': 33}, 'Lands.categories.said': {'Desert': 33212, 'forest': 4532, 'grassland': 431, 'tundra': 243451}}
print(result)
That's really close to valid YAML already. You could just quote the property labels and parse. And parsing a known format is MUCH superior to dealing with and/or inventing your own. Even if you're just exploring base python, exploring good practices is just as (probably more) important.
import re
import yaml
raw = ['Colors.of.the universe:\n',
       ' Black: 111\n',
       ' Grey: 222\n',
       ' White: 11\n',
       'Movies of the week:\n',
       ' Mission Impossible: 121\n',
       ' Die_Hard: 123\n',
       ' Jurassic Park: 33\n',
       'Lands.categories.said:\n',
       ' Desert: 33212\n',
       ' forest: 4532\n',
       ' grassland : 431\n',
       ' tundra : 243451\n']

# Fix spaces in property names: rewrite every line as
# "<indent>'<label>': <value>" so that the result is valid YAML.
fixed = []
for line in raw:
    match = re.match(r'^( *)(\S.*?): ?(\S*)\s*', line)
    if match:
        # Drop whitespace left between the label and the colon
        # (e.g. 'grassland :') so it does not end up inside the quoted key,
        # and double any single quote, which is how YAML escapes it inside
        # a single-quoted scalar.
        label = match.group(2).rstrip().replace("'", "''")
        fixed.append('{indent}{safe_label}:{value}'.format(
            indent=match.group(1),
            safe_label="'{}'".format(label),
            value=' ' + match.group(3) if match.group(3) else '',
        ))
    else:
        raise Exception("regex failed")

# NOTE(review): the input here is a local literal; for untrusted input prefer
# yaml.safe_load over yaml.load with FullLoader.
parsed = yaml.load('\n'.join(fixed), Loader=yaml.FullLoader)
print(parsed)

python transform complex list of lists into a string

I have a complex list of lists that looks like that :
[[['MARIA DUPONT',
' infos : ',
[' age = 28',
' yeux = bleus',
' sexe = femme']],
[' + ']],
[['PATRICK MARTIN',
' infos : ',
[' age = 53',
' yeux = marrons',
' sexe = homme']],
[' + ']],
[['JULIE SMITH',
' infos : ',
[' age = 17',
'yeux = verts',
'sexe = femme']],
[' fin ']]]
I am trying to transform it into a string. At the end I want to print that :
MARIA DUPONT,
infos :
age = 28
yeux = bleus
sexe = femme
+
PATRICK MARTIN
infos :
age = 53
yeux = marrons
sexe = homme
+
JULIE SMITH
infos :
age = 17
yeux = verts
sexe = femme
fin
My real data are more complicated and I have lists into level 5.
So I am looking for a way to solve the problem I explained to be able to adapt it and apply it to my real data.
I am trying with
''.join(list)
and
''.join(x for x in list)
But in both cases I have the error TypeError: list indices must be integers or slices, not list
I've tried other ways, but now I'm confused and I haven't found a good solution to reach my goal.
Any help would be appreciated, and thanks in advance. (and sorry for my bad english!)
You can use str.join with a single pass over the lists:
data = [
    [['MARIA DUPONT', ' infos : ', [' age = 28', ' yeux = bleus', ' sexe = femme']], [' + ']],
    [['PATRICK MARTIN', ' infos : ', [' age = 53', ' yeux = marrons', ' sexe = homme']], [' + ']],
    [['JULIE SMITH', ' infos : ', [' age = 17', 'yeux = verts', 'sexe = femme']], [' fin ']],
]

# Each record is ([name, header, details], [separator]); render every record
# as newline-joined text and then join the records with newlines.
_chunks = []
for (_name, _header, _details), (_separator,) in data:
    _lines = [_name, _header]
    _lines.extend(_details)
    # The separator is surrounded by its own newlines.
    _lines.append(f'\n{_separator}\n')
    _chunks.append('\n'.join(_lines))
r = '\n'.join(_chunks)
Output:
MARIA DUPONT
infos :
age = 28
yeux = bleus
sexe = femme
+
PATRICK MARTIN
infos :
age = 53
yeux = marrons
sexe = homme
+
JULIE SMITH
infos :
age = 17
yeux = verts
sexe = femme
fin
If your lists are arbitrarily nested, then you can use recursion with a generator:
def flatten(d):
    """Lazily yield every string contained in an arbitrarily nested iterable.

    A string is yielded as-is; anything else is iterated and each of its
    elements is flattened recursively, depth first and in order.
    """
    if isinstance(d, str):
        yield d
    else:
        for item in d:
            # Delegate directly to the sub-generator instead of first
            # collecting all leaves into a temporary list.
            yield from flatten(item)
print('\n'.join(flatten(data)))
.join() won't work with a list in the list. I can offer you a solution based on recursion.
def list_to_str(_list):
    """Concatenate all strings of an arbitrarily nested list into one string.

    :param _list: a string, or a (possibly nested) list of strings.
    :return: the leaves concatenated in order, without any separator.
    :raises TypeError: if a leaf is neither a list nor a string.
    """
    if isinstance(_list, list):
        # str.join builds the result in one pass instead of repeated "+=".
        return "".join(list_to_str(item) for item in _list)
    if not isinstance(_list, str):
        raise TypeError(
            "expected str or list, got {}".format(type(_list).__name__)
        )
    return _list
result_string = list_to_str(your_list)
print(result_string)
I can't tell if you have a list with varying levels of lists but if so, you would probably need a conditional to see if the list goes further and recursively iterate the list.
def convert_list(dataset):
    """Return the elements of a nested list concatenated into one string.

    Nested lists are flattened recursively; every other element is converted
    with str(). No separator (and no newline) is inserted between elements.
    """
    return ''.join(
        convert_list(element) if isinstance(element, list) else str(element)
        for element in dataset
    )
This will not print the newlines you want but it does return the list as a string.
Write a recursive function to get inside your lists like below:
def print_data(input_list):
    """Print every non-list element of a nested list, one per line.

    Sub-lists are visited recursively, depth first and in order.
    """
    for obj in input_list:
        if isinstance(obj, list):
            print_data(obj)
        else:
            print(obj)


input_list = [[['MARIA DUPONT',
                ' infos : ',
                [' age = 28',
                 ' yeux = bleus',
                 ' sexe = femme']],
               [' + ']],
              [['PATRICK MARTIN',
                ' infos : ',
                [' age = 53',
                 ' yeux = marrons',
                 ' sexe = homme']],
               [' + ']],
              [['JULIE SMITH',
                ' infos : ',
                [' age = 17',
                 'yeux = verts',
                 'sexe = femme']],
               [' fin ']]]
print_data(input_list)

Matching value of 1 dictionary with key of another dictionary using Python

I have 2 dictionaries. Is it possible to look up each of DictA's keys in DictB's values and, if one of DictB's values contains a match, return the corresponding DictB key?
​
Is it possible to be done?
Please give me some advice/tips. What am I doing wrong? Thank you.
Example:
A: {'WNT3A': '1732', 'RG59L': ' ', 'SCZD9': ' ', 'CD241': '6005', 'C2': '', 'RH': ' '}
B: {'': [''], '6005': ['RH50A', ' CD241', ' SLC42A1'], '603': [''], '6000': [''], '8787': ['PERRS', ' RGS9L', ' MGC26458'], '41': ['ACCN2', ' BNaC2', ' hBNaC2'], '8490': [''], '9628': [''], '5999': ['SCZD9']}
Result :
new_A: {'WNT3A': '1732', 'RG59L': ' ', 'SCZD9': '5999 ', 'CD241': '6005', 'C2': '', 'RH': ' '}​
So far, I've coded this but it only seems to return me a dictionary that is whole or a dictionary containing only dictionary B.
new_a = {}
for key in ref_dict:
for value in (map(lambda x: x.strip(), ref_dict[key][0])):
if(not fill_dict.has_key(key)):
continue
elif(ref_dict[value] != fill_dict[key]):
new_a[value] = (ref_dict[key], fill_dict[key])
print new_a
Result:
{'WNT3A': '1732', 'RG59L': '', 'SCZD9': '', 'CD241': '6005', 'C2': '', 'RH': ''}
Another code I've tried is :
new_dict = {}
for k, v in fill_dict.iteritems():
vals = []
if isinstance(v, list):
for i in v:
vals.append(ref_dict.get(i))
else:
vals.append(ref_dict.get(v))
if not vals:
continue
new_dict[k] = vals
print new_dict
Result:
{'WNT3A': [None], 'RG59L': [['']], 'SCZD9': [['']], 'CD241': [['RH50A', ' CD241', ' SLC42A1']], 'C2': [['']], 'RH': [['']]}
You can use a dict-comprehension with next() for this:
>>> {k: next((k1 for k1, v1 in B.items() if k in v1), v) for k, v in A.items()}
{'WNT3A': '1732', 'RG59L': ' ', 'SCZD9': '5999', 'CD241': '6005', 'C2': '', 'RH': ' '}
Here I've used next(<gen exp>, v), if the generator expression returned an empty iterator(i.e if none of the B's values contains the key k) then simply use the value v from A, otherwise use the key returned from the generator expression.
The above code can also be written as:
# Build a copy of A in which every key that appears in one of B's alias
# lists is mapped to the corresponding key of B.
d = {}
for k, v in A.items():
    for k1, v1 in B.items():
        # Items in B's lists may carry stray surrounding spaces
        # (e.g. ' CD241'), so compare against the stripped item.
        if any(k == item.strip() for item in v1):
            d[k] = k1
            break
    else:
        # this will be executed if no value in `B` contains `k`
        # (the inner loop finished without hitting `break`)
        d[k] = v
This will update dictionary a with the values you want:
# For every key of `a` that occurs in one of `b`'s value lists, overwrite the
# value in `a` with the matching key of `b`. Only existing keys are
# reassigned, so mutating `a` while iterating over it is safe.
for key_a in a:
    # dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for key_b, value_b in b.items():
        if key_a in value_b:
            a[key_a] = key_b
If you need a whole new dict, then just make a copy of a and update that one.

Categories