# Question's original attempt: for every entry of `b`, split it at '.' and
# store under the left part the row of `a` that contains the right part.
a=[['kyle','movie_1','c_13'],
['blair','food','a_29'],
['reese','movie_2','abc_76']]
b=['df.movie_1',
'ghk.food',
'df.movie_2']
x = {}
for i in b:
y = i.split('.')
for j in a:
# NOTE: plain assignment replaces whatever row was stored earlier under the
# same key, so only the last matching row per key survives.
if y[1] in j : x[y[0]]=j
print(x)
This is my code to check whether each keyword from b appears inside one of the lists in a.
The output that I got is
{'df': ['reese', 'movie_2', 'abc_76'], 'ghk': ['blair', 'food', 'a_29']}
My desired output is
{'df': [['kyle','movie_1','c_13'],['reese', 'movie_2', 'abc_76']], 'ghk': ['blair', 'food', 'a_29']}
The cause is that the stored value is overwritten when x['df'] already exists.
You could use defaultdict to collect them instead (the result is a little different from what you expect, because every value becomes a list of rows, but it is very easy):
from collections import defaultdict
a = [
    ['kyle', 'movie_1', 'c_13'],
    ['blair', 'food', 'a_29'],
    ['reese', 'movie_2', 'abc_76'],
]
b = [
    'df.movie_1',
    'ghk.food',
    'df.movie_2',
]

# Group the rows of `a` under the prefix of every `b` entry whose keyword
# they contain. defaultdict(list) creates the list on first access, so rows
# that share a prefix accumulate instead of overwriting one another.
x = defaultdict(list)
for entry in b:
    prefix, keyword = entry.split('.')[:2]
    for row in a:
        if keyword in row:
            x[prefix].append(row)
print(x)
# defaultdict(<class 'list'>, {'df': [['kyle', 'movie_1', 'c_13'], ['reese', 'movie_2', 'abc_76']], 'ghk': [['blair', 'food', 'a_29']]})
As mentioned in a previous answer, the problem is that your loops end up overwriting the value of x[y[0]]. Based on your desired output, what you need is to append to a list instead. There is already a nice solution using defaultdict. If instead you want to just use standard list, this is one way to do it:
a = [
    ['kyle', 'movie_1', 'c_13'],
    ['blair', 'food', 'a_29'],
    ['reese', 'movie_2', 'abc_76'],
]
b = [
    'df.movie_1',
    'ghk.food',
    'df.movie_2',
]

# Group the rows of `a` under the prefix of every `b` entry whose keyword
# they contain, using only a plain dict.
x = {}
for entry in b:
    prefix, keyword = entry.split('.')[:2]
    for row in a:
        if keyword in row:
            # setdefault creates the empty list the first time a prefix is
            # seen; after that the row is simply appended.
            x.setdefault(prefix, []).append(row)
print(x)
Hope this works:
A single-line solution
Code:
# Maps the part of each `b` entry before the '.' to the list of rows of `a`
# that contain the part after the '.'.
op_dict={}
# The comprehension is evaluated only for the side effect of append(); the
# list of None values it builds is discarded.
[op_dict.setdefault(x.split('.')[0], []).append(y) for x in b for y in a if x.split('.')[1] in y]
Output:
Related
Here are my two txt files which I need to compare and I need to separate the list in a according to the keywords in b.
For example, df.movie , hence I want the 'kyle','movie','c_13' into df=[].
a=[['kyle','movie','c_13'],
['blair','food','a_29'],
['reese','phone','abc_76'],....]
b=[['df.movie'],
['ghk.food'],
['qwe.phone'],....]
for line_a in a:
for line_b in b:
if line_b in line_a:
print(line_a)
The answer by Emrah is good. I just wanted to add an alternative solution.
a = [
    ['kyle', 'movie', 'c_13'],
    ['blair', 'food', 'a_29'],
    ['reese', 'phone', 'abc_76'],
]
b = [
    'df.movie',
    'ghk.food',
    'qwe.phone',
]
# Keywords are the part of each `b` entry after the dot.
b2 = [b_.split(".")[1] for b_ in b]
# Print every row of `a` that contains at least one of the keywords.
for row in a:
    if any(elem in row for elem in b2):
        print(row)
I think you are still learning Python, and you should practice more at building solutions with Python lists.
# Map each header (text before the dot) to the first row of `a` containing
# the text after the dot. Every entry of `b` is indexed with [0] before it
# is split.
seperateds = {}
for entry in b:
    parts = entry[0].split(".")
    b_header, b_string = parts[0], parts[1]
    # next() stops at the first matching row, like the original break.
    match = next((row for row in a if b_string in row), None)
    if match is not None:
        seperateds[b_header] = match
print(seperateds)
You can use a dictionary for this. Try the following:
# Map the text before the dot of each `b` entry to the row of `a` that
# contains the text after the dot. Every entry of `b` is indexed with [0]
# before it is split.
x = {}
for entry in b:
    parts = entry[0].split('.')
    for row in a:
        if parts[1] in row:
            # A later matching row replaces an earlier one for the same key.
            x[parts[0]] = row
print(x)
Output will be as follows:
{'df': ['kyle', 'movie', 'c_13'], 'ghk': ['blair', 'food', 'a_29'], 'qwe': ['reese', 'phone', 'abc_76']}
Say I have the following dict
{'red':'boop','white':'beep','rose':'blip'}
And I want to get it to a list like so
['red','boop','end','white','beep','rose','blip','end']
The key / value which is to be placed in front of the list is an input.
So essentially I want [first_key, first_value, end, .. rest of the k/v pairs .., end]
I wrote a brute force approach but I feel like there's a more pythonic way of doing it (and also because once implemented the snippet would make my code O(n^2) )
for item in lst_items:
    # Flatten one dict into [key, value, key, value, ...].
    data_lst = []
    for key, value in item.items():
        data_lst.append(key)
        data_lst.append(value)
    # insert 'end' at the appropriate indices
    # more code ...
Any pythonic approach?
The below relies on itertools.chain.from_iterable to flatten the items into a single list. We pull the first two values from the chain and then use them to build a new list, which we extend with the rest of the values.
from itertools import chain
def ends(d):
    """Flatten ``d`` into ``[k1, v1, "end", k2, v2, ..., "end"]``.

    The first key/value pair produced by iterating ``d`` is followed by an
    ``"end"`` marker, the remaining pairs follow in iteration order, and a
    final ``"end"`` closes the list.

    Returns an empty list when ``d`` is empty.
    """
    if not d:
        return []
    # items() works on both Python 2 and 3 dicts; iteritems() is Python 2 only.
    flat = chain.from_iterable(d.items())
    # The guard above guarantees at least one pair, so two next() calls are safe.
    result = [next(flat), next(flat), "end"]
    result.extend(flat)
    result.append("end")
    return result
ends({'red':'boop','white':'beep','rose':'blip'})
# ['rose', 'blip', 'end', 'white', 'beep', 'red', 'boop', 'end']
If you know the key you want first, and don't care about the rest, we can use a lazily evaluated generator expression to remove it from the flattened list.
def ends(d, first):
    """Flatten ``d`` into a list that starts with the pair for key ``first``.

    The result is ``[first, d[first], "end", <other pairs...>, "end"]`` with
    the other pairs in the iteration order of ``d``.

    Returns an empty list when ``d`` is empty.
    Raises ``KeyError`` when ``d`` is non-empty and ``first`` is not a key.
    """
    if not d:
        return []
    # items() works on both Python 2 and 3 dicts; iteritems() is Python 2 only.
    # The generator lazily skips the pair that is placed up front.
    rest = chain.from_iterable((k, v) for k, v in d.items() if k != first)
    result = [first, d[first], "end"]
    result.extend(rest)
    result.append("end")
    return result
ends({'red':'boop','white':'beep','rose':'blip'}, 'red')
# ['red', 'boop', 'end', 'rose', 'blip', 'white', 'beep', 'end']
The first key is specified in first variable:
first = 'red'
d = {'red': 'boop', 'white': 'beep', 'rose': 'blip'}

# Start with the chosen pair followed by its 'end' marker.
new_l = [first, d[first], 'end']
for k, v in d.items():
    # The chosen pair is already at the front, so skip it here.
    if k != first:
        new_l.extend((k, v))
# Closing marker, added once after all remaining pairs.
new_l.append('end')

print(new_l)
Prints:
['red', 'boop', 'end', 'white', 'beep', 'rose', 'blip', 'end']
You could use enumerate and check the current index:
>>> d = {'red':'boop','white':'beep','rose':'blip'}
>>> [x for i, e in enumerate(d.items())
... for x in (e + ("end",) if i in (0, len(d)-1) else e)]
...
['white', 'beep', 'end', 'red', 'boop', 'rose', 'blip', 'end']
However, your original idea, first chaining the keys and values and then inserting the "end" items would not have O(n²), either. It would be O(n) followed by another O(n), hence still O(n).
from itertools import chain
list(chain(*item.items())) + ['end']
data_lst = [x for k, v in lst_itemsL.iteritems() for x in (k, v) ]
data_lst.insert(2, 'end')
data_lst.append('end')
This is pythonic; though will likely have the same efficiency (which can't be helped here).
This should be faster than placing if blocks inside the loops...
I have a list of strings like this:
lst = ['23532','user_name=app','content=123',
'###########################',
'54546','user_name=bee','content=998 hello','source=fb',
'###########################',
'12/22/2015']
I want a similar method like string.split('#') that can give me output like this:
[['23532','user_name=app','content=123'],
['54546','user_name=bee','content=998 hello','source=fb'],
['12/22/2015']]
but I know that a list has no split method. I cannot use ''.join(lst) either, because this list comes from part of a txt file I read in, and the file is so big that joining it raises a MemoryError.
I don't think there's a one-liner for this, but you can easily write a generator to do what you want:
def sublists(lst, sep='###########################'):
    """Yield the non-empty runs of items of ``lst`` found between separators.

    Args:
        lst: iterable of items to split.
        sep: item that marks a boundary; it is compared with ``==`` and is
            never included in the output.

    Yields:
        Lists of consecutive non-separator items. Empty runs (leading,
        trailing or repeated separators) are skipped.
    """
    current = []
    for item in lst:
        if item == sep:
            if current:
                yield current
                current = []
        else:
            current.append(item)
    # Emit whatever follows the last separator.
    if current:
        yield current
new_list = list(sublists(old_list))
If you can't use .join(), you can loop through the list and save the index of any string that contains # then loop again to slice the list:
lst = [
    '23532', 'user_name=app', 'content=123',
    '###########################',
    '54546', 'user_name=bee', 'content=998 hello', 'source=fb',
    '###########################',
    '12/22/2015',
]

# First pass: positions of every item that contains a '#'.
idx = [i for i, val in enumerate(lst) if '#' in val]

# Second pass: slice out the items between consecutive positions.
new_lst = []
j = 0
for x in idx:
    new_lst.append(lst[j:x])
    j = x + 1  # resume just after the separator
# Items after the last separator.
new_lst.append(lst[j:])

print(new_lst)
output:
[['23532', 'user_name=app', 'content=123'], ['54546', 'user_name=bee', 'content=998 hello', 'source=fb'], ['12/22/2015']]
sep = '###########################'


def split_list(_list, separator=None):
    """Split ``_list`` into sub-lists at every item equal to the separator.

    Args:
        _list: iterable of items to split.
        separator: boundary item; when ``None`` the module-level ``sep`` is
            used (looked up at call time).

    Returns:
        A list of lists. Like ``str.split`` with an explicit separator, empty
        sub-lists are kept, so an empty input returns ``[[]]``.
    """
    if separator is None:
        separator = sep
    lists = []
    sub_list = []
    for x in _list:
        if x == separator:
            lists.append(sub_list)
            sub_list = []
        else:
            sub_list.append(x)
    # The run after the last separator is always appended, even when empty.
    lists.append(sub_list)
    return lists
# pprint is used below, so import it here.
from pprint import pprint

l = ['23532','user_name=app','content=123',
     '###########################',
     '54546','user_name=bee','content=998 hello','source=fb',
     '###########################',
     '12/22/2015']
pprint(split_list(l))
Output:
[['23532', 'user_name=app', 'content=123'],
['54546', 'user_name=bee', 'content=998 hello', 'source=fb'],
['12/22/2015']]
You can achieve this by itertools.groupby
from itertools import groupby
separator = '###########################'
lst = ['23532', 'user_name=app', 'content=123',
       separator, '54546', 'user_name=bee', 'content=998 hello', 'source=fb',
       separator, '12/22/2015']
# groupby yields alternating runs of separator / non-separator items; keep
# only the runs whose key is False, i.e. the real data.
chunks = [list(g) for k, g in groupby(lst, lambda x: x == separator) if not k]
# Print the result so it is visible when run as a script.
print(chunks)
Output
[['23532', 'user_name=app', 'content=123'],
['54546', 'user_name=bee', 'content=998 hello', 'source=fb'],
['12/22/2015']]
In my function I will create different tuples and add to an empty list :
tup = (pattern,matchedsen)
matchedtuples.append(tup)
The patterns have format of regular expressions. I am looking for apply groupby() on matchedtuples in following way:
For example :
matchedtuples = [(p1, s1) , (p1,s2) , (p2, s5)]
And I am looking for this result:
result = [ (p1,(s1,s2)) , (p2, s5)]
So, in this way I will have groups of sentences with the same pattern. How can I do this?
My answer works for either input structure shown below and prints the output you asked for. I use only groupby from the itertools module:
from itertools import groupby

# Let's suppose your input is something like this
a = [("p1", "s1"), ("p1", "s2"), ("p2", "s5")]

result = []
# groupby merges only consecutive items that share a key (the pattern).
for key, values in groupby(a, lambda x: x[0]):
    b = tuple(values)
    if len(b) >= 2:
        # Several sentences share this pattern: collect them in one tuple.
        result.append((key, tuple(j[1] for j in b)))
    else:
        # A lone match is kept as the original (pattern, sentence) pair.
        result.append(b[0])
print(result)
Output:
[('p1', ('s1', 's2')), ('p2', 's5')]
The same solution works if you add more values to your input:
from itertools import groupby

# When you add more values to your input
a = [("p1", "s1"), ("p1", "s2"), ("p2", "s5"), ("p2", "s6"), ("p3", "s7")]

result = []
# groupby merges only consecutive items that share a key (the pattern).
for key, values in groupby(a, lambda x: x[0]):
    b = tuple(values)
    if len(b) >= 2:
        # Several sentences share this pattern: collect them in one tuple.
        result.append((key, tuple(j[1] for j in b)))
    else:
        # A lone match is kept as the original (pattern, sentence) pair.
        result.append(b[0])
print(result)
Output:
[('p1', ('s1', 's2')), ('p2', ('s5', 's6')), ('p3', 's7')]
Now, if you modify your input structure:
from itertools import groupby

# Let's suppose your modified input is something like this
a = [(["p1"], ["s1"]), (["p1"], ["s2"]), (["p2"], ["s5"])]

result = []
# groupby compares keys with ==, so list-valued patterns work as keys too.
for key, values in groupby(a, lambda x: x[0]):
    b = tuple(values)
    if len(b) >= 2:
        # Several sentences share this pattern: collect them in one tuple.
        result.append((key, tuple(j[1] for j in b)))
    else:
        # A lone match is kept as the original (pattern, sentence) pair.
        result.append(b[0])
print(result)
Output:
[(['p1'], (['s1'], ['s2'])), (['p2'], ['s5'])]
Also, the same solution works if you add more values to your new input structure:
from itertools import groupby

# When you add more values to your new input
a = [(["p1"], ["s1"]), (["p1"], ["s2"]), (["p2"], ["s5"]), (["p2"], ["s6"]), (["p3"], ["s7"])]

result = []
# groupby compares keys with ==, so list-valued patterns work as keys too.
for key, values in groupby(a, lambda x: x[0]):
    b = tuple(values)
    if len(b) >= 2:
        # Several sentences share this pattern: collect them in one tuple.
        result.append((key, tuple(j[1] for j in b)))
    else:
        # A lone match is kept as the original (pattern, sentence) pair.
        result.append(b[0])
print(result)
Output:
[(['p1'], (['s1'], ['s2'])), (['p2'], (['s5'], ['s6'])), (['p3'], ['s7'])]
Ps: Test this code and if it breaks with any other kind of inputs please let me know.
If you require the output you present, you'll need to manually loop through the grouping of matchedtuples and build your list.
First, of course, if the matchedtuples list isn't sorted, sort it with itemgetter:
from operator import itemgetter as itmg
li = sorted(matchedtuples, key=itmg(0))
Then, loop through the result supplied by groupby and append to the list r based on the size of the group:
r = []
# groupby only merges adjacent items, so iterate over the sorted list `li`
# rather than the possibly unsorted `matchedtuples`.
for i, j in groupby(li, key=itmg(0)):
    j = list(j)
    # One match: (pattern, sentence). Several: (pattern, (sentence, ...)).
    ap = (i, j[0][1]) if len(j) == 1 else (i, tuple(s[1] for s in j))
    r.append(ap)
Although poorly written, this code:
marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]
marker_array_DS = []
for i, marker in enumerate(marker_array):
    # Keep a row only when its index-[1] value differs from the previous
    # row's. The first row has no predecessor and is always kept; indexing
    # with i - 1 at i == 0 would compare it against the last row instead.
    if i == 0 or marker_array[i - 1][1] != marker[1]:
        marker_array_DS.append(marker)
print(marker_array_DS)
Returns:
[['hard', '2', 'soft'], ['fast', '3'], ['turtle', '4', 'wet']]
It accomplishes part of the task which is to create a new list containing all nested lists except those that have duplicate values in index [1]. But what I really need is to concatenate the matching index values from the removed lists creating a list like this:
[['hard heavy rock', '2', 'soft light feather'], ['fast', '3'], ['turtle', '4', 'wet']]
The values in index [1] must not be concatenated. I kind of managed to do the concatenation part using a tip from another post:
newlist = [i + n for i, n in zip(list_a, list_b)]
But I am struggling with figuring out the way to produce the desired result. The "marker_array" list will be already sorted in ascending order before being passed to this code. All like-values in index [1] position will be contiguous. Some nested lists may not have any values beyond [0] and [1] as illustrated above.
Quick stab at it... use itertools.groupby to do the grouping for you, but do it over a generator that converts the 2 element list into a 3 element.
from itertools import groupby
from operator import itemgetter
marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]


def my_group(iterable):
    """Merge consecutive rows of ``iterable`` that share the same index-[1] value.

    Each row is a list of two or three strings. For every run of rows with
    an equal second element, yield ``[firsts, key, thirds]`` where ``firsts``
    and ``thirds`` are the space-joined first and third elements of the run.
    Empty strings are dropped from the yielded list, so a run without any
    third element yields ``[firsts, key]``.
    """
    # Pad two-element rows with '' so every row has exactly three fields.
    padded = ((el + [''])[:3] for el in iterable)
    for k, g in groupby(padded, key=itemgetter(1)):
        # Transpose the (first, third) pairs of the run, then join each side.
        fst, snd = map(' '.join, zip(*map(itemgetter(0, 2), g)))
        # Build a real list; filter() would yield a lazy iterator on Python 3.
        yield [part for part in (fst, k, snd) if part]


print(list(my_group(marker_array)))
from collections import defaultdict
# d1 collects the first element and d2 the elements from index 2 onward of
# every row, both keyed by the row's index-[1] value.
d1 = defaultdict(list)
d2 = defaultdict(list)
for pxa in marker_array:
    group = pxa[1]
    d1[group].append(pxa[0])
    d2[group] += pxa[2:]
# One [joined firsts, key, joined rest] entry per key, in sorted key order.
res = [
    [' '.join(d1[key]), key, ' '.join(d2[key])]
    for key in sorted(d1)
]
If you really need two-element entries for rows without a trailing value (which I think is unlikely):
# Remove the last element of every entry whose last element is falsy
# (for example an empty joined string).
for p in res:
    if p[-1]:
        continue
    p.pop()
marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]
marker_array_DS = []

# Distinct index-[1] values, in order of first appearance.
marker_array_hit = []
for marker in marker_array:
    if marker[1] not in marker_array_hit:
        marker_array_hit.append(marker[1])

for i in marker_array_hit:
    # All rows that share this index-[1] value.
    lists = [item for item in marker_array if item[1] == i]
    temp = [' '.join(str(item[0]) for item in lists), i]
    # Rows without a third element contribute nothing to the second part.
    second_part = ' '.join(str(item[2]) for item in lists if len(item) > 2)
    if second_part != '':
        temp.append(second_part)
    marker_array_DS.append(temp)

print(marker_array_DS)
I learned python for this because I'm a shameless rep whore
marker_array = [
    ['hard','2','soft'],
    ['heavy','2','light'],
    ['rock','2','feather'],
    ['fast','3'],
    ['turtle','4','wet'],
]

# data maps each index value to [list of first words, list of last words].
data = {}
for arr in marker_array:
    if len(arr) == 2:
        # Treat a missing third element as '' without modifying the input row.
        first, index = arr
        last = ''
    else:
        first, index, last = arr
    firsts, lasts = data.setdefault(index, [[], []])
    firsts.append(first)
    lasts.append(last)

results = []
for key in sorted(data):
    firsts, lasts = data[key]
    current = [" ".join(firsts), key, " ".join(lasts)]
    # Drop the trailing element when the group had no third values.
    if current[-1] == '':
        current = current[:-1]
    results.append(current)

print(results)
--output:--
[['hard heavy rock', '2', 'soft light feather'], ['fast', '3'], ['turtle', '4', 'wet']]
A different solution based on itertools.groupby:
from itertools import groupby
# normalizes the list of markers so all markers have 3 elements
def normalized(markers):
    """Yield each marker padded with empty strings up to three elements.

    Markers that already have three or more elements are yielded as an
    unchanged copy (the repeat count is then zero or negative).
    """
    for marker in markers:
        yield marker + [""] * (3 - len(marker))


def concatenated(markers):
    """Merge consecutive markers that share the same index-[1] key.

    Yields one list per run: ``[left words joined, key, right words joined]``.
    Empty strings are left out, so a run without any right word yields only
    ``[left words joined, key]``.
    """
    # use groupby to iterate over runs of markers sharing the same key
    for key, markers_in_category in groupby(normalized(markers), lambda m: m[1]):
        # get separate sequences of left and right words
        lefts, rights = zip(*[(m[0], m[2]) for m in markers_in_category])
        # remove empty strings from both sequences
        lefts = [word for word in lefts if word]
        rights = [word for word in rights if word]
        # yield a real list (filter() is lazy on Python 3), also removing the
        # empty string at the end if necessary
        yield [part for part in (" ".join(lefts), key, " ".join(rights)) if part]
The generator concatenated(markers) will yield the results. This code correctly handles the ['fast', '3'] case and doesn't return an additional third element in such cases.