Extract element in sublist using other sublist - python

I have 2 nested arrays
Test = [['c','d','b','t','j','n','k','s','p','t','k'],['l','u','y','r','c','b']]
Sample = [[1,0,1,1,2,0,3,4,0,0,4],[1,0,1,2,0,3]]
For every position where the Sample array holds 0, I want to extract the corresponding letter from the Test array. Both arrays have the same lengths.
Output = [['d','n','p','t'],['u','c']]

This should work:
# Sample data: rows of letters and, in parallel, rows of flags.
Test = [['c','d','b','t','j','n','k','s','p','t','k'],['l','u','y','r','c','b']]
Sample = [[1,0,1,1,2,0,3,4,0,0,4],[1,0,1,2,0,3]]

# Walk the two nested lists in lockstep and keep, for every row, the letters
# whose flag at the same position is 0.
final_list = []
for letters, flags in zip(Test, Sample):
    sub_list = []
    for letter, flag in zip(letters, flags):
        if flag == 0:
            sub_list.append(letter)
    final_list.append(sub_list)
Where final_list is your expected output

import numpy as np

# Boolean-mask each row of Test with the positions where Sample equals 0.
# .tolist() converts the selection back to plain Python values; wrapping the
# array in list() would leave numpy scalar objects inside the result.
res = [np.array(a)[np.array(b) == 0].tolist() for a, b in zip(Test, Sample)]

for loop and zip() does all the work
final_list = []
for x, y in zip(Test, Sample):
    # Pair each letter of x with its flag in y and keep the letters flagged 0.
    # A fresh list is built on every pass, so no temporary needs deleting.
    final_list.append([i for i, j in zip(x, y) if j == 0])
final_list

This seems like a job for zip() and list comprehensions:
# One inner list per (letters, flags) pair, holding the letters flagged 0.
result = [
    [letter for letter, flag in zip(letters, flags) if flag == 0]
    for letters, flags in zip(Test, Sample)
]
Result:
[['d', 'n', 'p', 't'], ['u', 'c']]

Related

Python object of arrays count number of similar array value occurrences

Apologies for the wording of this question. I have the list below; it contains sub-objects that in turn contain key/value pairs.
l = [{'melissa': ["power"]}, {'Linda': ["power", "a"]}, {'Rachel': ["power", "document"]}]
This is my current solution that counts the amount of occurrences of strings in each objects list as expected:
# Tally how often each string occurs across all the value lists stored in l.
cnt = {}
for entry in l:
    for words in entry.values():
        for word in words:
            # get() supplies 0 the first time a word is seen.
            cnt[word] = cnt.get(word, 0) + 1
# Turn the (word, count) pairs into the requested list of sub-lists.
final = [[word, count] for word, count in cnt.items()]
print(final)
Output:
final = [['power', 3], ['a', 1], ['document', 1]]
Is there a better/more succinct method of doing this?
I would still like the output to be a list of sub-lists.
Thanks
Use collections.Counter:
from collections import Counter

l = [
    {"melissa": ["power"]},
    {"Linda": ["power", "a"]},
    {"Rachel": ["power", "document"]},
]

# Flatten every value list of every dictionary and count the strings.
out = Counter(
    word
    for mapping in l
    for words in mapping.values()
    for word in words
)
# Re-shape the (word, count) pairs into a list of two-item lists.
out = [[word, count] for word, count in out.items()]
print(out)
Prints:
[['power', 3], ['a', 1], ['document', 1]]

adding empty string while joining the 2 lists - Python

I have 2 lists
mainlist=[['RD-12',12,'a'],['RD-13',45,'c'],['RD-15',50,'e']] and
sublist=[['RD-12',67],['RD-15',65]]
if i join both the list based on 1st element condition by using below code
def combinelist(mainlist, sublist):
    """Join rows of *sublist* onto the rows of *mainlist* with the same key.

    The first element of every row is its key. For each row of *sublist*
    whose key also occurs in *mainlist*, the remaining elements are appended
    to that mainlist row. Rows of *sublist* without a counterpart are ignored.

    Returns a new list of rows; the rows of *mainlist* are not modified.
    """
    # Slicing copies the tail of each row, so the caller's lists stay intact.
    dict1 = {e[0]: e[1:] for e in mainlist}
    for e in sublist:
        try:
            dict1[e[0]].extend(e[1:])
        except (KeyError, IndexError):
            # No matching key in mainlist, or an empty sublist row: skip it.
            pass
    result = [[k] + v for k, v in dict1.items()]
    return result
Its results in like below
[['RD-12',12,'a',67],['RD-13',45,'c',],['RD-15',50,'e',65]]
As there is no element for 'RD-13' in sublist, I want an empty string added in its place.
The final output should be
[['RD-12',12,'a',67],['RD-13',45,'c'," "],['RD-15',50,'e',65]]
Please help me.
Your problem can be solved using a while loop to adjust the length of your sublists until it matches the length of the longest sublist by appending the wanted string.
# Pad every row of result with " " until it is as long as the longest row.
# The target length is computed once, and the loop variable no longer
# shadows the built-in list type.
# default=0 keeps an empty result a no-op instead of making max() raise.
longest = max((len(row) for row in result), default=0)
for row in result:
    row.extend([" "] * (longest - len(row)))
You could just go through the result list and check where the total number of your elements is 2 instead of 3.
# Rows that hold only two items are padded with " ".
# The loop variable is named row so the built-in list type is not shadowed.
for row in lists:
    if len(row) == 2:
        row.append(" ")
UPDATE:
If there are more items in the sublist, just subtract the lists containing the 'keys' of your lists, and then add the desired string.
def combinelist(mainlist, sublist):
    """Join *sublist* rows onto *mainlist* rows, padding unmatched rows.

    The first element of every row is its key. Elements after the key of a
    *sublist* row are appended to the *mainlist* row with the same key.
    Every *mainlist* row whose key does not occur in *sublist* gets a single
    " " appended instead. *sublist* rows with unknown keys are ignored.

    Returns a new list of rows; the rows of *mainlist* are not modified.
    """
    dict1 = {e[0]: e[1:] for e in mainlist}
    # Keys that sublist provides data for.
    matched = {e[0] for e in sublist}
    for e in sublist:
        try:
            dict1[e[0]].extend(e[1:])
        except KeyError:
            # Key exists only in sublist: nothing to attach it to.
            pass
    for key, values in dict1.items():
        if key not in matched:
            values.append(" ")
    result = [[k] + v for k, v in dict1.items()]
    return result
You can try something like this:
mainlist = [['RD-12', 12], ['RD-13', 45], ['RD-15', 50]]
sublist = [['RD-12', 67], ['RD-15', 65]]
empty_val = ''
# Lists to dictionaries
maindict = dict(mainlist)
subdict = dict(sublist)
result = []
# Go through the union of all keys, sorted so the rows come out in key order.
for k in sorted(maindict.keys() | subdict.keys()):
    # Pick the value from each dict, or the default when the key is missing.
    # get() is used so the two dictionaries are left intact.
    result.append([k, maindict.get(k, empty_val), subdict.get(k, empty_val)])
You can set up your empty value to whatever you need.
UPDATE
Following the new conditions, it would look like this:
mainlist = [['RD-12', 12, 'a'], ['RD-13', 45, 'c'], ['RD-15', 50, 'e']]
sublist = [['RD-12', 67], ['RD-15', 65]]
# Key -> the two remaining fields of each mainlist row.
maindict = {a: [b, c] for a, b, c in mainlist}
subdict = dict(sublist)
result = []
for k in maindict.keys() | subdict.keys():
    row = [k]
    # A key missing from either source contributes a single ' ' placeholder.
    row.extend(maindict.get(k, [' ']))
    row.append(subdict.get(k, ' '))
    result.append(row)
# Sort in place by key; a bare sorted(...) call would throw its result away.
result.sort(key=lambda row: row[0])
Another option is to convert the sublist to a dict, so items are easily and rapidly accessible.
# Build a lookup from sublist (assumes every row is a [key, value] pair).
sublist_dict = dict(sublist)
So you can do (it modifies the mainlist):
# Append to every mainlist row the value stored under its key in
# sublist_dict, or "" when the key is absent. mainlist is modified in place.
for e in mainlist:
    e.append(sublist_dict.get(e[0], ""))
#=> [['RD-12', 12, 'a', 67], ['RD-13', 45, 'c', ''], ['RD-15', 50, 'e', 65]]
Or a one liner list comprehension (it produces a new list):
# New list: each mainlist row plus its sublist_dict value ("" when absent).
[ e + [sublist_dict.get(e[0], "")] for e in mainlist ]
If you want to skip the missing element:
# Append the sublist_dict value to each mainlist row only when one exists.
for e in mainlist:
    data = sublist_dict.get(e[0])
    # Compare against None rather than testing truthiness, so legitimate
    # falsy values such as 0 are still appended.
    if data is not None:
        e.append(data)
print(mainlist)
#=> [['RD-12', 12, 'a', 67], ['RD-13', 45, 'c'], ['RD-15', 50, 'e', 65]]

Group consecutive similar items in a python list

I have a list containing different data types, say numbers and strings:
foo = [5,2,'a',8,4,'b','y',9, 'd','e','g']
Let's say I want to find all consecutive strings in the list, and group them together:
bar = [ ['a'],['b','y'],['d','e','g'] ]
How can I do this
This is a wonderful opportunity to use groupby:
from itertools import groupby

foo = [5,2,'a',8,4,'b','y',9, 'd','e','g']

# groupby splits foo into runs of consecutive items that agree on
# "is a string"; only the runs of strings are kept.
bar = [
    list(run)
    for is_text, run in groupby(foo, key=lambda item: isinstance(item, str))
    if is_text
]
which produces the desired:
[['a'], ['b', 'y'], ['d', 'e', 'g']]
Iterate through each element in the list, if it is of type str, append it to one_d_array, otherwise, append one_d_array to two_d_array, provided one_d_array is not empty. Reset one_d_array whenever the element is not of type str
lst = [5,2,'a',8,4,'b','y',9, 'd','e','g', 3]
two_d_arr = []   # finished groups of consecutive strings
one_d_arr = []   # group currently being collected
for cur_element in lst:
    if isinstance(cur_element, str):
        one_d_arr.append(cur_element)
    elif one_d_arr:
        # A non-string ends the current group; store it and start a new one.
        two_d_arr.append(one_d_arr)
        one_d_arr = []
# The list may end on a string, leaving one group not yet stored.
if one_d_arr:
    two_d_arr.append(one_d_arr)
print(two_d_arr)
Without using any import, you can do it through a good old "for loop" iterating over the elements of the lists. Here is a code working also for any type you want, not only string:
def group_list(a_list, a_type):
    """Group consecutive elements of *a_list* that are instances of *a_type*.

    *a_type* is passed straight to isinstance(), so it may be a single type
    or a tuple of types. Returns a list of lists, one per run of matching
    elements; elements that do not match only act as separators.
    """
    res = []
    sublist = []
    for elem in a_list:
        if isinstance(elem, a_type):
            # Here the element is of type a_type: append it to a sublist
            sublist.append(elem)
        elif sublist:
            # Not of type a_type: close the current run (if there is one)
            res.append(sublist)
            sublist = []
    # If the last element of the list is of type a_type, the last sublist
    # has not been appended: append it now
    if sublist:
        res.append(sublist)
    return res


foo = [5,2,'a',8,4,'b','y',9, 'd','e','g']
print(group_list(foo,str))
# [['a'], ['b', 'y'], ['d', 'e', 'g']]

Get a unique list of items that occur more than once in a list

I have a list of items:
mylist = ['A','A','B','C','D','E','D']
I want to return a unique list of items that appear more than once in mylist, so that my desired output would be:
[A,D]
Not sure how to even begin this, but my thought process is to first append a count of each item, then remove anything equal to 1. Then dedupe, but this seems like a really roundabout, inefficient way to do it, so I am looking for advice.
You can use collections.Counter to do what you have described easily:
from collections import Counter

mylist = ['A','A','B','C','D','E','D']
# Count every item, then keep the ones counted more than once.
cnt = Counter(mylist)
print([k for k, v in cnt.items() if v > 1])
# ['A', 'D']
>>> mylist = ['A','A','B','C','D','E','D']
>>> set([i for i in mylist if mylist.count(i)>1])
set(['A', 'D'])
import collections

cc = collections.Counter(mylist)  # Counter({'A': 2, 'D': 2, 'C': 1, 'B': 1, 'E': 1})
cc.subtract(cc.keys())            # Counter({'A': 1, 'D': 1, 'C': 0, 'B': 0, 'E': 0})
cc += collections.Counter()       # remove zeros (trick from the docs)
print(list(cc))                   # ['A', 'D']
Try some thing like this:
import collections

a = ['A','A','B','C','D','E','D']
# Items whose count exceeds 1, in order of first appearance.
print([x for x, y in collections.Counter(a).items() if y > 1])
['A', 'D']
Reference: How to find duplicate elements in array using for loop in Python?
OR
def list_has_duplicate_items(mylist):
    """Return True when at least one item of *mylist* occurs more than once."""
    return len(mylist) > len(set(mylist))


def get_duplicate_items(mylist):
    """Return the items occurring more than once in *mylist*, each listed once.

    Items are counted in a single pass instead of calling mylist.count() for
    every distinct item. The result follows the order of first appearance.
    """
    # Imported here because the snippet has no import section of its own.
    from collections import Counter

    return [item for item, count in Counter(mylist).items() if count > 1]


mylist = ['oranges', 'apples', 'oranges', 'grapes']
print('List: ', mylist)
print('Does list have duplicate item(s)? ', list_has_duplicate_items(mylist))
print('Redundant item(s) in list: ', get_duplicate_items(mylist))
Reference https://www.daniweb.com/software-development/python/threads/286996/get-redundant-items-in-list
Using a similar approach to others here, heres my attempt:
from collections import Counter


def return_more_then_one(myList):
    """Return the items of *myList* that occur more than once, each listed once.

    The result follows the order in which the items first appear.
    """
    # Count the parameter itself; referring to an undefined name here would
    # raise NameError on every call.
    counts = Counter(myList)
    out_list = [item for item, count in counts.items() if count > 1]
    return out_list
It can be as simple as ...
# Collect the items that occur more than once straight into a set, then print them as a list.
print(list({i for i in mylist if mylist.count(i) > 1}))
Use set to help you do that, like this maybe :
X = ['A','A','B','C','D','E','D']
Y = set(X)   # the distinct items of X
Z = []       # items found more than once
for val in Y:
    occurrences = X.count(val)
    if occurrences > 1:
        #print(val,'occurs',occurrences,'times')
        Z.append(val)
print(Z)
The list Z will save the list item which occur more than once. And the part I gave comment (#), that will show the number of occurrences of each list item which occur more than once
Might not be as fast as internal implementations, but takes linear time on average (since set lookup and insertion are O(1) on average)
mylist = ['A','A','B','C','D','E','D']
myset = set()   # items seen so far
dups = set()    # items seen at least twice
for x in mylist:
    if x in myset:
        dups.add(x)
    else:
        myset.add(x)
dups = list(dups)
print(dups)
another solution what's written:
def delete_rep(list_):
    """Return a copy of *list_* without repeated items.

    An item is kept only if it does not occur again later in the list, so
    the last occurrence of every value survives and relative order is kept.
    """
    new_list = []
    for position, item in enumerate(list_):
        # Slice by position, not by the item's value: using the item itself
        # as a slice bound fails for strings and misbehaves for integers.
        if item not in list_[position + 1:]:
            new_list.append(item)
    return new_list
This is my approach without using packages
# Collect every element of listy that occurs more than once, then print the
# distinct ones.
result = []
for e in listy:
    if listy.count(e) > 1:
        result.append(e)
print(list(set(result)))

Merge nested list items based on a repeating value

Although poorly written, this code:
marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]
marker_array_DS = []
for i, marker in enumerate(marker_array):
    # Keep an entry when its key (index 1) differs from the previous entry's.
    # The first entry is always kept: it has no predecessor, and index -1
    # would wrongly compare it with the last entry of the list.
    if i == 0 or marker_array[i - 1][1] != marker[1]:
        marker_array_DS.append(marker)
print(marker_array_DS)
Returns:
[['hard', '2', 'soft'], ['fast', '3'], ['turtle', '4', 'wet']]
It accomplishes part of the task which is to create a new list containing all nested lists except those that have duplicate values in index [1]. But what I really need is to concatenate the matching index values from the removed lists creating a list like this:
[['hard heavy rock', '2', 'soft light feather'], ['fast', '3'], ['turtle', '4', 'wet']]
The values in index [1] must not be concatenated. I kind of managed to do the concatenation part using a tip from another post:
# Pairwise concatenation of the items of list_a and list_b.
newlist = [i + n for i, n in zip(list_a, list_b)]
But I am struggling with figuring out the way to produce the desired result. The "marker_array" list will be already sorted in ascending order before being passed to this code. All like-values in index [1] position will be contiguous. Some nested lists may not have any values beyond [0] and [1] as illustrated above.
Quick stab at it... use itertools.groupby to do the grouping for you, but do it over a generator that converts the 2 element list into a 3 element.
from itertools import groupby
from operator import itemgetter

marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]


def my_group(iterable):
    """Merge consecutive entries of *iterable* that share the value at index 1.

    Every entry is a list ``[first, key]`` or ``[first, key, last]``. For each
    run of entries with the same key, one list is yielded that holds the
    space-joined firsts, the key, and the space-joined lasts. Empty parts are
    left out, so a run without any third element yields a two-item list.
    Entries with equal keys must be adjacent, as groupby only merges
    neighbours.
    """
    # Pad two-item entries with '' so every entry has exactly three fields.
    padded = ((el + [''])[:3] for el in iterable)
    for key, group in groupby(padded, key=itemgetter(1)):
        firsts, lasts = zip(*map(itemgetter(0, 2), group))
        # Skip empty words so padding never shows up as stray spaces.
        fst = ' '.join(word for word in firsts if word)
        snd = ' '.join(word for word in lasts if word)
        # Build a real list: filter() would yield a lazy object on Python 3.
        yield [part for part in (fst, key, snd) if part]


print(list(my_group(marker_array)))
from collections import defaultdict

# d1 collects the leading word of every entry per key (index 1),
# d2 collects whatever follows the key, if anything.
d1 = defaultdict(list)
d2 = defaultdict(list)
for pxa in marker_array:
    d1[pxa[1]].extend(pxa[:1])
    d2[pxa[1]].extend(pxa[2:])
# One row per key, in sorted key order; the last field is '' when no entry
# with that key had a third element.
res = [[' '.join(d1[x]), x, ' '.join(d2[x])] for x in sorted(d1)]
If you really need 2-tuples (which I think is unlikely):
# Drop the trailing field of every row in res when that field is empty.
for p in res:
    if not p[-1]:
        p.pop()
marker_array = [['hard','2','soft'],['heavy','2','light'],['rock','2','feather'],['fast','3'], ['turtle','4','wet']]
marker_array_DS = []
# Distinct keys (index 1) in order of first appearance.
marker_array_hit = []
for marker in marker_array:
    if marker[1] not in marker_array_hit:
        marker_array_hit.append(marker[1])
for key in marker_array_hit:
    # All entries sharing this key.
    matches = [item for item in marker_array if item[1] == key]
    temp = [' '.join(str(item[0]) for item in matches), key]
    # Only entries that actually have a third element contribute to it.
    second_part = ' '.join(str(item[2]) for item in matches if len(item) > 2)
    if second_part != '':
        temp.append(second_part)
    marker_array_DS.append(temp)
print(marker_array_DS)
I learned python for this because I'm a shameless rep whore
marker_array = [
    ['hard','2','soft'],
    ['heavy','2','light'],
    ['rock','2','feather'],
    ['fast','3'],
    ['turtle','4','wet'],
]
# key -> [list of first words, list of last words]
data = {}
for arr in marker_array:
    # Two-item entries get '' as their last word. A padded copy is unpacked
    # so the rows of marker_array themselves are left unchanged.
    first, index, last = arr + [''] if len(arr) == 2 else arr
    firsts, lasts = data.setdefault(index, [[], []])
    firsts.append(first)
    lasts.append(last)
results = []
for key in sorted(data):
    current = [
        " ".join(data[key][0]),
        key,
        " ".join(data[key][1]),
    ]
    # Drop the third field when no entry for this key supplied one.
    if current[-1] == '':
        current = current[:-1]
    results.append(current)
print(results)
--output:--
[['hard heavy rock', '2', 'soft light feather'], ['fast', '3'], ['turtle', '4', 'wet']]
A different solution based on itertools.groupby:
from itertools import groupby


def normalized(markers):
    """Yield every marker padded with "" so it has at least three elements."""
    for marker in markers:
        yield marker + [""] * (3 - len(marker))


def concatenated(markers):
    """Yield one merged entry per run of markers sharing the key at index 1.

    Each yielded list holds the space-joined left words, the key, and the
    space-joined right words. Empty parts are left out, so a run without any
    right word yields a two-item list. Markers with equal keys must be
    adjacent, as groupby only merges neighbours.
    """
    # use groupby to iterate over lists of markers sharing the same key
    for key, markers_in_category in groupby(normalized(markers), lambda m: m[1]):
        # get separate lists of left and right words
        lefts, rights = zip(*[(m[0], m[2]) for m in markers_in_category])
        # remove empty strings from both lists
        lefts = [word for word in lefts if word]
        rights = [word for word in rights if word]
        # yield the concatenated entry for this key as a real list (also
        # removing the empty string at the end, if necessary); filter()
        # would hand back a lazy object on Python 3
        yield [part for part in (" ".join(lefts), key, " ".join(rights)) if part]
The generator concatenated(markers) will yield the results. This code correctly handles the ['fast', '3'] case and doesn't return an additional third element in such cases.

Categories