count sublists that has a specific term in python - python

I'm a newbie. I want to write a function that outputs the number of sublists that contain a particular element, but my function outputs the total count of that element across all the sublists instead.
My function:
def count(myList):
tmp = []
d = {}
for item in myList: tmp += item
for key in tmp: d[key] = d.get(key, 0) + 1
return d
My output:
>>res = count_doc_frequencies([['a', 'b', 'a'], ['a', 'b', 'c'], ['a']])
>>res['a']
4
>>res['b']
2
Desired output:
>>res = count_doc_frequencies([['a', 'b', 'a'], ['a', 'b', 'c'], ['a']])
>>res['a']
3
As 'a' is present in 3 sublists..
can anyone help me modify my function to achieve the desired output ??

lst = [['a', 'b', 'a'], ['a', 'b', 'c'], ['a']]


def count(lst):
    """Return a dict mapping each element to the number of sublists containing it.

    An element that is repeated inside one sublist is counted once for that
    sublist. An empty ``lst`` yields an empty dict.
    """
    # dictionary that we are going to return
    foo = {}
    for sublist in lst:
        # set() drops repeats, so a sublist adds at most 1 per element
        for element in set(sublist):
            foo[element] = foo.get(element, 0) + 1
    return foo


res = count(lst)
print(res)

You should change this statement
tmp += item
to
tmp += set(item)
This will eliminate the duplication count of elements in your sublists.

Another way to write this will be
def count(myList, ele):
    """Return the number of sublists in ``myList`` that contain ``ele``.

    A sublist counts once no matter how many times ``ele`` occurs in it.
    """
    return sum(1 for item in myList if ele in item)

Related

Making list that takes in value if value appears more than once

If my list has values that appears more than once I want to do the following:
my_list = ['a','b','c','a','a','b']
I want that my_list becomes ['a','b','c']
and at the same time new_list = ['a','a','b']
I have started with the code but can't manage to finish it:
def func(word):
tgt = 1
found = []
lst = [1,2,3,45,6,1]
if lst.count(word)> 1:
found.append(word)
return found, lst
print(func(1))
You can iterate through the list, store the element in one list if it is not visited or in a new list if it is already visited:
my_list = ['a','b','c','a','a','b']
visited, lst, new_list = set(), [], []
for x in my_list:
    if x not in visited:
        # first time x shows up: it belongs to the de-duplicated list
        lst.append(x)
        visited.add(x)
    else:
        # x was already seen: this occurrence is a repeat
        new_list.append(x)
print(lst, new_list)
# ['a', 'b', 'c'] ['a', 'a', 'b']
my_list = ['a','b','c','a','a','b']
new_list = my_list.copy()
# distinct values, in sorted order
my_list = sorted(set(my_list))
# drop the first occurrence of every distinct value from new_list;
# whatever is left are the repeated occurrences
for item in my_list:
    new_list.remove(item)
I'm going to use a collections.Counter() to count up the occurrences of each item in the list. At that point the keys() become your new my_list, and then we will use some list multiplication to construct your new_list.
import collections
data = ['a','b','c','a','a','b']
counted = collections.Counter(data)
At this point finding your new my_list is dead simple:
my_list = list(counted)
print(my_list)
gives you:
['a', 'b', 'c']
Now we can leverage the fact that ['a'] * 4 == ['a', 'a', 'a', 'a'] to construct a list from the keys of counted based on the number of times they were identified.
new_list = []
for key, value in counted.items():
if value > 1:
new_list.extend([key]*(value-1))
print(new_list)
This will give us back:
['a', 'a', 'b']
Full Solution:
import collections

data = ['a','b','c','a','a','b']
counted = collections.Counter(data)
# the keys of the counter are the distinct values
my_list = list(counted)
new_list = []
for key, value in counted.items():
    if value > 1:
        # one occurrence stays in my_list, the other value-1 are the repeats
        new_list.extend([key]*(value-1))
print(my_list, new_list)
lst = ["a","b","a","c","d","b"]
new_list=[]
for i in lst:
    if i not in new_list:  # keep only the first occurrence of each value
        new_list.append(i)
for i in new_list:
    # every value in new_list came from lst, so remove() always finds it;
    # dropping one occurrence of each leaves only the repeats in lst
    lst.remove(i)
print(lst,new_list)
First, collect each distinct value in a second list; then remove one occurrence of each of those values from the first list. What remains in the first list are the repeated values.
l = ['a', 'b', 'c', 'a', 'c']
arr = []
for x in l:
    if x not in arr:
        arr.append(x)
# now arr holds each distinct element of l exactly once, in first-seen order

How to number elements in a list of string and return a new list?

So I have a list of strings. I want to create a new list of string which turns the same string into a new string and name it "A". If there's a different string in the list, name it "B" and so on.
If the string is:
['F4','A3','F4','B5','A3','K2']
Then it should give me a result of:
['A','B','A','C','B','D']
I don't know how to start the code and can only think of something like a dictionary.
dict = {}
result = []
for line in list1:
if line not in dict:
dict.update({line:str(chr(65+len(dict)))})
result.append(dict.get(line))
Then I don't know how to continue. Any help will be appreciated.
You can make an iterator of ASCII upper-case letters and pull them off one at a time in a defaultdict constructor. Once you have that, it's just a list comprehension. Something like:
import string
from collections import defaultdict

l = ['F4','A3','F4','B5','A3','K2']
# Iterator over 'A'..'Z'; a key that is missing from d takes the next unused letter.
keys = iter(string.ascii_uppercase)
d = defaultdict(keys.__next__)
[d[k] for k in l]
# ['A', 'B', 'A', 'C', 'B', 'D']
import string

l = ['F4','A3','F4','B5','A3','K2']
mapping = {}
# index of the next unused letter in string.ascii_uppercase
offset = 0
for item in l:
    if item in mapping:
        # already labelled
        continue
    mapping[item] = string.ascii_uppercase[offset]
    offset += 1
[mapping.get(item) for item in l]
Output
['A', 'B', 'A', 'C', 'B', 'D']
You can create a simple class to store the running results:
import string


class L:
    """Hand out 'A', 'B', 'C', ... to keys in the order they are first looked up.

    Looking up a key that was seen before returns the letter it was given.
    Only the 26 letters of ``string.ascii_uppercase`` are available, so the
    27th distinct key raises ``IndexError``.
    """

    def __init__(self):
        # key -> assigned upper-case letter
        self.l = {}

    def __getitem__(self, _v):
        if (val:=self.l.get(_v)) is not None:
            return val
        # unseen key: the number of keys stored so far indexes the next free letter
        self.l[_v]= (k:=string.ascii_uppercase[len(self.l)])
        return k


l = L()
vals = ['F4','A3','F4','B5','A3','K2']
result = [l[i] for i in vals]
Output:
['A', 'B', 'A', 'C', 'B', 'D']

Split a list in python with an element as the delimiter? [duplicate]

This question already has answers here:
Python split for lists
(6 answers)
Closed 2 years ago.
I want to create sub-lists from a list which has many repeating elements, ie.
l = ['a', 'b', 'c', 'c', 'b', 'a', 'b', 'c', 'b', 'a']
Wherever the 'a' begins the list should be split. (preferably removing 'a' but not a must)
As such:
l = [ ['b', 'c', 'c', 'b'], ['b', 'c', 'b'] ]
I have tried new_list = [x.split('a')[-1] for x in l] but I am not getting the desired "New list" effect.
When you write,
new_list = [x.split('a')[-1] for x in l]
you are essentially performing,
result = []
for elem in l:
result.append(elem.split('a')[-1])
That is, you're splitting each string contained in l on the letter 'a', and collecting the last element of each of the strings into the result.
Here's one possible implementation of the mechanic you're looking for:
def extract_parts(my_list, delim):
    """Return the sublists of ``my_list`` that lie between consecutive ``delim`` items.

    The delimiters themselves are excluded. Items before the first ``delim``
    and after the last one are dropped, so fewer than two delimiters gives
    an empty result.
    """
    # Locate the indices of all instances of ``delim`` in ``my_list``
    indices = [i for i, x in enumerate(my_list) if x == delim]
    # Pair every delimiter position with the next one and slice between them
    return [my_list[start + 1:end] for start, end in zip(indices, indices[1:])]
Using this function, we have
>>> l = ['a', 'b', 'c', 'c', 'b', 'a', 'b', 'c', 'b', 'a']
>>> extract_parts(l, 'a')
[['b', 'c', 'c', 'b'], ['b', 'c', 'b']]
You can use zip and enumerate to do that. Create a list of ids for separation and just break it at those points.
l = ['a', 'b', 'c', 'c', 'b', 'a', 'b', 'c', 'b', 'a']
size = len(l)
# cut points: the index just past every 'a', so each chunk ends with its 'a'
id_list = [idx + 1 for idx, val in enumerate(l) if val == 'a']
# close the last chunk at the end of the list unless a cut point is already
# there; checking id_list first avoids an IndexError when l has no 'a'
ends = id_list if id_list and id_list[-1] == size else id_list + [size]
result = [l[i:j] for i, j in zip([0] + id_list, ends)]
It will not include the delimiter
import itertools

lst = ['a', 'b', 'c', 'c', 'b', 'a', 'b', 'c', 'b', 'a']
delimiter = lst[0]
# groupby yields runs of consecutive items sharing the same key; the key is
# True for delimiter items, so only the False runs are kept.
li = []
for is_delimiter, run in itertools.groupby(lst, key=lambda e: e == delimiter):
    if is_delimiter:
        continue
    li.append(list(run))
print(li)
Explanation: the groupby function starts a new group each time the key changes.
Key value
True itertools._grouper object pointing to group 'a'
False itertools._grouper object pointing to group 'b', 'c', 'c', 'b'
True itertools._grouper object pointing to group 'a'
False itertools._grouper object pointing to group 'b', 'c', 'b'
True itertools._grouper object pointing to group 'a'
In if condition checking if the key is false, return the itertools._grouper object and then pass itertool object to list.
Create a counters array for each element you want to split at then write a condition in this fashion:
l = ['a', 'b', 'c', 'c', 'b', 'a', 'b', 'c', 'b', 'a']
counters = [0,0,0]                        #to count instances
startIndex = 0                            #where to start split
endIndex = 0                              #where to end split
splitLists = []                           #container for splits
for index, element in enumerate(l):       #index of l
    if element == 'a':                    #find 'a'
        counters[0] += 1                  #increase counter
        if counters[0] == 1:              #if first instance
            startIndex = index + 1        #start split after
        if counters[0] == 2:              #if second instance
            endIndex = index              #end split here
            splitList = l[startIndex:endIndex]
            counters[0] = 1               #this 'a' also opens the next split
            startIndex = index + 1
            splitLists.append(splitList)  #append to main list of lists
print(splitLists)
So basically you are finding the start and end index of the matching pattern within the list. You use these to split the list, and append this list into a main list of lists (2d list).

There is a list from which i need to find duplicates

I have written code that is meant to do this. However, it does not give me the exact result I want: it returns every occurrence of the duplicated values instead of listing each duplicate value just once.
some_list=['a','b','c','b','d','m','n','n']
a=[]
for items in some_list:
if some_list.count(items) > 1:
a.append(items)
print (a)
Your code appends every occurrence of a duplicated value to the new list, i.e. 'a'. To keep each duplicate value only once, you need to check whether the value is already in your list, and append it only if it is not. I have modified your code to suit this need.
enter code here
some_list=['a','b','c','b','d','m','n','n']
a=[]
for items in some_list:
    # record a value only when it is repeated and has not been recorded yet
    if some_list.count(items) > 1 and a.count(items) == 0:
        a.append(items)
print (a)
Output:
['b', 'n']
some_list=['a','b','c','b','d','m','n','n']
a=[]
for items in some_list:
    # repeated in some_list and not collected yet
    if some_list.count(items) > 1 and items not in a:
        a.append(items)
print (a)
output:
['b', 'n']
or:
some_list=['a','b','c','b','d','m','n','n']
a=[]
for items in some_list:
    if some_list.count(items) > 1:
        a.append(items)
# set() collapses the repeated entries; sorted() turns the result back into
# a list with a deterministic order so it prints as a list
a=sorted(set(a))
print (a)
out:
['b', 'n']
We can use the Counter class from the collections module.
from collections import Counter

final_list = []
cntr = Counter(['a','b','c','b','d','m','n','n'])
# items() yields (value, occurrences) pairs
for item, occurrences in cntr.items():
    if occurrences > 1:
        final_list.append(item)
print(final_list)
For more info on Counter module please go through https://docs.python.org/2/library/collections.html#collections.Counter
Counter and defaultdict data structures from collections module perfectly suits the use-case, here is another approach using normal dict data structure which may be more intuitive for beginners in understanding the approach.
Approach:
Step-1: Building a dictionary with item as key and its occurrences as value.
Step-2 Filter out items whose occurrences is less than 2, in other words, select items whose occurrences are greater than 1.
def get_duplicates(some_list):
    """Return the items that occur more than once in ``some_list``.

    Each duplicated item is listed once. Items must be hashable because they
    are used as dictionary keys.
    """
    frequency_counter = {}
    # Step-1: count the occurrences of every item
    for item in some_list:
        frequency_counter[item] = frequency_counter.get(item, 0) + 1
    # Step-2: keep the items seen more than once
    return [item for item, count in frequency_counter.items() if count > 1]
In [2]: get_duplicates(['a','b','c','b','d','m','n','n'])
Out[2]: ['b', 'n']
you can use list comprehension with collections.Counter:
from collections import Counter
[e for e, freq in Counter(some_list).items() if freq > 1]
output:
['b', 'n']
another approach will be to sort your list:
some_list = ['a','b','c','b','d','m','n','n']
previous = None
elements = set()
for i in sorted(some_list):
    # after sorting, equal values sit next to each other
    if previous == i:
        # add() stores i as one element; update() would split a string into characters
        elements.add(i)
    previous = i
print(elements)
output:
{'b', 'n'}
Your code will work if you add one more condition to your if statement.
some_list = ['a', 'b', 'c', 'b', 'd', 'm', 'n', 'n']
a = []
for items in some_list:
    # the extra "not in a" test keeps each duplicated value from being added twice
    if some_list.count(items) > 1 and items not in a:
        a.append(items)
print (a)
Output:-
['b', 'n']

Replace values in nested list with dict in Python?

I have a nested list:
my_list = ['a','b','c','dog', ['pig','cat'], 'd']
and a dict:
my_dict = {'dog':'c','pig':'a','cat':'d'}
I would like to use the dict, such that I get a list:
new_list = ['a', 'b', 'c', 'c', ['a', 'd'], 'd']
I've tried something like:
new_list = []
for idx1, item1 in enumerate(my_list):
for idx2, item2 in enumerate(item1):
new_list[idx1][idx2] = my_dict[item2]
but I get an error when item2 does not exist in the dict. Is there a better way to do this?
You could write a simple recursive function that uses list comprehension to generate the result. If item on your list is a list then recurse, otherwise use dict.get with default parameter to convert the item:
my_list = ['a','b','c','dog', ['pig','cat'], 'd']
my_dict = {'dog':'c','pig':'a','cat':'d'}


def convert(l, d):
    """Return a copy of the nested list ``l`` with items replaced via ``d``.

    Nested lists are converted recursively. An item that is not a key of
    ``d`` is kept unchanged.
    """
    return [convert(x, d) if isinstance(x, list) else d.get(x, x) for x in l]


print(convert(my_list, my_dict))
Output:
['a', 'b', 'c', 'c', ['a', 'd'], 'd']
It looks like you're trying to substitute values in a nested list. Try this function; it will recursively swap the values for you:
def replace_vals(_list, my_dict):
    """Return a new list with every item of ``_list`` swapped via ``my_dict``.

    Nested lists are handled recursively. Items that are not keys of
    ``my_dict`` are copied over unchanged.
    """
    new_list = []
    for item in _list:
        if isinstance(item, list):
            new_list.append(replace_vals(item, my_dict))
        else:
            # fall back to the item itself when it has no replacement
            new_list.append(my_dict.get(item, item))
    return new_list
print(replace_vals(my_list, my_dict))
my_list = ['a','b','c','dog', ['pig','cat'], 'd']
my_dict = {'dog':'c','pig':'a','cat':'d'}


def replace(item):
    """Return the replacement for ``item`` from ``my_dict``, or ``item`` itself if it has none."""
    return my_dict.get(item, item)


new_list = []
for item in my_list:
    if isinstance(item, list):
        # only one level of nesting is handled: map each member of the inner list
        new_list.append([replace(subitem) for subitem in item])
    else:
        new_list.append(replace(item))
print(new_list)
This piece of code will not work if the list inside my_list is nested in itself.

Categories