find the first n most frequent characters in it - python

Given a string, you have to find the first n most frequent characters in it.
If there are two letters with the same frequency, then the alphabetically earlier value should be picked first:
string= "aabbccc"
n =2
list = []
#write your code here
char_dict = {}
for char in string:
if char not in char_dict:
char_dict[char] = 1
else:
char_dict[char] += 1
sorted_dict=sorted(char_dict.items(), key=lambda x: (x[1],x[0]))
sorted_dict = sorted_dict[-2:]
for key, value in sorted_dict:
list.append(key)
print(list)
My output is ['b', 'c'] but it should actually be c and a.

The problem is with the sorting. You need to sort by two fields in different directions (1 ascending and 1 descending). Change the sorted_dict 2 lines to:
sorted_dict = sorted(char_dict.items(), key=lambda x: (-x[1], x[0]))
sorted_dict = sorted_dict[:n]
btw: Avoid using python's keywords (such as list) as variable names. Name it myList or something similar.

Below is one of the perfect solution for your problem(with less code)
string=input()
n=int(input())
import collections
out=[collections.Counter(string).most_common(i+1)[i][0] for i in range(n)]
out.sort()
print(out)

I added the print outputs after the statement.
The code should be selfdescripting.
from collections import defaultdict
string= "aabbccc"
n = 2
result = defaultdict(int)
for char in string:
result[char] += 1
print(result) # defaultdict(<class 'int'>, {'b': 2, 'a': 2, 'c': 3})
ordered_result = sorted(result.items(), key=lambda x: (-x[1], x[0]))
print(ordered_result) # [('c', 3), ('a', 2), ('b', 2)]
ordered_list = [x[0] for x in ordered_result]
print(ordered_list[:n]) # ['c', 'a']

def char_frequency(string,n):
letter_freq = dict()
for letter in string:
if letter not in letter_freq.keys():
letter_freq[letter] = 1
else:
letter_freq[letter] += 1
list_of_tuples = sorted(letter_freq.items(), key=lambda x: (-x[1],x[0]))[:n]
print(list_of_tuples)
final_list = []
for tup in list_of_tuples:
final_list.append(tup[0])
return(sorted(final_list))
print(char_frequency("aabbccc",2))

string="good"
dictionary = {}
n=2
for char in string:
if(char in dictionary. keys()):
dictionary[char]+=1
else:
dictionary[char]=1
duplicate=[]
for char in dictionary:
if(dictionary[char] ==n):
print(char)

string="aabbccc"
unique_chars = list(set(string))
count_chars = map(lambda i: string.count(i), unique_chars)
order = sorted(zip(unique_chars, count_chars), key=lambda x: (-x[1], x[0]))
n=2
nth=order[:n]
the variable order is basically mapping each letter to its count
or in 1 line
sorted(zip(list(set(string)), map(lambda i: string.count(i),list(set(string)))), key=lambda x: (-x[1], x[0]))[:2]
The result is in the following format
[('c', 3), ('a', 2)]

Related

I want to sort a dictionary descending by number and then ascending alphabetically

I have this code
L = ['c', 'c', 'b', 'a','b']
from collections import Counter
import numpy as np
x = Counter(L)
dict1 = {k: v for k, v in sorted(x.items(), key=lambda item: -item[1])}
print(dict1)
It gives me {'c': 2, 'b': 2, 'a': 1}
But I want it to return me this: {'b': 2,'c': 2, 'a': 1}
if I turn it into a list and use sorted It will return me this [('a', 1), ('b', 2), ('c', 2)] which is not what I want, because so, the list will lose the descending sort by number.
b and c have the same precedence since they have the same value. Thus, their order is maintained, and since c appeared first in L, it will also appear first in x and in the dict comprehension expression from it. If you want b to come before c you could explicitly add a secondary key to the key lambda:
dict1 = {k: v for k, v in sorted(x.items(), key=lambda item: (-item[1], item[0]))}
# Here ---------------------------------------------------------------^

List to nested list and dictionary

I have this one long list and want to convert it to a nested list and a dictionary.
L= ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"]
output:
nested list:
[["a","abc","de","efg"], ["b","ijk","lm","op","qr"], ["c","123","45","6789"]]
dictionary:
{"a":["abc","de","efg"],
"b":["ijk","lm","op","qr"], "c":["123","45","6789] }
Can anyone tell me how to do that in python?
And I can't import anything
I assume the groups are separated by the empty strings. For this you can use itertools.groupby:
from itertools import groupby
data = ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"]
nl = [list(g) for k, g in groupby(data, ''.__ne__) if k]
d = {next(g): list(g) for k, g in groupby(data, ''.__ne__) if k}
print(nl)
print(d)
Results:
[['a', 'abc', 'de', 'efg'], ['b', 'ijk', 'lm', 'op', 'qr'], ['c', '123', '45', '6789']]
{'a': ['abc', 'de', 'efg'], 'b': ['ijk', 'lm', 'op', 'qr'], 'c': ['123', '45', '6789']}
In the groupby I'm using ''.__ne__ which is the function for "not equal" of an empty string. This way it's only capturing groups of non-empty strings.
EDIT
I just read that you cannot import. Here's a solution just using a loop:
nl = [[]]
for s in data:
if s:
nl[-1].append(s)
else:
nl.append([])
And for the dict:
itr = iter(data)
key = next(itr)
d = {key: []}
while True:
try: val = next(itr)
except StopIteration: break
if val:
d[key].append(val)
else:
key = next(itr)
d[key] = []
Here's how to convert L to a nested list:
L= ["a","abc","de","efg","","b","ijk","lm","op","qr","","c","123","45","6789"]
nested_list_L = []
temp = []
for item in L:
if item != "":
temp.append(item)
else:
nested_list_L.append(temp)
temp = []
nested_list_L.append(temp)
And here's how to convert L to a dictionary:
L= ["a","abc","de","efg","","b","ijk","lm","op","qr","","c","123","45","6789"]
dict_L = {}
temp = []
key = ""
for item in L:
if len(item) == 1:
key = item
elif len(item) > 1:
temp.append(item)
else:
dict_L[key] = temp
temp = []
key = ""
dict_L[key] = temp
From my understanding, you are trying to:
Split a list by empty string, then
Convert the resulting nested list into a dictionary, using first element of each sub-list as the key and the rest as value.
You can certainly accomplish the task without any imports. To split a list, just iterate over it and build the nested list along the way:
def split(data, on):
nested = []
curr = []
for x in data:
if x == on:
nested.append(curr)
curr = []
else:
curr.append(x)
if curr != [] or data[-1:] == [on]:
nested.append(curr)
return nested
Then, again, iterate over this nested list to build your desired dictionary:
def build_dict(key_valss):
d = {}
for key_vals in key_valss:
if key_vals != []:
key = key_vals[0]
vals = key_vals[1:]
d[key] = vals
return d
Compose the two functions to get what you want:
>>> build_dict( split(data = ["a","abc","de","efg","", "b","ijk","lm","op","qr","", "c","123","45","6789"] , on = '') )
{'a': ['abc', 'de', 'efg'], 'b': ['ijk', 'lm', 'op', 'qr'], 'c': ['123', '45', '6789']}

How to add index to a list inside a dictionary

I have a dictionary here:
dict = {'A':['1','1','1','1','1'], 'B':['2','2'], 'C':['3','3','3','3']}
What is the necessary process to get the following result?
dict = {'A':['1_01','1_02','1_03','1_04','1_05'], 'B':['2_01','2_02'], 'C':['3_01','3_02','3_03','3_04']}
I have been learning python for quite a while now but dictionary is kind of new to me.
As others mentioned, refrain from using built-in keywords as variable names, such as dict. I kept it for simplicity on your part.
This is probably the most pythonic way of doing it (one line of code):
dict = {key:[x+"_0"+str(cnt+1) for cnt,x in enumerate(value)] for key,value in dict.items()}
You could also iterate through each dictionary item, and then each list item and manually change the list names as shown below:
for key,value in dict.items():
for cnt,x in enumerate(value):
dict[key][cnt] = x+"_0"+str(cnt+1)
Also, as some others have mentioned, if you want numbers greater than 10 to save as 1_10 rather than 1_010 you can you an if/else statement inside of the list comprehension...
dict = {key:[x+"_0"+str(cnt+1) if cnt+1 < 10 else x+"_"+str(cnt+1) for cnt,x in enumerate(value)] for key,value in dict.items()}
First iterate on keys.
Then loop on keys you are getting on key like for 'A' value is ['1','1','1','1','1'] then we can change the element at ['1','1','1','1','1']
enumerate() helps you iterate on index,value then index starts with zero as per your expected output add 1 to index. As you want the trailing 0 before each count we did '%02d'% (index+1)
Like this:
dict = {'A':['1','1','1','1','1'], 'B':['2','2'], 'C':['3','3','3','3']}
for i in dict.keys(): #iterate on keys
for index,val in enumerate(dict[i]): #took value as we have key in i
element='%02d'% (index+1) #add trailing 0 we converted 1 to int 01
dict[i][index]=val+"_"+ str(element) #assign new value with converting integer to string
print(dict)
Output:
{'A': ['1_01', '1_02', '1_03', '1_04', '1_05'], 'C': ['3_01', '3_02', '3_03', '3_04'], 'B': ['2_01', '2_02']}
Use enumerate to iterate over list keeping track of index:
d = {'A':['1','1','1','1','1'], 'B':['2','2'], 'C':['3','3','3','3']}
newd = {}
for k, v in d.items():
newd[k] = [f'{x}_0{i}' for i, x in enumerate(v, 1)]
print(newd)
Also a dictionary-comprehension:
d = {k: [f'{x}_0{i}' for i, x in enumerate(v, 1)] for k, v in d.items()}
Note: Don't name your dictionary as dict because it shadows the built-in.
d= {'A':['1','1','1','1','1'], 'B':['2','2'], 'C':['3','3','3','3']}
{x:[j + '_'+ '{:02}'.format(i+1) for i,j in enumerate(y)] for x,y in d.items()}
adict = {'A':['1','1','1','1','1'], 'B':['2','2'], 'C':['3','3','3','3'], 'D': '23454'}
newdict = {}
for i,v in adict.items():
if isinstance(v, list):
count = 0
for e in v:
count += 1
e += '_0' + str(count)
newdict[i] = newdict.get(i, [e]) + [e]
else:
newdict[i] = newdict.get(i, v)
print (newdict)
#{'A': ['1_01', '1_01', '1_02', '1_03', '1_04', '1_05'], 'B': ['2_01', '2_01', '2_02'], 'C': ['3_01', '3_01', '3_02', '3_03', '3_04'], 'D': '23454'}
This solution will check for whether your value in the dictionary is a list before assigning an index to it
You can use a dictcomp:
from itertools import starmap
d = {
'A': ['1', '1', '1', '1', '1'],
'B': ['2', '2'],
'C': ['3', '3', '3', '3']
}
f = lambda x, y: '%s_%02d' % (y, x)
print({k: list(starmap(f, enumerate(v, 1))) for k, v in d.items()})
# {'A': ['1_01', '1_02', '1_03', '1_04', '1_05'], 'B': ['2_01', '2_02'], 'C': ['3_01', '3_02', '3_03', '3_04']}

py3k: mapping dictionary (string->number) into list (of strings)

Assume we have dictionary that translates strings into numbers.
How to reverse it into list ?
Let assume, we can fill not mapped numbers with empty string ''.
Here example how it works:
>>> dic_into_list({'x':0, 'z':2, 'w':3})
['x', '', 'z', 'w']
d = {'x':0, 'z':2, 'w':3}
lst = [""] * (max(d.values()) + 1)
for k, v in d.items():
lst[v] = k
print(lst)
prints
['x', '', 'z', 'w']
The simplest way is to flip the dict and then iterate up to the maximum value (now key) in the dict:
original = {'x':0, 'z':2, 'w':3}
d = dict((v, k) for k, v in original.iteritems())
print [d.get(i, '') for i in range(max(d) + 1)]
I share my current solution: (I look for shorter and cleared implementation in other posts):
def dic_into_list(dic):
maxindex = max([v for i,v in dic.items()])
dicrev = {num:name for name,num in dic.items()}
l=[]
for i in range(0,maxindex+1):
if i in dicrev:
l.append(dicrev[i])
else:
l.append('')
return l

TypeError: 'int' object is not iterable. Why am i getting this error? please help

def get_top_k(frequency, k):
temp = frequency
key = ""
tvalues = []
values = []
kk = int(k)
i = 0
for i in temp.keys():
key = i
num = [int(frequency[key])]
tvalues += num
tvalues = bubble_sort(tvalues)
i = 0
for i in kk:
num = [int(tvalues[i])]
values += num
print(values)
i = 0
result = {}
for i in kk:
result += {(str(temp[values[i]])):(int(values[i]))}
return result
Perhaps you meant
for i in range(kk):
a bit off topic, but:
for i in temp.keys():
key = i
num = [int(frequency[key])]
tvalues += num
should just be:
tvalues = temp.values()
example:
>>> D = {'a':1, 'b':2, 'c':3, 'd':4}
>>> D.keys()
['a', 'c', 'b', 'd']
>>> D.values()
[1, 3, 2, 4]
>>> D.items()
[('a', 1), ('c', 3), ('b', 2), ('d', 4)]
>>>
and it looks like your code could be changed to this:
>>> D = {'a':1, 'b':2, 'c':3, 'd':4}
>>> def get_top_k(D, k):
... return sorted(D.items(), reverse=True, key=lambda x: x[1])[:k]
...
>>> get_top_k(D, 2)
[('d', 4), ('c', 3)]
>>>
You have for i in kk and kk is just an integer. You can't iterate over an integer, you can only iterate over a sequence/iterable.
You probably want for i in range(kk) if you want to iterate from 0 to (kk-1).
Because kk = int(k)
kk is only one single number, not an array of numbers
What are you trying to do, for us to help you fixing it?

Categories