Filling a dictionary using for loop in a set - python

I have 2 strings:
s1 = "Are they here"
s2 = "yes, they are here"
I want to create a dictionary (e) whose keys are the maximum number of times each shared element appears in whichever string contains it the most, and whose values are the elements themselves (e.g. "y" appears once in s1 and twice in s2). Therefore I want a dict that goes:
e = {2:y} # and so on
To describe my code, I thought of creating a list (c) with all the shared elements:
c = ['r', 'e', 't', 'h', 'e', 'y', 'h', 'e', 'r', 'e', 'y', 'e', 't', 'h', 'e', 'y', 'r', 'e', 'h', 'e', 'r', 'e']
then switch it to a set to eliminate duplicates and using them as iterators:
d = {'h', 'y', 'r', 't', 'e'}
Ultimately I thought of using a for loop to fill the dict (e) by iterating every element in d and reporting the maximum times it was present.
Here's my full code
please note that I don't want to use any library.
Also note that the code works with dict comprehension:
def mix(s1, s2):
    """Map the larger of the two occurrence counts to a character shared by s1 and s2.

    Only non-space characters that occur in both strings are considered.
    The count is used as the dictionary key, so characters that share the
    same maximum count overwrite one another and the result can hold fewer
    entries than there are shared characters.
    """
    # every non-space character of one string that also occurs in the other
    c = [ch for ch in s1 if ch != " " and ch in s2]
    c += [ch for ch in s2 if ch != " " and ch in s1]  # end of 1st process
    d = set(c)  # remove duplicates
    e = {}  # aligns each count with its character
    for ch in d:
        e[max(s1.count(ch), s2.count(ch))] = ch
    # z = {i:max(s1.count(i), s2.count(i)) for i in d} this is what actually works
    return e  # z works instead
The issue I get is that the for loop seems to stop after 3 iterations: the returned dictionary only has 3 entries.

Edit: I see that Rakshith B S has made a better version of my comment; refer to theirs.
I'll start by saying I'm an amateur, and the following can absolutely be simplified.
First, decide about capitalization, A != a, use str.lower or str.upper.
Second, switching the dictionary to be {'letter':count} would make everything easier.
Then, it would most likely be easier to create two dictionaries to count the unique letters in each string.
# Count every non-space character of each string, ignoring case.
# Expects s1 and s2 to be defined already.
s1 = s1.lower()
d1 = {}
for letter in s1:
    if letter == " ":
        continue
    # get() supplies 0 the first time a letter is seen
    d1[letter] = d1.get(letter, 0) + 1

s2 = s2.lower()
d2 = {}
for letter in s2:
    if letter == " ":
        continue
    d2[letter] = d2.get(letter, 0) + 1
That should make two dictionaries, for loop it to compare and append the max values (this part can be made more efficiently).
# Merge the two per-string counts (d1 and d2, built beforehand), keeping
# the larger count for every character.
d3 = {}
for let in d1:
    # a character missing from d2 simply counts as 0 there
    d3[let] = max(d1[let], d2.get(let, 0))
for let in d2:
    if let not in d1:
        d3[let] = d2[let]
# The comma is punctuation, not a letter. pop() with a default does not
# raise KeyError when neither string contained a comma.
d3.pop(',', None)
This should at least get you on the right track.

I have just realized that dictionaries can obviously only have UNIQUE keys, so of course my code displays the result only "partially".
When it gets the same key, it overwrites it.
So using the element as key will work and the for loop can be like so:
for i in d:
    # key on the character (unique) rather than on the count (not unique)
    e[i] = max(s1.count(i), s2.count(i))

def _count_chars(text):
    """Count each character of text, skipping spaces and commas."""
    counts = {}
    for ch in text:
        if ch not in (" ", ","):
            counts[ch] = counts.get(ch, 0) + 1
    return counts


def mix(s1, s2):
    """Return {count: character} using the larger count from s1 or s2.

    Spaces and commas are ignored. For every remaining character the
    higher of its two occurrence counts is kept, and the resulting
    {character: count} mapping is then inverted. Characters that share
    the same count collide on inversion, and the one seen last wins.
    """
    merged = _count_chars(s1)
    for key, value in _count_chars(s2).items():
        # Keep the larger count. A character found only in s2 compares
        # against 0 and is therefore added with its own count.
        if value >= merged.get(key, 0):
            merged[key] = value
    return {v: k for k, v in merged.items()}  # inverted
s1 = "eeeeaaabbbcccc"
s2 = "eeeeeaaa"
print(mix(s1, s2))
Why create a merged list and recheck against the counter set
Here I've compared the values from dict1 (the counts for s1) and dict2 (the counts for s2): dict1 is overwritten when dict2's count is higher, and when a key is not found in dict1 it is added with dict2's count.
OUTPUTS:
{'e': 5, 'a': 3, 'b': 3, 'c': 4}
{5: 'e', 3: 'b', 4: 'c'}
Inverting the dictionary might overwrite entries: here 'a' is overwritten by 'b' because both have the count 3.

Related

Python - Replace string characters and get all combinations

I want to map the dictionary d = {'R': ['a', 'g'], 'Y': ['c', 't']} to the string s = '----YY----RR----' to get the following output:
----cc----aa----
----cc----ag----
----cc----ga----
----cc----gg----
----ct----aa----
----ct----ag----
----ct----ga----
----ct----gg----
----tc----aa----
----tc----ag----
----tc----ga----
----tc----gg----
----tt----aa----
----tt----ag----
----tt----ga----
----tt----gg----
My (very) inefficient code is as below:
# Brute force: replace the first two occurrences of one key, then the first
# two occurrences of another, and keep only strings with no 'Y' or 'R' left.
# NOTE(review): `seq` is not defined in this snippet; presumably it is the
# input string `s` from the question - confirm.
seqs = set()
for k, v in d.items():
    for i in v:
        i_seq = seq.replace(k, i, 1)
        for n in v:
            n_seq = i_seq.replace(k, n, 1)
            for k2, v2 in d.items():
                for i2 in v2:
                    i2_seq = n_seq.replace(k2, i2, 1)
                    for n2 in v2:
                        n2_seq = i2_seq.replace(k2, n2, 1)
                        if 'Y' in n2_seq or 'R' in n2_seq:
                            continue  # still has an unresolved placeholder
                        seqs.add(n2_seq)
What is a smarter way to do that?
A general approach without itertools:
def replaceCombinations(s, d):
    """Return the set of all strings obtained by expanding every key character of s.

    s -- the template string.
    d -- maps a character to an iterable of replacement strings.

    The result is built position by position, so a replacement that
    happens to be a key of d itself is never expanded a second time and
    never confuses a later position. Characters that are not keys of d
    are copied unchanged.
    """
    results = {''}
    for ch in s:
        options = d[ch] if ch in d else (ch,)
        results = {prefix + option for prefix in results for option in options}
    return results


string = "----YY----RR----"
d = {'R': ['a', 'g'], 'Y': ['c', 't']}
for c in sorted(replaceCombinations(string, d)):
    print(c)
Use itertools.product:
from itertools import product

s = '----YY----RR----'
d = {'R': ['a', 'g'], 'Y': ['c', 't']}

# One list of choices per position: the replacements for a key character,
# or the character itself when it is not a key. product() then walks every
# combination, so the template's layout is never hard-coded.
choices = [d.get(ch, [ch]) for ch in s]
results = [''.join(combo) for combo in product(*choices)]
for line in results:
    print(line)

# Output:
# ----cc----aa----
# ----cc----ag----
# ----cc----ga----
# ----cc----gg----
# ----ct----aa----
# ----ct----ag----
# ----ct----ga----
# ----ct----gg----
# ----tc----aa----
# ----tc----ag----
# ----tc----ga----
# ----tc----gg----
# ----tt----aa----
# ----tt----ag----
# ----tt----ga----
# ----tt----gg----

Replace character in string and form all possible combination

I am doing text recognition with pytesseract. Sometimes text is not properly extracted.
For example, "DDR4" might be interpreted as "ODR4"
Hence I have a dictionary which records all possible misreadings, and code to detect how many characters need to be replaced and their indexes, for example:
my_dictionary = {
    'D': ['O', '0'],
    'O': 'D',
    '0': 'D',
}
user_input = "DDR4"
# positions of the characters that have an entry in my_dictionary
char_index = [index for index, val in enumerate(user_input) if val in my_dictionary]
# number of such characters
char_to_replace = len(char_index)
In this case, how could I produce a list of all possible combination, for example
D0R4, DOR4, 00R4, 0OR4, OOR4, O0R4, 0DR4, ODR4
Appreciate for any inputs
This is what i have come up with, pretty ugly code and im sure it can be done much easier with itertools, but what the heck - i hope it helps:
my_dictionary = {
    'D': ['O', '0'],
    'O': 'D',
    '0': 'D',
}
user_input = "DDR4"


def replace_char(variable=None, replace_index=None, replace_with=None):
    """Return variable with the character at replace_index swapped for replace_with."""
    head = variable[:replace_index]
    tail = variable[replace_index + 1:]
    return head + replace_with + tail


# length of the longest value in the mapping (computed, but not used below)
number_of_iterations_required = max([len(f) for f in my_dictionary.values()])

# First pass: the input itself plus every single-character substitution of it.
baseword_combinations = [user_input]
for key, val in my_dictionary.items():
    for idx, char in enumerate(user_input):
        if char != key:
            continue
        baseword_combinations.extend(
            replace_char(variable=user_input, replace_index=idx, replace_with=v)
            for v in val
        )

# Second pass: substitute one more character in every first-pass word,
# following the mapping in both directions (key -> value and value -> key).
possible_combinations = [user_input]
for word in baseword_combinations:
    for idx, char in enumerate(word):
        for key, val in my_dictionary.items():
            char_is_key = char == key
            char_is_val = char == val
            if not (char_is_key or char_is_val):
                continue
            for v in val:
                if char_is_key:
                    possible_combinations.append(
                        replace_char(variable=word, replace_index=idx, replace_with=v))
                if char_is_val:
                    possible_combinations.append(
                        replace_char(variable=word, replace_index=idx, replace_with=key))

# get rid of duplicates, print result
print(list(set(possible_combinations)))
Result:
['OOR4', '00R4', 'ODR4', 'D0R4', '0DR4', 'DDR4', '0OR4', 'O0R4', 'DOR4']
Edit
The part with the number_of_iterations_required was unused in my above code, also i reworked it a little to use list comprehension - which makes it much less understandable, but much shorter, so here you go:
my_dictionary = {
    'D': ['O', '0'],
    'O': 'D',
    '0': 'D',
}
user_input = "DDR4"


def expand_variants(text, substitutions):
    """Return every spelling of text reachable by swapping confusable characters.

    Each character of text may stay as it is, become any alternative that
    substitutions lists for it, or become a key whose value is exactly
    that character (the mapping is followed in both directions). All
    positions are combined, so any number of characters can be swapped at
    once. The unchanged text is always the first element.
    """
    variants = ['']
    for char in text:
        options = [char]
        # forward direction: alternatives listed for this character
        options.extend(substitutions.get(char, ()))
        # reverse direction: keys that are recorded as a misreading of it
        options.extend(key for key, val in substitutions.items() if val == char)
        options = list(dict.fromkeys(options))  # drop duplicates, keep order
        variants = [prefix + option for prefix in variants for option in options]
    return list(dict.fromkeys(variants))


result = expand_variants(user_input, my_dictionary)
print(result)
I tried to solve this problem in the following way; I hope it helps:
import itertools as it

my_list = ['D', 'O', '0']
text = 'DDR4'  # not named `input`, which would shadow the builtin

# Every character that belongs to the confusable group may be any member
# of that group; every other character stays fixed.
choices = [my_list if ch in my_list else [ch] for ch in text]
result = [''.join(combo) for combo in it.product(*choices)]
result:
['DDR4', 'DOR4', 'D0R4', 'ODR4', 'OOR4', 'O0R4', '0DR4', '0OR4', '00R4']

I have two letters with equal count in a name, how to print both the letters with their count

I need to display the letter and its count if it has the maximum count in a name. However, I have two letters (n:2, u:2) with an equal count in a name. How can I print both letters with their counts, since both have the maximum count? I could only do it for one letter.
name = 'Annuu'
name = name.lower()
names = set(name)
highest = 0
p = ''
for letter in names:
    occurrences = name.count(letter)
    # strict comparison: a letter that only ties the current best is ignored
    if occurrences > highest:
        highest, p = occurrences, letter
print(f"{p} {highest}")
You can use Counter object to find the count.
Then find the maximum count to filter the letters.
from collections import Counter

name = "annuu"
count_dict = Counter(name)
max_count = max(count_dict.values())
# print every letter whose count equals the maximum
for letter in count_dict:
    if count_dict[letter] == max_count:
        print(letter, max_count)
This is without using any imports:
name = "Onnuu"
name = name.lower()
names = set(name)
print(names)
# (count, letter) pairs, highest count first
l = sorted(((name.count(letter), letter) for letter in names), reverse=True)
top_count = l[0][0]
for count, letter in l:
    if count == top_count:
        print(letter)
Store the values in dict and find the max_frequency
name = 'Annuu'
name = name.lower()
d = {}
for i in name:
    d[i] = d.get(i, 0) + 1
max_freq = max(d.values())
# Sort by (count, letter) in descending order so that every letter with the
# maximum count comes first. Python 3 removed tuple parameters such as
# `lambda (x, y): ...`, so the pair is indexed instead.
for k, v in sorted(d.items(), key=lambda item: (item[1], item[0]), reverse=True):
    if v != max_freq:
        break  # everything after this point has a lower count
    print(k, v)
The following code works and produces the output:
The maximum characters and their respective count is as follows:
n 2
u 2
name = 'Annuu'
name = name.lower()
names = set(name)
# Use a dictionary because of the easy mapping between character and count
name_count_dict = {current_char: name.count(current_char) for current_char in names}
# max() yields a single character that has the highest count ...
max_char = max(name_count_dict, key=name_count_dict.get)
max_value = name_count_dict[max_char]
# ... so collect every character whose count equals that maximum. The
# maximum is computed once above instead of once per dictionary entry.
array_of_all_maxes = [k for k, v in name_count_dict.items() if v == max_value]
print("The maximum characters and their respective count is as follows:")
for max_chars in array_of_all_maxes:
    print(f"{max_chars} {max_value}")
I think this would be a simple solution for the problem without using any external package like collections.
Here I have written 2 test cases by repeating the same lines of code. You don't have to do it like that: what if you have more than 2 or 3 cases? It is better to write another function that tests the code by passing different values to it.
def get_count_and_highest(name):
    """Group the characters of name by how often they occur (case-insensitive).

    Returns a tuple (highest, d):
    highest -- the largest occurrence count, or 0 for an empty string.
    d       -- maps each occurrence count to the alphabetically sorted
               list of characters that occur exactly that many times.

    Every character is filed under its own count, so d[highest] contains
    only the characters that really reach the maximum.
    """
    name = name.lower()
    d = {}
    # sorted() makes the order inside each list deterministic
    for ch in sorted(set(name)):
        d.setdefault(name.count(ch), []).append(ch)
    highest = max(d, default=0)
    return highest, d


# Test case 1
highest, d = get_count_and_highest("Annuu")
l = d.get(highest, [])  # if dictionary d is empty then l will be an empty list
output = {ch: highest for ch in l}
print(highest)  # 2
print(d)  # {1: ['a'], 2: ['n', 'u']}
print(l)  # ['n', 'u']
print(output)  # {'n': 2, 'u': 2}

# Test case 2 (spaces are counted like any other character)
highest, d = get_count_and_highest("Babylon python new one")
l = d.get(highest, [])  # if dictionary d is empty then l will be an empty list
output = {ch: highest for ch in l}
print(highest)  # 4
print(d)  # {3: [' ', 'o'], 1: ['a', 'h', 'l', 'p', 't', 'w'], 2: ['b', 'e', 'y'], 4: ['n']}
print(l)  # ['n']
print(output)  # {'n': 4}
An example with collections.Counter
from collections import Counter

name = 'Annuu'
c = Counter(name.lower())
mc = c.most_common()  # (letter, count) pairs, highest count first
max_count = mc[0][1]
# Keep only the pairs that reach the maximum. Slicing at the index where
# the loop broke would also include the first pair with a lower count.
top = [pair for pair in mc if pair[1] == max_count]
print(top)  # [('n', 2), ('u', 2)]

Joining dictionary terms to form string

So, I have a dictionary of terms where each key is a word from a text file, and the value is a list of the next two words in that text file.
def dict(txt, n):
    """Map each word of txt to the list of the n - 1 words that follow it.

    txt -- the text; it is split on whitespace.
    n   -- window size: the key word plus the n - 1 words after it.

    Only words that have a full window of followers become keys. When a
    word occurs more than once, its last occurrence wins.

    NOTE: the name shadows the builtin `dict`; it is kept so that
    existing callers keep working.
    """
    words = txt.split()
    output = {}
    for i in range(len(words) - n + 1):
        # slice the split word list (the original referenced an undefined `text`)
        output[words[i]] = words[i + 1:i + n]
    return output
The output is a dictionary of things like:
{'had':['a','little'], 'Mary':['had','a'], 'a': ['little', 'lamb.']}
(Mine is actually much longer, as it is analyzing a long paper.)
My question is, how do I join these terms back together by reading the key, and then printing the values, then reading the last value and then finding a key that matches that value. The goal is ultimately to get a randomized paragraph, provided using a large document.
So far, I have something along the lines of:
if output[t] == text[1]:
return output
print(output.join(' ')
But this isn't returning anything. Suggestions?
Python's join does not work like you expect, perhaps.
You are thinking that you write
collection.join(join_character)
but it is
join_character.join(collection)
You should expect to write code like
' '.join(output)
What exactly you need for output is up to you; I expect you can figure that part out. It just looks like you were using join incorrectly here.
This will add terms until dic does not contain key.
dic = {'had': ['a', 'little'], 'Mary': ['had', 'a'], 'a': ['little', 'lamb.']}
key = 'Mary'
res = []
# Follow the chain: the last word of each value is the next key to look up.
# The loop ends as soon as that word is not a key of dic.
while key in dic:
    res.extend(dic[key])
    key = dic[key][-1]
print(' '.join(res))
This yields:
had a little lamb.
Be aware: You will enter an infinite loop if all values also are a key. You will also encounter this if there is a repeating sequence in your dictionary, such as
{'a': ['b', 'c'], 'b': ['a', 'c'], 'c': ['a', 'b'], 'foo': ['bar', 'foobar']}
To avoid this, you could do one out of two things:
Set a maximum iteration value
Stop the iteration when you encounter a key that has previously been seen.
Maximum iteration value:
dic = {'had': ['a', 'little'], 'Mary': ['had', 'a'], 'a': ['little', 'lamb.']}
key = 'Mary'
res = []
max_iterations = 10
i = 0
# Stop either when the chain runs out of keys or after max_iterations
# steps, whichever comes first, so a cyclic dictionary cannot loop forever.
while i < max_iterations and key in dic:
    res.extend(dic[key])
    key = dic[key][-1]
    i += 1
print(' '.join(res))
Stop at previously seen key
dic = {'had': ['a', 'little'], 'Mary': ['had', 'a'], 'a': ['little', 'lamb.']}
key = 'Mary'
res = []
seen_keys = set()  # a set gives constant-time membership tests
# Stop when the chain runs out of keys or comes back to a key that was
# already followed (which would otherwise repeat forever).
while key in dic and key not in seen_keys:
    seen_keys.add(key)
    res.extend(dic[key])
    key = dic[key][-1]
print(' '.join(res))
Your data structure dict['word1'] = ('word2', 'word3') requires you to search into the data values, which is very inefficient.
It would be much easier to look up if it was organized as dict[('word1', 'word2')] = ['possible', 'word3', 'values'].
from collections import defaultdict
from itertools import tee
from random import choice


def triwise(iterable):
    """Return an iterator over overlapping triples: (s0, s1, s2), (s1, s2, s3), ..."""
    a, b, c = tee(iterable, 3)
    # advance the second copy by one item and the third copy by two
    next(b, None)
    next(c, None)
    next(c, None)
    # the builtin zip is lazy in Python 3 (itertools.izip no longer exists)
    return zip(a, b, c)


def make_lookup(txt):
    """Map every pair of consecutive words in txt to the words seen right after it.

    Two empty strings are added before and after the words, so ('', '')
    leads to the first word and an empty string marks the end of the text.
    """
    res = defaultdict(list)
    words = ['', ''] + txt.strip().split() + ['', '']
    for w1, w2, w3 in triwise(words):
        res[(w1, w2)].append(w3)
    return dict(res)


def make_sentence(lookup):
    """Build a sentence by repeatedly picking a random follower of the last two words.

    Starts from the ('', '') pair and stops when the empty end marker is picked.
    """
    w1, w2 = '', ''
    words = []
    while True:
        w1, w2 = w2, choice(lookup[(w1, w2)])
        if w2 == '':
            return ' '.join(words)
        words.append(w2)


def main():
    txt = 'Mary had a little lamb whose fleece was white as snow'
    lookup = make_lookup(txt)
    print(make_sentence(lookup))


if __name__ == "__main__":
    main()

py3k: mapping dictionary (string->number) into list (of strings)

Assume we have dictionary that translates strings into numbers.
How to reverse it into list ?
Let assume, we can fill not mapped numbers with empty string ''.
Here example how it works:
>>> dic_into_list({'x':0, 'z':2, 'w':3})
['x', '', 'z', 'w']
d = {'x': 0, 'z': 2, 'w': 3}
# one slot per index from 0 up to the largest value, empty by default
size = max(d.values()) + 1
lst = [""] * size
for name, position in d.items():
    lst[position] = name
print(lst)
prints
['x', '', 'z', 'w']
The simplest way is to flip the dict and then iterate up to the maximum value (now key) in the dict:
original = {'x': 0, 'z': 2, 'w': 3}
# flip the mapping: number -> string (items() replaces Python 2's iteritems())
d = {v: k for k, v in original.items()}
# max(d) is the largest number; numbers without a string become ''
print([d.get(i, '') for i in range(max(d) + 1)])
I'll share my current solution (I am looking for a shorter and clearer implementation in other posts):
def dic_into_list(dic):
    """Turn a {string: index} mapping into a list with each string at its index.

    The list runs from index 0 to the largest index in dic, and indexes
    that no string maps to are filled with ''. An empty dic gives an
    empty list. When two strings share an index, the one that comes later
    in dic wins.
    """
    if not dic:
        return []  # max() of an empty sequence would raise ValueError
    dicrev = {num: name for name, num in dic.items()}
    return [dicrev.get(i, '') for i in range(max(dicrev) + 1)]

Categories