Recently I heard about @lru_cache in Python and tried it out with the following function:
#lru_cache(maxsize=1024)
def n_words_frequency(document: str, numberOfWords=1) -> dict:
    """Count every run of ``numberOfWords`` consecutive words in ``document``.

    The document is split on whitespace and a sliding window of
    ``numberOfWords`` words is moved over it. Each full window, joined with
    single spaces, is one expression.

    Args:
        document: Text to analyse.
        numberOfWords: Number of consecutive words per expression. Values
            below 1 behave like 1.

    Returns:
        A dict mapping each expression to its number of occurrences, ordered
        from most to least frequent. Expressions with equal counts keep the
        order in which they first appear in the document.
    """
    window_size = max(numberOfWords, 1)
    # A bounded deque drops its oldest word automatically on append, so no
    # explicit popleft bookkeeping is needed.
    window = deque(maxlen=window_size)
    counts = {}
    for word in document.split():
        window.append(word)
        if len(window) == window_size:
            expression = ' '.join(window)
            counts[expression] = counts.get(expression, 0) + 1
    # sorted() is stable even with reverse=True, so ties keep insertion order.
    # Keys are returned unchanged (they are not reversed).
    return dict(sorted(counts.items(), key=lambda item: item[1], reverse=True))
The function takes a document string and the number of words wanted per expression, and returns all of the expressions with their frequencies.
For example, "my name is liam what is your name" and numberOfWords equal to 1 will return:
{'name': 2, 'is': 2, 'my': 1, 'liam': 1, 'what': 1, 'your': 1}
Now, when I added @lru_cache I got a speed improvement of more than 150x, but after reading cache_info() I saw that @lru_cache hadn't done anything:
cache_info -> CacheInfo(hits=0, misses=1, maxsize=1024, currsize=1)
Can someone please explain to me why it was so helpful?
Related
I have an example dictionary of rules, quantifiers, and transformations. Essentially, inside each key there is another key, id, containing the ids. I am trying to find all the entries that match and return the matching ids as a dictionary in this format:
dictionary = {'rules':[...], 'quantifiers':[...], 'transformations':[...]}
Here is the sample:
# Sample input with three sections, each a list of records keyed by 'id':
#   'rules' - records with a JSON-encoded 'logic' string; its operand1/operand2
#             values may be 'metrics.<n>' strings.
#   'quant' - records with a list of 'transIds' and a 'qualifiedId' string.
#   'trans' - records with a list of rule ids under 'rules'.
test_dict = {
'rules': [{'id': 123,'logic': '{"$or":[{"$and":[{"baseOperator":null,"operator":"does_not_contain_ignore_case","operand1":"metrics.123","operand2":"metrics.456"}]}]}',},
{'id': 589,
'logic': '{"$or":[{"$and":[{"baseOperator":null,"operator":"does_not_contain_ignore_case","operand1":"metrics.123","operand2":0}, {"baseOperator":null,"operator":"does_not_contain_ignore_case","operand1":"metrics.456","operand2":0}]}]}',},
{'id': 51,
'logic': '{"$or":[{"$and":[{"baseOperator":null,"operator":"does_not_contain_ignore_case","operand1":"metrics.789","operand2":"metrics.1"}]}]}',},],
'quant': [{'id':123,
'transIds': [1, 2, 3],
'qualifiedId': 'metrics.123'},
{'id':456,
'transIds': [1, 6],
'qualifiedId': 'metrics.456'},
{'id':789,
'transIds': [9],
'qualifiedId': 'metrics.789'}],
'trans': [{'id':1,
'rules': [123, 120]},
{'id':6,
'rules':[589, 2]}]
}
Here was my attempt. However, I realised that the lists trans and rules are only filled in as the loop reaches the relevant section; because rules comes first in test_dict, the loop won't capture it, since the lists it is checked against are still empty at that point.
Essentially, I wanted to enter logic and capture all values metric that belong to the ids in quantifiers
Capture all ids from quantifiers that match the values inside attr
# Single-pass attempt: walk every record of every section once and try to
# follow the chain attr ids -> transIds -> rule ids -> logic in the same pass.
attr = [123, 456]
keys = list(test_dict.keys())
trans = []
rules = []
for iter in range(len(keys)):
    for in_iter in range(len(test_dict[keys[iter]])):
        # Records whose id is requested in `attr` contribute their transIds.
        if test_dict[keys[iter]][in_iter].get('id') in attr:
            if test_dict[keys[iter]][in_iter].get('transIds') is not None:
                for J in test_dict[keys[iter]][in_iter].get('transIds'):
                    trans.append(J)
        # Records whose id was collected in `trans` contribute their rule ids.
        if test_dict[keys[iter]][in_iter].get('id') in trans:
            if test_dict[keys[iter]][in_iter].get('rules') is not None:
                for K in test_dict[keys[iter]][in_iter].get('rules'):
                    rules.append(K)
        # `rules` starts empty and is only filled by the check above, so any
        # record visited before that happens can never match here.
        if test_dict[keys[iter]][in_iter].get('id') in rules:
            if test_dict[keys[iter]][in_iter].get('logic') is not None:
                print(test_dict[keys[iter]][in_iter].get('logic'))
I figured it out thanks to the comments: instead of running everything inside a single loop, I split it into separate loops, which solved the list issue. However, this attempt takes far too many lines of code:
import json

# Ids to start from; any record whose 'id' is listed here seeds the search.
attr = [123, 456]
trans = []
rules = []
qualified = []
quant_id = set()

# NOTE: as in the original approach, ids are matched against records of every
# section of test_dict, not only the section they conceptually belong to.

# Pass 1: records selected by `attr` contribute their qualifiedId and transIds.
for section in test_dict.values():
    for record in section:
        if record.get('id') not in attr:
            continue
        qualified_id = record.get('qualifiedId')
        # Skip records without a qualifiedId so that None never ends up in
        # `qualified` (it would later match a missing operand and crash).
        if qualified_id is not None:
            qualified.append(qualified_id)
        trans_ids = record.get('transIds')
        if trans_ids is not None:
            trans.extend(trans_ids)

# Pass 2: records whose id was collected in `trans` contribute their rule ids.
trans2 = set()
for section in test_dict.values():
    for record in section:
        if record.get('id') not in trans:
            continue
        trans2.add(record.get('id'))
        rule_ids = record.get('rules')
        if rule_ids is not None:
            rules.extend(rule_ids)

# Pass 3: records whose id was collected in `rules` are reported, and their
# JSON logic is scanned for operands that name a collected qualifiedId.
rules2 = set()
for section in test_dict.values():
    for record in section:
        if record.get('id') not in rules:
            continue
        rules2.add(record.get('id'))
        raw_logic = record.get('logic')
        if raw_logic is None:
            continue
        # Logic layout in the sample data:
        # {"$or": [{"$and": [condition, ...]}, ...]}
        logic = json.loads(raw_logic)
        for or_groups in logic.values():
            for and_logic in or_groups:
                for conditions in and_logic.values():
                    for condition in conditions:
                        # Check both operands independently; either may
                        # reference a quantifier.
                        for operand_key in ('operand1', 'operand2'):
                            operand = condition.get(operand_key)
                            if isinstance(operand, str) and operand in qualified:
                                quant_id.add(operand.split('.')[-1])

dictionary = {'rules': rules2, 'transformations': trans2, 'quantifiers': quant_id}
print(dictionary)
Result:
{'rules': {123, 589}, 'transformations': {1, 6}, 'quantifiers': {'456', '123'}}
Updated with set instead of list so only unique values remain.
Link to problem statement
Please help. I am very confused on how to execute this:
This is what I currently have:
def similarityAnalysis(paragraph1, paragraph2):
    # Tallies how often each word of `lst` occurs, then prints the tallies.
    # NOTE(review): neither paragraph1 nor paragraph2 is used in this body.
    # NOTE(review): the local name `dict` shadows the built-in `dict` type.
    dict = {}
    # NOTE(review): `lst` is not defined anywhere inside this function.
    for word in lst:
        if word in dict:
            dict[word] = dict[word] + 1
        else:
            dict[word] = 1
    # NOTE(review): the loop binds `vale`, but the print below reads `val`.
    for key, vale in dict.items():
        print(key, val)
see below.
To find the common words we use set intersection
For counting we use a dict
Code
lst1 = ['jack', 'Jim', 'apple']
lst2 = ['chair', 'jack', 'ball', 'steve']

# Words that occur in both lists.
common = set(lst1) & set(lst2)
print('commom words below:')
print(common)
print()
print('counter below:')

# Maps word -> [occurrences in lst1, occurrences in lst2].
counter = {}
for slot, words in enumerate((lst1, lst2)):
    for word in words:
        counter.setdefault(word, [0, 0])[slot] += 1
print(counter)
output
commom words below:
{'jack'}
counter below:
{'jack': [1, 1], 'Jim': [1, 0], 'apple': [1, 0], 'chair': [0, 1], 'ball': [0, 1], 'steve': [0, 1]}
Analysing your code as follows:
You use the variable name dict, which is the name of the built-in type used for creating dictionaries (it is a built-in, not a reserved keyword). By using it as a variable name, you lose the ability to use the built-in dict in that scope.
The function uses a variable named lst which is not one of its arguments. Where do the values for this variable come from?
In the second for loop, you use the variable name vale but then later reference a different variable called val.
Otherwise, looks good. There may be other issues, that's as far as I got.
Recommend googling the following and seeing what code you find
"Python count the number of words in a paragraph"
Update:
There are many ways to do this, but here's one answer:
def word_counts(lst):
    """Return a dict mapping each item of ``lst`` to how often it occurs."""
    tally = {}
    for item in lst:
        if item in tally:
            tally[item] += 1
        else:
            tally[item] = 1
    return tally
def similarityAnalysis(paragraph1, paragraph2):
    """Report, for every word found in both paragraphs, its count in each.

    Both paragraphs are split on whitespace. The result maps each shared
    word to a tuple ``(count in paragraph1, count in paragraph2)``.
    """
    words1 = paragraph1.split()
    words2 = paragraph2.split()
    freq1 = word_counts(words1)
    freq2 = word_counts(words2)
    shared = set(words1).intersection(words2)
    result = {}
    for word in shared:
        result[word] = (freq1[word], freq2[word])
    return result
# Demo: print the per-paragraph counts of the words the two samples share.
paragraph1 = 'one three two one two four'
paragraph2 = 'one two one three three one'
print(similarityAnalysis(paragraph1, paragraph2))
Output:
{'three': (1, 2), 'one': (2, 3), 'two': (2, 1)}
Hello, I asked this question previously and now want to adjust the code I have. I want to change it so that if a letter is not present in the text string, it is still returned with the value 0 assigned to it.
# NOTE(review): the enclosing `def` line is missing from this excerpt; `text`
# and `let` are presumably its parameters - confirm against the full function.
# Counts, case-insensitively, the characters of `text` that appear in `let`.
count = {}
for l in text.lower():
    if l in let:
        # Letters of `let` that never occur in `text` get no entry at all.
        if l in count.keys():
            count[l] += 1
        else:
            count[l] = 1
return count
It currently returns this:
example = "Sample String"
print(func(example, "sao"))
{'s': 2, 'a' : 1}
This would be my desired output
example = "Sample String"
print(func(example, "sao"))
{'s': 2, 'a' : 1, 'o' :0}
If you don't mind using tools designed especially for your purpose, then the following will do:
from collections import Counter
def myfunc(inp, vals):
    """Count how often each character of ``vals`` occurs in ``inp``.

    ``inp`` is lower-cased before counting, so the characters in ``vals``
    are expected to be lower case. Characters that never occur are reported
    with a count of 0.

    Args:
        inp: Text to search.
        vals: Iterable of characters to report on.

    Returns:
        A dict mapping each character of ``vals`` to its count in ``inp``.
    """
    c = Counter(inp.lower())
    # Indexing a Counter with a missing key yields 0 instead of raising.
    return {e: c[e] for e in vals}


s = 'Sample String'
print(myfunc(s, 'sao'))
Otherwise you can explicitly set all missing values in your functions.
def func(inp, vals):
    """Count how often each character of ``vals`` occurs in ``inp``.

    Every character of ``vals`` starts at 0, so characters that never occur
    are still present in the result. ``inp`` is lower-cased before counting,
    so the characters in ``vals`` are expected to be lower case.

    Args:
        inp: Text to search.
        vals: Iterable of characters to report on.

    Returns:
        A dict mapping each character of ``vals`` to its count in ``inp``.
    """
    count = {e: 0 for e in vals}
    for s in inp.lower():
        if s in count:
            count[s] += 1
    return count
# create a function
def stringFunc(string, letters):
    """Count how often each of ``letters`` occurs in the lower-cased ``string``.

    Args:
        string: Text to search; it is lower-cased before counting.
        letters: Iterable of letters to report on.

    Returns:
        A dict mapping each letter to its number of occurrences, with 0 for
        letters that do not occur.
    """
    # Lower-case once instead of once per letter.
    lowered = string.lower()
    # dictionary comprehension to count the number of times a letter is in the string
    return {letter: lowered.count(letter) for letter in letters}


stringFunc('Hello World', 'lohdx')
# {'l': 3, 'o': 2, 'h': 1, 'd': 1, 'x': 0}
You can use a dict comprehension and str.count:
def count_letters(text, letters):
    """Map each of ``letters`` to its number of occurrences in lower-cased ``text``."""
    haystack = text.lower()
    tally = {}
    for letter in letters:
        tally[letter] = haystack.count(letter)
    return tally


print(count_letters("Sample String", "sao"))
result: {'s': 2, 'a': 1, 'o': 0}
You can use collections.Counter and obtain character counts via the get method:
from collections import Counter
def func(string, chars):
    """Map each character of ``chars`` to its count in lower-cased ``string``."""
    tally = Counter(string.lower())
    result = {}
    for ch in chars:
        # A Counter yields 0 for characters it has never seen.
        result[ch] = tally[ch]
    return result
I'm trying to write a code that does the following:
Takes a number of strings as input
Splits each string into two contiguous substrings of equal length
Returns the minimum number of characters to change to make the two substrings into anagrams of one another (if it's not possible, it must return -1).
Sample Input
6
aaabbb
ab
abc
mnop
xyyx
xaxbbbxx
Sample Output
3
1
-1
2
0
1
For a more detailed explanation about the problem, kindly check this link (no login or sign-up needed).
I've approached the solution pretty well, but it seems like I'm not getting something right, my output is usually a bit greater or smaller than what's expected, and I really don't know what's causing the problem. Here's my code:
# Reads a count n followed by n strings, then computes one result per string.
n = int(input())
user_input = []
for k in range(n):
    user_input.append(input())
results = []
for i in user_input:
    # Only even-length strings can be split into two equal halves.
    if len(list(i))%2 == 0:
        left = i[:len(list(i))//2]
        right = i[len(list(i))//2:]
        # Per-half letter frequencies.
        left_dict = dict((letter,left.count(letter)) for letter in set(left))
        right_dict = dict((letter,right.count(letter)) for letter in set(right))
        if left_dict == right_dict:
            results.append(0)
        else:
            # Keeps only letters whose count is identical in both halves.
            shared_items = {k: left_dict[k] for k in left_dict if k in right_dict and left_dict[k] == right_dict[k]}
            # NOTE(review): len(shared_items) is the number of distinct
            # letters kept above, not the number of matching characters.
            results.append(len(left) - len(shared_items))
    else:
        results.append(-1)
print(results)
I appreciate any help in advance.
You started great by calculating the count of each character in both substrings, but you never actually made use of those counts.
In this statement:
shared_items = {k: left_dict[k] for k in left_dict if k in right_dict and left_dict[k] == right_dict[k]}
you only collect the items that are in both dictionaries and have the same count:
eg. in your 6th testcase:
xaxbbbxx
left_dict will be {'b': 1, 'a': 1, 'x': 2}
right_dict will be {'b': 2, 'x': 2}
and shared_item the way you calculate will give you: {'x':2}
But this doesn't correctly list all the items that are shared.
The correct no. of shared_items should be : {'x':2, 'b':1}
So for that,
What we could then do is calculate the minimum of the item quantities common in the left_dict and right_dict.
i.e. min(left_dict[k],right_dict[k])
the result.append statement will also change accordingly:
else:
shared_items = {k:min(left_dict[k],right_dict[k]) for k in left_dict if k in right_dict}
results.append(len(left)-sum(shared_items.values()))
Full execution:
# Read the number of test strings, then the strings themselves.
n = int(input())
user_input = [input() for _ in range(n)]

results = []
for text in user_input:
    if len(text) % 2 != 0:
        # An odd-length string cannot be split into two equal halves.
        results.append(-1)
        continue
    half = len(text) // 2
    left, right = text[:half], text[half:]
    left_dict = {letter: left.count(letter) for letter in set(left)}
    right_dict = {letter: right.count(letter) for letter in set(right)}
    if left_dict == right_dict:
        # The halves are already anagrams of each other.
        results.append(0)
        continue
    # Characters the halves already have in common, counted with
    # multiplicity; every other character of the left half must change.
    shared_items = {
        letter: min(left_dict[letter], right_dict[letter])
        for letter in left_dict
        if letter in right_dict
    }
    results.append(len(left) - sum(shared_items.values()))
print(results)
Input:
6
aaabbb
ab
abc
mnop
xyyx
xaxbbbxx
Output:
[3, 1, -1, 2, 0, 1]
which you could then of course print with print('\n'.join(map(str, results))) to get the output in the required format (the results are integers, so they must be converted to strings before joining).
Two words are anagrams if the same letters appear with the same number of occurrences.
from collections import Counter
sl = ["aaabbb", "ab", "abc", "mnop", "xyyx", "xaxbbbxx"]


def f(s):
    """Return how many characters must change so both halves of ``s`` are anagrams.

    Returns -1 when ``s`` has odd length. Otherwise prints the letters the
    second half has in excess of the first half and returns how many there are.
    """
    if len(s) % 2:
        return -1
    half = len(s) // 2
    # Counter subtraction keeps only positive counts.
    surplus = Counter(s[half:]) - Counter(s[:half])
    print(surplus)
    return sum(surplus.values())


list(map(f, sl))
Counter({'b': 3})
Counter({'b': 1})
Counter({'o': 1, 'p': 1})
Counter()
Counter({'b': 1})
[3, 1, -1, 2, 0, 1]
I have a String like this str = "aabcccdfffeeeeettaaaattiioccc"
I need output like this Result ={aa: 1;b:1;ccc:2;d:1;fff:1;eeeee:1;tt:2;aaaa:1;ii:1;o:1;ccc:1}
I have tried it like this so far:
def repeating_letters(the_string):
    # Attempt at finding runs of repeated letters; prints characters and
    # returns nothing.
    temp = []
    count = 0
    for i in range(len(the_string)):
        # NOTE(review): this compares a character with itself, so it is
        # always true.
        if(the_string[i] == the_string[i]):
            # NOTE(review): i + 1 is one past the end on the last iteration.
            if(the_string[i] == the_string[i+1]):
                temp = the_string[i]
                # count = count+1
            print(the_string[i])
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    the_string = "aaafassskfahfioejwwa"
    repeating_letters(the_string)
Hints
I would follow these steps:
Create a list where I will store my partial strings
Start iterating the string
Store the initial position and the current character
Keep iterating until the character is different
Store in the list the partial string from the initial position you stored until 1 less than the current position
Update the initial position to the current one and the current character
Use the list to create a collections.Counter
About your code, the_string[i] == the_string[i] will always be true.
SPOILER: solution
from collections import Counter
def repeating_letters(the_string):
    """Count each maximal run of identical consecutive characters.

    Args:
        the_string: Text to split into runs, e.g. ``"aabaa"`` has the runs
            ``"aa"``, ``"b"`` and ``"aa"``.

    Returns:
        A Counter mapping each run to the number of times it occurs. An
        empty input yields an empty Counter.
    """
    partials = []
    if not the_string:
        # Without this guard the trailing append below would record a
        # spurious empty run.
        return Counter(partials)
    initial = 0
    for i, character in enumerate(the_string):
        if character == the_string[initial]:
            continue
        # The run that started at `initial` ended just before position i.
        partials.append(the_string[initial:i])
        initial = i
    partials.append(the_string[initial:])  # Needed for the last partial string
    return Counter(partials)
As @prahantrana mentions in a comment, getting the partials can be done in a one-liner with the groupby function from the itertools library.
from collections import Counter
from itertools import groupby
def repeating_letters(the_string):
    """Count each maximal run of identical consecutive characters in ``the_string``."""
    runs = Counter()
    for _, group in groupby(the_string):
        runs[''.join(group)] += 1
    return runs
Or
from collections import Counter
from itertools import groupby
def repeating_letters(the_string):
    """Count each maximal run of identical consecutive characters in ``the_string``."""
    partials = []
    for char, group in groupby(the_string):
        # Rebuild the run from its character and its length.
        run_length = sum(1 for _ in group)
        partials.append(char * run_length)
    return Counter(partials)
I'm not sure which of them is faster.
from collections import Counter
from itertools import groupby
def splitter(text):
    """
    text: str
    return : frequency of each run of continuous identical characters
    """
    return Counter(''.join(run) for _key, run in groupby(text))


l = 'aaaabcccdfffeeeeettfffaaaattiioccceeeeeeaaaa'
print(splitter(l))
output
Counter({'aaaa': 3, 'ccc': 2, 'fff': 2, 'tt': 2, 'b': 1, 'd': 1, 'eeeee': 1, 'ii': 1, 'o': 1, 'eeeeee': 1})
Another way: a hand-coded method that does the grouping without itertools (Counter is still used for the final tally)
from collections import Counter
def function(string):
    """
    string: str
    return: frequency of each run of continuous identical characters
    """
    if not string:
        return Counter('')
    runs = []
    run_start = 0
    for pos in range(1, len(string)):
        # A run ends wherever a character differs from its predecessor.
        if string[pos] != string[pos - 1]:
            runs.append(string[run_start:pos])
            run_start = pos
    # The final run extends to the end of the string.
    runs.append(string[run_start:])
    return Counter(runs)


l ='aaaabcccdfffeeeeettfffaaaattiioccceeeeeeaaaa'
print(function(l))
output
Counter({'aaaa': 3, 'ccc': 2, 'fff': 2, 'tt': 2, 'b': 1, 'd': 1, 'eeeee': 1, 'ii': 1, 'o': 1, 'eeeeee': 1})