calculation of measures of descriptive statistics - python

In this program you CANNOT USE python libraries (pandas, numpy, etc), nor python functions (sum, etc).
Fulfilling all this, I would like to know how I could calculate these measures of my quantitative variable: mean, median and mode.
This is the data reading of my quantitative variable.
#we enter people's salaries
def salary(n):
    """Prompt for ``n`` salaries on standard input and return them as floats.

    Each prompt is printed on its own line before the value is read. A line
    that cannot be converted by ``float`` raises ``ValueError``.
    """
    salaries = []
    for _ in range(n):
        print("enter the person's salary:")
        entered = input()
        salaries.append(float(entered))
    return salaries

You have to compute each measure separately, and first sort the list of numbers (the following example assumes that the list you pass in is unordered).
median: take the middle element of the sorted list (or the average of the two middle elements when the length is even)
mode (called "plural" in the code below): distinguish between no mode, a single mode and several modes
mean: sum the values and divide by the length, try this:
def get_sort_lst(lst):
    """Sort ``lst`` in place in ascending order (insertion sort) and return it."""
    for pos in range(1, len(lst)):
        current = lst[pos]
        slot = pos
        # Shift every larger element one place to the right to open a slot
        # for the current value.
        while slot > 0 and lst[slot - 1] > current:
            lst[slot] = lst[slot - 1]
            slot -= 1
        lst[slot] = current
    return lst
def get_median(lst):
    """Return the middle value of ``lst``.

    No sorting is done here, so the caller is expected to pass an already
    sorted list. For an even number of elements the two middle values are
    averaged.
    """
    middle = len(lst) // 2
    if len(lst) % 2 != 0:
        return lst[middle]
    return (lst[middle - 1] + lst[middle]) / 2
def get_mean(lst):
    """Return the arithmetic mean of ``lst`` without using ``sum``.

    An empty list raises ``ZeroDivisionError``.
    """
    size = len(lst)
    total = 0
    idx = 0
    while idx < size:
        total += lst[idx]
        idx += 1
    return total / size
def get_plural(lst):
    """Return the mode(s) of ``lst`` ("plural" is this snippet's word for mode).

    Returns a list holding every value that shares the highest frequency, in
    order of first appearance. When no value occurs more than once -- which
    includes an empty ``lst`` -- the string ``"No plural"`` is returned.
    """
    # Frequency table: value -> number of occurrences.
    counts = {}
    for item in lst:
        counts[item] = counts.get(item, 0) + 1

    best = 0
    modes = []
    for value, freq in counts.items():
        if freq > best:
            # A strictly higher frequency replaces all earlier candidates.
            best = freq
            modes = [value]
        elif freq == best:
            modes.append(value)

    # best is 0 for an empty list and 1 when every value is unique.
    if best <= 1:
        return "No plural"
    return modes
def salary(lst):
    """Sort ``lst`` and print its mean, median and mode on a single line."""
    ordered = get_sort_lst(lst)
    mean = get_mean(ordered)
    median = get_median(ordered)
    plural = get_plural(ordered)
    print("Mean: {}, Median: {}, Plural: {}".format(mean, median, plural))
salary([1, 2, 3, 4, 5, 5])

You may try something like this
# Accumulate the sum and the element count by hand (no sum() / len()).
total = 0
count = 0
for i in L:
    total += i
    count += 1
# Mean
mean = total / count
# Median: L must already be sorted. An even-length list has two middle
# elements, and the median is their average.
middle = count // 2
if count % 2:
    median = L[middle]
else:
    median = (L[middle - 1] + L[middle]) / 2
You can see this post to calculate mode

Related

Code for consecutive strings works but can't pass random tests

In this problem, I'm given an array(list) strarr of strings and an integer k. My task is to return the first longest string consisting of k consecutive strings taken in the array. My code passed all the sample tests from CodeWars but can't seem to pass the random tests.
Here's the link to the problem.
I did it in two days. I found the max consecutively combined string first. Here's the code for that.
strarr = []
def longest_consec(strarr, k):
    """First attempt from the question: longest join of k consecutive strings.

    Returns the longest single string when k == 1, the longest joined window
    among those it examines when 1 < k < len(strarr) + 1, and "" otherwise.
    """
    # Appends an empty string to the caller's list, so the argument is mutated
    # and `length` is one more than the number of strings passed in.
    strarr.append('')
    length = len(strarr)
    cons_list = []
    end = k
    start = 0
    # Loop counter: starts at -length/2 and the while loop below runs until it
    # exceeds 1.
    freq = -length/2
    final_string = []
    largest = max(strarr, key=len, default='')
    if k == 1:
        return largest
    elif 1 < k < length:
        while(freq <= 1):
            cons_list.append(strarr[start:end])
            # The window advances by k-1 positions, not by 1, so for k > 2 not
            # every starting index is examined.
            start += k-1
            end += k-1
            freq += 1
        for index in cons_list:
            final_string.append(''.join(index))
        return max(final_string, key=len, default='')
    else:
        return ""
Since that didn't pass all the random tests, I compared the combined k strings on both sides of the single largest string. But, this way, the code doesn't account for the case when the single largest string is in the middle. Please help.
strarr = []
def longest_consec(strarr, k):
    """Second attempt from the question: only looks around the longest string.

    Compares the k-string window ending at the longest single string with the
    k-string window starting at it, and returns the longer of those two joins.
    Windows that do not touch that position are never considered.
    """
    # Appends an empty string to the caller's list (the argument is mutated).
    strarr.append('')
    length = len(strarr)
    largest = max(strarr, key=len, default='')
    # Index of the first occurrence of the longest string.
    pos = int(strarr.index(largest))
    if k == 1:
        return largest
    elif 1 < k < length:
        # NOTE(review): pos+1-k is negative when pos < k-1; a negative slice
        # start counts from the end of the list.
        prev_string = ''.join(strarr[pos+1-k:pos+1])
        next_string = ''.join(strarr[pos:pos+k])
        if len(prev_string) >= len(next_string):
            res = prev_string
        else:
            res = next_string
        return res
    else:
        return ""
print(longest_consec(["zone", "abigail", "theta", "form", "libe"], 2))
Let's start from the first statement of your function:
if k == 1:
while(p <= 1):
b.append(strarr[j:i])
j += 1
i += 1
p += 1
for w in b:
q.append(''.join(w))
return max(q, key=len)
Here q ends up equal to strarr, so you can shorten this code to:
if k == 1:
return max(strarr, key=len)
I see that second statement's condition checks if k value is between 1 and length of string array inclusive:
elif k > 1 and k <= 2*a:
...
To avoid errors, remove the equality sign: the last element of an array has an index that is one less than the array's length.
Ceiling and division is not necessary in a definition, so you can shorten this:
a = ceil(len(strarr)/2)
into this:
a = len(strarr)
then your elif statement may look like below:
elif 1 < k < a: # Same as (k > 1 and k < a)
...
again, I see you want to concatenate (add) the longest string to k next strings using this code:
while(p <= 1):
b.append(strarr[j:i])
j += k-1
i += k-1
p += 1
for w in b:
q.append(''.join(w))
return max(q, key=len)
A clearer way of doing this:
longest = max(strarr, key=len) # Longest string in array.
index = 0 # Index of the current item.
for string in strarr:
# If current string is equal the longest one ...
if string == longest:
# Join 'k' strings from current index (longest string index).
return ''.join(strarr[index:index + k])
index += 1 # Increase current index.
And the last statement which is:
elif k > 2*a or k<1:
return ""
if all previous statements failed then value is invalid so you can instead write:
return "" # Same as with else.
Now everything should work. I advise you to learn the basics (especially lists, strings and slices), and please name your variables wisely so that they are more readable.
You can try this as well
this has passed all the test cases on the platform you suggested.
def longest_consec(strarr, k):
    """Return the first longest concatenation of ``k`` consecutive strings.

    Args:
        strarr: list of strings.
        k: number of consecutive strings to join.

    Returns:
        The longest joined window; on ties, the one that starts earliest.
        ``""`` when ``k <= 0`` or ``k > len(strarr)``.
    """
    if k <= 0 or k > len(strarr):
        return ""
    best = ""
    for start in range(len(strarr) - k + 1):
        candidate = "".join(strarr[start:start + k])
        # Strict comparison keeps the earliest window when lengths tie.
        if len(candidate) > len(best):
            best = candidate
    return best
#output: ["zone", "abigail", "theta", "form", "libe", "zas", "theta", "abigail"], 2 -> abigailtheta
#output: ["zones", "abigail", "theta", "form", "libe", "zas", "theta", "abigail"],2 -> zonesabigail

How to group a list of number together based on threshold using python while loop?

How to group a list of numbers together based on a threshold using python while loop?
For example, I have the list of numbers [1,2,3,2,5,6,2] and the threshold is 8. My expected output is [[1,2,3,2], [5], [6,2]]. Since 5+6 exceeds my threshold of 8, 5 is returned in a group of its own. The function should continue until it reaches the end of the list of numbers.
def group_numbers(num_list, threshold):
    """Question's attempt at grouping numbers whose running total <= threshold.

    As written it produces a single group: the while loop stops at the first
    element that would push the total past ``threshold`` (or at the end of the
    list), its ``else`` clause stores that one group, and the function then
    returns without looking at the remaining elements.
    """
    i = 0
    total = 0
    temp_list = []
    result_list = []
    while i < len(num_list) and total + num_list[i] <= threshold:
        total += num_list[i]
        temp_list.append(num_list[i])
        # Debug output: current index and the group built so far.
        print(i)
        print(temp_list)
        i += 1
    else:
        # Runs once, when the loop condition becomes false.
        result_list.append(temp_list)
        total = 0
        temp_list = []
    return result_list
my current code will only return [[1, 2, 3, 2]] and it will not continue. Not sure what is the problem here
There are probably better ways to do it, but that's what i have:
def group_list(lst, thresh):
    """Split ``lst`` into consecutive groups whose sums do not exceed ``thresh``.

    Elements are taken in order; a new group is started whenever adding the
    next element would push the current group's sum above ``thresh``. An
    element that is larger than ``thresh`` on its own forms a single-element
    group. Empty groups are never emitted, so an empty ``lst`` yields ``[]``.
    """
    output = []
    group = []
    running = 0  # sum of `group`, kept incrementally instead of re-summing
    for value in lst:
        if group and running + value > thresh:
            output.append(group)
            group = []
            running = 0
        group.append(value)
        running += value
    if group:
        output.append(group)
    return output

Finding first pair of numbers in array that sum to value

Im trying to solve the following Codewars problem: https://www.codewars.com/kata/sum-of-pairs/train/python
Here is my current implementation in Python:
def sum_pairs(ints, s):
    """Return the pair summing to ``s`` whose second element comes earliest.

    The result is ``[s - ints[right], ints[right]]`` where ``right`` is the
    smallest index at which some pair is completed; ``None`` when no two
    distinct positions sum to ``s``.
    """
    first_seen = {}   # value -> index of its first occurrence
    second_seen = {}  # value -> index of its second occurrence
    for idx, value in enumerate(ints):
        if value not in first_seen:
            first_seen[value] = idx
        elif value not in second_seen:
            second_seen[value] = idx

    best_right = float("inf")
    for value in first_seen:
        partner = s - value
        if partner not in first_seen:
            continue
        left = first_seen[value]
        if value == partner and value in second_seen:
            # A value paired with itself needs its second occurrence.
            other = second_seen[value]
        else:
            other = first_seen[partner]
        if left != other and max(left, other) < best_right:
            best_right = max(left, other)

    if best_right > len(ints):
        return None
    return [s - ints[best_right], ints[best_right]]
The code seems to produce correct results, however the input can consist of array with up to 10 000 000 elements, so the execution times out for large inputs. I need help with optimizing/modifying the code so that it can handle sufficiently large arrays.
Your code is inefficient for large lists, so it times out on the large test cases. Instead you can do:
def sum_pairs(lst, s):
    """Return the first pair ``[a, b]`` with ``a + b == s``, scanning left to right.

    "First" means the pair whose second element ``b`` appears earliest in
    ``lst``. Returns ``None`` when no such pair exists.
    """
    earlier = set()
    for current in lst:
        partner = s - current
        if partner in earlier:
            return [partner, current]
        earlier.add(current)
    return None
We put the values in seen until we find a value that produces the specified sum with one of the seen values.
For more information go: Referance link
Maybe this code:
def sum_pairs(lst, s):
    """Return the earliest-completing pair ``[a, b]`` from ``lst`` with ``a + b == s``.

    The list is scanned once from the left; the pair returned is the one whose
    second element has the smallest index. Values already passed are kept in a
    set, so the scan takes one pass instead of comparing every pair.

    Returns ``None`` when no two elements sum to ``s`` (including lists with
    fewer than two elements).
    """
    seen = set()
    for nxt in lst:
        x = s - nxt
        if x in seen:
            return [x, nxt]
        seen.add(nxt)
    return None
lst = [5, 6, 5, 8]
s = 14
print(sum_pairs(lst, s))
Output:
[6, 8]
This answer unfortunately still times out, even though it's supposed to run in O(n log n) (since it is dominated by the sort, the rest of the algorithm running in O(n)). I'm not sure how you can obtain better than this complexity, but I thought I might put this idea out there.
def sum_pairs(ints, s):
    """Sort-based search for the earliest-completing pair summing to ``s``.

    The values are sorted together with their original indexes and scanned
    with two pointers moving inwards. Among all pairs of distinct positions
    whose values sum to ``s``, the one whose later element has the smallest
    original index is returned as ``[earlier_value, later_value]``.

    Returns ``None`` when no pair exists (including lists with fewer than two
    elements). Runs in O(n log n), dominated by the sort.
    """
    # (original_index, value) pairs ordered by value. sorted() is stable, so
    # equal values stay in ascending index order.
    by_value = sorted(enumerate(ints), key=lambda pair: pair[1])
    lo = 0
    hi = len(by_value) - 1
    best = None  # (index, index) into ints of the best pair found so far

    while lo < hi:
        small_idx, small = by_value[lo]
        large = by_value[hi][1]
        total = small + large
        if total < s:
            lo += 1
        elif total > s:
            hi -= 1
        else:
            if small == large:
                # Everything in lo..hi holds the same value; the two
                # lowest-index occurrences are the first two entries.
                candidate = (small_idx, by_value[lo + 1][0])
                lo = hi  # no other value pair can remain
            else:
                # Walk back to the lowest-index duplicate of `large`. The walk
                # stops before `lo` because by_value[lo] holds another value.
                first = hi
                while by_value[first - 1][1] == large:
                    first -= 1
                candidate = (small_idx, by_value[first][0])
                # Both value groups are done: skip past them.
                hi = first - 1
                lo += 1
                while lo < hi and by_value[lo][1] == small:
                    lo += 1
            if best is None or max(candidate) < max(best):
                best = candidate

    if best is None:
        return None
    return [ints[min(best)], ints[max(best)]]

Given a string which consists of only 0, 1 or 2s, count the number of substring which have equal number of 0s, 1s and 2s

I am trying to learn algorithm/data structure. To improve my knowledge, I am trying to solve some of the online problems.
One of the problem I am trying to solve is given at practiceque
I have tried below method:
def count_zero_one_two():
    # Question's attempt (Python 2 print statements): slides a window of three
    # adjacent characters over the hard-coded string and counts the windows
    # whose three characters are pairwise different. Only windows of length 3
    # are examined, so longer balanced substrings are not counted.
    s = '102100211'
    s_len = len(s)
    count = 0
    for i in range (s_len-1):
        # i, j, k are three consecutive positions.
        j = i+1
        k = j+1
        #print i, j, k, count
        #print s[i], s[j], s[k]
        if k > (s_len-1):
            # The window would run past the end of the string.
            print "end"
            break
        elif (s[i] != s[j]) and (s[i] !=s[k]) and (s[j] != s[k]):
            print s[i], s[j], s[k]
            print "not equal"
            count = count+1
            #print count
        else:
            print s[i], s[j], s[k]
            print "equal"
            # Has no lasting effect: k is reassigned at the top of the loop.
            k = j +i
    print count
count_zero_one_two()
Question: if my input string is "102100211" then count should be 5 but I am getting 4. Any idea?
I would solve it like this:
def count_zero_one_two(s):
    """Count the substrings of ``s`` with equally many '0', '1' and '2'.

    For every start index ``i`` and every multiple ``j`` such that a window of
    length ``3 * j`` fits inside ``s``, the window is counted when each of the
    characters '0', '1' and '2' occurs exactly ``j`` times in it.
    """
    num = 0
    for i in range(len(s)):
        # Integer division keeps range() valid on Python 3, and bounding by
        # the remaining length skips windows that cannot fit.
        for j in range(1, (len(s) - i) // 3 + 1):
            if all(s[i:i + 3 * j].count(n) == j for n in '012'):
                num += 1
    return num
all() is used to check that each of the three characters '0', '1' and '2' occurs exactly j times in the current slice.
The inner for loop is used to count the number of 0, 1 and 2 in sequences of length 3, 6, 9, etc.
Output:
>>> s = '0102010'
>>> count_zero_one_two(s)
2
>>>
>>> s = '102100211'
>>> count_zero_one_two(s)
5
from collections import Counter
def countSub(s):
    """Return the balanced prefixes of ``s`` whose length is a multiple of 3.

    A prefix is balanced when it contains exactly three distinct characters
    and each occurs the same number of times. Each hit is reported as a
    ``(prefix, occurrences_per_character)`` tuple, shortest prefix first.
    """
    result = []
    # len(s) + 1 so that the prefix covering the whole string is checked too.
    for i in range(3, len(s) + 1, 3):
        t = s[:i]
        c = list(Counter(t).values())
        # Guard the length first: a prefix such as '100' has only two distinct
        # characters, and indexing c[2] would raise IndexError.
        if len(c) == 3 and c[0] == c[1] == c[2]:
            result.append((t, c[0]))
    return result
def count(s):
    """Collect the distinct hits of ``countSub`` over every suffix of ``s``.

    Suffixes shorter than three characters are not examined. Returns a set, so
    a substring found at several positions is reported once.
    """
    found = set()
    for start in range(len(s) - 2):
        found.update(countSub(s[start:]))
    return found
ss = count("102100211")
print("%s substrings found: " % len(ss), ss)
output:
4 substrings found (not counting duplicates and empty strings):
{('021', 1), ('210021', 2), ('210', 1), ('102', 1)}

List of strings, get common substring of n elements, Python

My problem is maybe similar to this, but another situation.
Consider this list in input :
['ACCCACCCGTGG','AATCCC','CCCTGAGG']
The other input is n, a number: the length of the substring that must be common to every element of the list. So the output has to be the substring with the most occurrences, together with its number of occurrences, similar to this:
{'CCC' : 4}
4 because it occurs twice in the first element of the list and once in each of the other two strings. CCC because it is the 3-character substring that occurs at least once in every string.
I started in that way :
def get_n_repeats_list(n,seq_list):
    """Unfinished sketch from the question; returns an empty dict as written."""
    max_substring={}
    list_seq=list(seq_list)
    for i in range(0,len(list_seq)):
        # Visits each adjacent pair (i, i+1) of sequences.
        # NOTE(review): the `if` below contains only comments, so the snippet
        # is incomplete and will not parse until a statement is added.
        if i+1<len(list_seq):
            #Idea : to get elements in common,comparing two strings at time
            #in_common=set(list_seq[i])-set(list_seq[i+1])
            #max_substring...
    return max_substring
Maybe here a solution
import operator
LL = ['ACCCACCCGTGG','AATCCC','CCCTGAGG']
def createLenList(n,LL):
    """Return the most frequent substring of length ``n`` across ``LL``.

    Every window of ``n`` consecutive characters in every string is counted
    (overlapping windows included).

    Args:
        n: window length, a positive integer.
        LL: iterable of strings.

    Returns:
        ``[substring, count]`` for the window with the highest total count.

    Raises:
        ValueError: if ``n`` is less than 1 or no string has ``n`` characters.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    stubs = {}
    for l in LL:
        # Only start positions that leave room for a full window.
        for i in range(len(l) - n + 1):
            stub = l[i:i + n]
            stubs[stub] = stubs.get(stub, 0) + 1
    if not stubs:
        raise ValueError("no string in LL has at least n characters")
    # Iterating the dict directly works on Python 2 and 3 alike.
    maxKey = max(stubs, key=stubs.get)
    return [maxKey, stubs[maxKey]]
maxStub = createLenList(3,LL)
print maxStub
So this is my take on it. It is definitely not the prettiest thing on the planet but it should work just fine.
a = ['ACCCWCCCGTGG', 'AATCCC', 'CCCTGAGG']
def occur(the_list, a_substr):
    """Count the occurrences of ``a_substr`` in all strings of ``the_list``.

    Overlapping occurrences are counted separately.
    """
    width = len(a_substr)
    return sum(
        1
        for text in the_list
        for start in range(len(text) - width + 1)
        if text[start:start + width] == a_substr
    )
def found_str(original_List, n):
    """Find the length-``n`` substrings common to every string and count them.

    Args:
        original_List: non-empty list of strings.
        n: substring length; must not exceed the shortest string's length.

    Returns:
        A dict mapping each substring of length ``n`` that occurs in every
        string to its total number of occurrences over all strings
        (overlapping occurrences included). Empty when nothing is common, in
        which case a message is also printed.

    Raises:
        SystemExit: after printing a message, when ``n`` is longer than the
            shortest string.
        ValueError: if ``original_List`` is empty.
    """
    from collections import Counter

    if n > min(map(len, original_List)):
        print("The substring has to be shorter than the shortest string!")
        raise SystemExit

    # Count the windows of each string separately. This examines every window,
    # including the last one of each string, and needs no separator character
    # that might itself occur in the data.
    per_string = [
        Counter(text[i:i + n] for i in range(len(text) - n + 1))
        for text in original_List
    ]
    common = set(per_string[0]).intersection(*per_string[1:])
    result_dict = {
        sub: sum(counts[sub] for counts in per_string) for sub in common
    }
    if result_dict == {}:
        print("No common substings of length {:} were found".format(n))
    return result_dict
end = found_str(a, 3)
print(end)
returns: {'CCC': 4}
def long_substr(data):
    """Return the longest substring of ``data[0]`` found in every string.

    Returns ``''`` when ``data`` has fewer than two strings, when ``data[0]``
    is empty, or when nothing is common. On ties the candidate met first
    (smallest start index in ``data[0]``) is kept.
    """
    longest = ''
    if len(data) <= 1 or len(data[0]) == 0:
        return longest
    reference = data[0]
    for start in range(len(reference)):
        for length in range(len(reference) - start + 1):
            # Only candidates strictly longer than the current best are tested.
            if length > len(longest) and is_substr(reference[start:start + length], data):
                longest = reference[start:start + length]
    return longest
def is_substr(find, data):
    """Return True when ``find`` occurs in every string of ``data``.

    Returns False only when a string lacks ``find``, or when both ``data`` and
    ``find`` are empty.
    """
    if not len(data) and not len(find):
        return False
    return all(find in text for text in data)
input_list = ['A', 'ACCCACCCGTGG','AATCCC','CCCTGAGG']
longest_common_str = long_substr(input_list)
if longest_common_str:
frequency = 0
for common in input_list:
frequency += common.count(longest_common_str)
print (longest_common_str, frequency)
else:
print ("nothing common")
Output
A 6

Categories