Related
I have two lists:
lookup_list = [1,2,3]
my_list = [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
I want to count how many times the lookup_list appeared in my_list with the following logic:
The order should be 1 -> 2 -> 3
In my_list, the lookup_list items doesn't have to be next to each other: 1,4,2,1,5,3 -> should generate a match since there is a 2 comes after a 1 and a 3 comes after 2.
The mathces based on the logic:
1st match: [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
2nd match: [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
3rd match: [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
4th match: [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
The lookup_list is dynamic, it could be defined as [1,2] or [1,2,3,4], etc. How can I solve it? All the answers I've found is about finding matches where 1,2,3 appears next to each other in an ordered way like this one: Find matching sequence of items in a list
I can find the count of consecutive sequences with the below code but it doesn't count the nonconsecutive sequences:
from nltk import ngrams
lookup_list = [1,2,3]
my_list = [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
all_counts = Counter(ngrams(l2, len(l1)))
counts = {k: all_counts[k] for k in [tuple(lookup_list)]}
counts
>>> {(1, 2, 3): 2}
I tried using pandas rolling window functions but they don't have a custom reset option.
def find_all_sequences(source, sequence):
def find_sequence(source, sequence, index, used):
for i in sequence:
while True:
index = source.index(i, index + 1)
if index not in used:
break
yield index
first, *rest = sequence
index = -1
used = set()
while True:
try:
index = source.index(first, index + 1)
indexes = index, *find_sequence(source, rest, index, used)
except ValueError:
break
else:
used.update(indexes)
yield indexes
Usage:
lookup_list = [1,2,3]
my_list = [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
print(*find_all_sequences(my_list, lookup_list), sep="\n")
Output:
(0, 1, 2)
(6, 7, 11)
(9, 10, 15)
(14, 16, 17)
Generator function find_all_sequences() yields tuples with indexes of sequence matches. In this function we initialize loop which will be stopped when list.index() call will throw ValueError. Internal generator function find_sequence() yields index of every sequence item.
According to this benchmark, my method is about 60% faster than one from Andrej Kesely's answer.
The function find_matches() returns indices where the matches from lookup_list are:
def find_matches(lookup_list, lst):
buckets = []
def _find_bucket(i, v):
for b in buckets:
if lst[b[-1]] == lookup_list[len(b) - 1] and v == lookup_list[len(b)]:
b.append(i)
if len(b) == len(lookup_list):
buckets.remove(b)
return b
break
else:
if v == lookup_list[0]:
buckets.append([i])
rv = []
for i, v in enumerate(my_list):
b = _find_bucket(i, v)
if b:
rv.append(b)
return rv
lookup_list = [1, 2, 3]
my_list = [1, 2, 3, 4, 5, 2, 1, 2, 2, 1, 2, 3, 4, 5, 1, 3, 2, 3, 1]
print(find_matches(lookup_list, my_list))
Prints:
[[0, 1, 2], [6, 7, 11], [9, 10, 15], [14, 16, 17]]
Here is a recursive solution:
lookup_list = [1,2,3]
my_list = [1,2,3,4,5,2,1,2,2,1,2,3,4,5,1,3,2,3,1]
def find(my_list, continue_from_index):
if continue_from_index > (len(my_list) - 1):
return 0
last_found_index = 0
found_indizes = []
first_occuring_index = 0
found = False
for l in lookup_list:
for m_index in range(continue_from_index, len(my_list)):
if my_list[m_index] is l and m_index >= last_found_index:
if not found:
found = True
first_occuring_index = m_index
last_found_index = m_index
found += 1
found_indizes.append(str(m_index))
break
if len(found_indizes) is len(lookup_list):
return find(my_list, first_occuring_index+1) + 1
return 0
print(find(my_list, 0))
my_list = [5, 6, 3, 8, 2, 1, 7, 1]
lookup_list = [8, 2, 7]
counter =0
result =False
for i in my_list:
if i in lookup_list:
counter+=1
if(counter==len(lookup_list)):
result=True
print (result)
I have a list that looks something like this:
lst_A = [32,12,32,55,12,90,32,75]
I want to replace the numbers with their rank. I am using this function to do this:
def obtain_rank(lstC):
sort_data = [(x,i) for i,x in enumerate(lstC)]
sort_data = sorted(sort_data,reverse=True)
result = [0]*len(lstC)
for i,(_,idx) in enumerate(sort_data,1):
result[idx] = i
return result
I am getting the following output while I use this:
[6, 8, 5, 3, 7, 1, 4, 2]
But what I want from this is:
[4, 7, 5, 3, 8, 1, 6, 2]
How can I go about this?
Try this:
import pandas as pd
def obtain_rank(a):
s = pd.Series(a)
return [int(x) for x in s.rank(method='first', ascending=False)]
#[4, 7, 5, 3, 8, 1, 6, 2]
You could use 2 loops:
l = [32,12,32,55,12,90,32,75]
d = list(enumerate(sorted(l, reverse = True), start = 1))
res = []
for i in range(len(l)):
for j in range(len(d)):
if d[j][1] == l[i]:
res.append(d[j][0])
del d[j]
break
print(res)
#[4, 7, 5, 3, 8, 1, 6, 2]
Here you go. In case, you are not already aware, please read https://docs.python.org/3.7/library/collections.html to understand defaultdict and deque
from collections import defaultdict, deque
def obtain_rank(listC):
sorted_list = sorted(listC, reverse=True)
d = defaultdict(deque) # deque are efficient at appending/popping elements at both sides of the sequence.
for i, ele in enumerate(sorted_list):
d[ele].append(i+1)
result = []
for ele in listC:
result.append(d[ele].popleft()) # repeating numbers with lower rank will be the start of the list, therefore popleft
return result
Update: Without using defaultdict and deque
def obtain_rank(listC):
sorted_list = sorted(listC, reverse=True)
d = {}
for i, ele in enumerate(sorted_list):
d[ele] = d.get(ele, []) + [i + 1] # As suggested by Joshua Nixon
result = []
for ele in listC:
result.append(d[ele][0])
del d[ele][0]
return result
My requirement is to take 'arrList' string as an argument from main and split it using comma (,) and each name is an Array (arr1, arr2, arr3). Now all 3 arrays should be appended to a list. The output should be : list: 1,2,3,7,8,9,11,22,33. But should follow below steps only while implementing.
Tried below code, but not able to convert. Any help is appreciated.
arr1 = [1,2,3]
arr2 = [7,8,9]
arr3 = [11,22,33]
list = []
def arrTest(arrList):
var = arrList.split(',')
for x in var:
#f = open(, "r")
#for x in f:
# list.append(x.rstrip('\n'))
if __name__ == '__main__':
arrList = "arr1,arr2,arr3"
arrTest(arrList)
arr1 = [1,2,3]
arr2 = [7,8,9]
arr3 = [11,22,33]
the_list = []
arrList = "arr1,arr2,arr3"
for array in arrList.split(','):
if array in locals():
the_list.extend(locals()[array])
print (the_list)
Output
[1, 2, 3, 7, 8, 9, 11, 22, 33]
locals() function returns a dictionary containing the variables defined in the local namespace.
You can use python dict data structure.
arr1 = [1,2,3]
arr2 = [7,8,9]
arr3 = [11,22,33]
d = {"arr1": arr1, "arr2": arr2, "arr3": arr3}
def arrTest(arrList):
l = []
var = arrList.split(',')
for x in var:
for e in d[x]:
l.append(e)
return l
if __name__ == '__main__':
arrList = "arr1,arr2,arr3"
l = arrTest(arrList)
print(l)
Output
[1, 2, 3, 7, 8, 9, 11, 22, 33]
Use eval function
arr1 = [1,2,3]
arr2 = [7,8,9]
arr3 = [11,22,33]
out = []
def arrTest(arrList):
list_strs = arrList.split(',')
for list_str in list_strs:
l = eval(list_str)
out.extend(l)
arrList = "arr1,arr2,arr3"
arrTest(arrList)
print(out)
[1, 2, 3, 7, 8, 9, 11, 22, 33]
I am importing labdata.txt in the code that follows. For statements, such as for i in values: will add each variable in a given parameter. For each set of parameters, e.g., [1, 3, 4,] [3, 5, 7], [67, 56, 56], my goal is to add [1] + [3] + [67] for example. Then to obtain the average of these three items.
The part that is unclear is how to take x = values[:1] and then somehow have it add that variable for x in the different parameters like explained above.
Here is the code so far:
lab_data = open("labdata.txt", "r") #import text
x = 0 # initialize variable
y = 0 # initialize variable
for aline in lab_data:
values = aline.split()
values = [int (u) for u in values]
print (values)
x = values[:1]
y = values[1:2]
print (x)
#print (y)
average = 0
ui = 0
other = 0
bs = 0
# print (sum(values[:1]))
for z in values[:1]:
other = (sum(values[:1]))
bs = bs + other
print (bs)
#print (sum(values[:1]))
ui = ui + z
average = ui / len(values)#this just gives the avg of x
print (average)
</pre>
Let me answer this very unclear question
I assume your question is: how to get an average from three different lists
lst1 = [1, 3, 4]
lst2 = [3, 5, 7]
lst3 = [67, 56, 56]
avg1 = sum([lst1[0],lst2[0],lst3[0]]) / 3
avg2 = sum([lst1[1],lst2[1],lst3[1]]) / 3
avg3 = sum([lst1[2],lst2[2],lst3[2]]) / 3
print avg1,avg2,avg3
If you are using python3, you can use statistics.mean with zip:
lst1 = [1, 3, 4]
lst2 = [3, 5, 7]
lst3 = [67, 56, 56]
from statistics import mean
a, b, c = map(mean, zip(lst1, lst2, lst3))
print(a, b, c)
For python2 create your own mean function:
from itertools import imap, izip
def mean(x):
return sum(x) / float(len(x))
a, b, c = imap(mean, izip(lst1, lst2, lst3))
print(a, b, c)
Letting each list have the same length, this is my solution:
lst1 = [1, 2, 3]
lst2 = [3, 4, 5]
lst3 = [3, 2, 1]
# list "lst" will contain the results for every index
lst =[]
for i in range(len(lst1)):
lst.append(lst1[i]/3 + lst2[i]/3 + lst3[i]/3)
print lst
I guess what you want is something like:
lab_data = open("labdata.txt", "r") #import text
int_data = [[int(val) for val in line.split()] for line in lines]
def mean(args): return float(sum(args))/len(args)
means = []
for i in range(0, len(int_data[0])):
means.append(mean([row[i] for row in int_data]))
I am trying to add an option to my program which allow the user to choose which steps of the program he wants to do.
I would like to be able to parse a string like "1-3,6,8-10" and get [1, 2, 3, 6, 8, 9, 10].
Do you know if something in Python which is doing that already exists ?
This function does what you asked. It assumes no negative numbers are used, otherwise it needs some changes to support that case.
def mixrange(s):
r = []
for i in s.split(','):
if '-' not in i:
r.append(int(i))
else:
l,h = map(int, i.split('-'))
r+= range(l,h+1)
return r
print mixrange('1-3,6,8-10')
One way using list comprehensions:
s = "1-3,6,8-10"
x = [ss.split('-') for ss in s.split(',')]
x = [range(int(i[0]),int(i[1])+1) if len(i) == 2 else i for i in x]
print([int(item) for sublist in x for item in sublist])
Outputs:
[1, 2, 3, 6, 8, 9, 10]
No builtin function as such, but can be done using xrange and generators:
from itertools import chain
s = "1-3,6,8-10"
spans = (el.partition('-')[::2] for el in s.split(','))
ranges = (xrange(int(s), int(e) + 1 if e else int(s) + 1) for s, e in spans)
all_nums = chain.from_iterable(ranges) # loop over, or materialse using `list`
# [1, 2, 3, 6, 8, 9, 10]
s = '1-3,6,8-10,13-16'
temp = [x.split('-') if '-' in x else x for x in s.split(',')]
# temp = [['1', '3'], '6', ['8', '10'], ['13', '16']]
res = []
for l in temp:
if isinstance(l, list):
a, b = map(int, l)
res += list(range(a, b + 1))
else:
res.append(int(l))
# res = [1, 2, 3, 6, 8, 9, 10, 13, 14, 15, 16]
A little function I just created:
def expand(st):
res = []
for item in st.split(','):
if '-' in item:
temp = map(int, item.split('-'))
res.extend(range(temp[0], temp[1]+1))
else:
res.append(int(item))
return res
s = '1-3,6,8-10'
print expand(s)
Returns:
[1, 2, 3, 6, 8, 9, 10]
def parseIntSet(nputstr=""):
selection = set()
invalid = set()
# tokens are comma seperated values
tokens = [x.strip() for x in nputstr.split(',')]
for i in tokens:
try:
# typically tokens are plain old integers
selection.add(int(i))
except:
# if not, then it might be a range
try:
token = [int(k.strip()) for k in i.split('-')]
if len(token) > 1:
token.sort()
# we have items seperated by a dash
# try to build a valid range
first = token[0]
last = token[len(token)-1]
for x in range(first, last+1):
selection.add(x)
except:
# not an int and not a range...
invalid.add(i)
# Report invalid tokens before returning valid selection
print "Invalid set: " + str(invalid)
return selection
Via: Parsing a list of numbers in Python
Aha, one line proof of concept anyone?
EDIT: Improved version
import itertools
s = "1-3,6,8-10"
print(list(itertools.chain.from_iterable(range(int(ranges[0]), int(ranges[1])+1) for ranges in ((el+[el[0]])[:2] for el in (miniRange.split('-') for miniRange in s.split(','))))))
Now split on several lines for readability:
print(list(itertools.chain.from_iterable(
range(
int(ranges[0]),
int(ranges[1])+1
)
for ranges in
(
(el+[el[0]])[:2] # Allows to get rid of the ternary condition by always adding a duplicate of the first element if it is alone
for el in
(miniRange.split('-') for miniRange in s.split(','))
)
)))