Efficiently searching a pair of ordered lists with noise - python

Assuming two data sets are in order and that they contain pairwise matches, what is an efficient way to discover the pairs? There can be noise in either list.
From sets A,B the set C will consist of pairs (A[X1],B[Y1]),(A[X2],B[Y2]),...,(A[Xn],B[Yn]) such that X1 < X2 < ... < Xn and Y1 < Y2 < ... < Yn.
The problem can be demonstrated with the simplified Python block, where the specifics of how a successful pair is validated is irrelevant.
Because the validation condition is irrelevant, the condition return_pairs(A, B, validate) == return_pairs(B, A, validate) is not required to hold, given that the data in A,B need not be the same, just that there must exist a validation function for (A[x],B[y])
A = [0,0,0,1,2,0,3,4,0,5,6,0,7,0,0,8,0,0,9]
B = [1,2,0,0,0,0,0,3,0,0,4,0,5,6,0,0,7,0,0,8,0,9]
B1 = [1,2,0,0,0,0,0,3,0,0,4,0,5,6,0,0,7,7,7,0,0,8,0,9]
def validate(a, b):
    """Return a truthy value only when both items are non-zero and equal.

    A falsy operand short-circuits and is returned as-is (e.g. ``0``), so the
    result is meant to be used in a boolean context.
    """
    return a and b and a == b
def return_pairs(A, B, validation):
    """Skeleton of the pair search: only the first items of A and B are checked.

    ``validation`` is called as ``validation(A[x], B[y])``; when it returns a
    truthy value the pair is collected.  The actual index-advancing logic is
    intentionally left open (this is the question being asked).
    """
    ret = []
    x, y = 0, 0
    # Do loops and index changes...
    if validation(A[x], B[y]):
        ret.append((A[x], B[y]))
    return ret
# list() is required on Python 3, where zip() returns a lazy iterator that
# never compares equal to a list; on Python 2 it is a harmless copy.
assert list(zip(range(1, 10), range(1, 10))) == return_pairs(A, B, validate)
assert list(zip(range(1, 10), range(1, 10))) == return_pairs(A, B1, validate)

Instead of iterating each list in two nested loops you can first remove the noise according to your own criteria, then create a third list with the filtered elements and run each item (being a newly formed tuple) of the list against your validation. This is assuming I understood the question correctly, which I think I didn't really:
Demo
A = [0,0,0,1,2,0,3,4,0,5,6,0,7,0,0,8,0,0,9]
B = [1,2,0,0,0,0,0,3,0,0,4,0,5,6,0,0,7,0,0,8,0,9]
def clean(oldList):
    """Return the positive items of ``oldList`` that form a strictly increasing run.

    An item is kept when it is greater than zero and greater than the last
    item kept so far; everything else is treated as noise and dropped.
    """
    newList = []
    for item in oldList:
        if 0 < item and (not newList or item > newList[-1]):
            newList.append(item)
    return newList
def validate(C):
    """Return True when every item of ``C`` has equal first and second elements.

    An empty ``C`` is considered valid.
    """
    return all(item[0] == item[1] for item in C)
# Materialise the pairs: on Python 3 zip() is a one-shot iterator, so calling
# list(C) first would leave nothing for validate(C) to check.
C = list(zip(clean(A), clean(B)))
#clean(A):[1, 2, 3, 4, 5, 6, 7, 8, 9]
#clean(B):[1, 2, 3, 4, 5, 6, 7, 8, 9]
#list(C):[(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)]
#validate(C): True

A solution, O(n²):
def return_pairs(A, B, validation):
    """Greedily match items of A to items of B, preserving order in both lists.

    For each item of A (left to right) the first not-yet-consumed item of B
    for which ``validation(a, b)`` is truthy is paired with it; everything in
    B up to and including that match is then consumed.

    Returns the list of ``(a, b)`` tuples in the order they were found.
    """
    ret = []
    next_y = 0  # first index of B that is still available for matching
    for a in A:
        # Start after the last match instead of rescanning the whole of B,
        # and stop as soon as this item of A has found its partner.
        for y in range(next_y, len(B)):
            if validation(a, B[y]):
                ret.append((a, B[y]))
                next_y = y + 1
                break
    return ret

Related

Index to closest coordinate

I have this function
A=[(1,2,3),(2,3,4)]
B=[(2,4,3),(1,8,1),(2,3,5),(1,5,3)]
def closestNew(A, B):
    """Map every point of ``B`` to its nearest point in ``A``.

    Points are 3-item sequences; distance is the squared Euclidean distance
    over the first three coordinates.  On ties the earliest point of ``A``
    wins.  Points of ``B`` must be hashable because they become dict keys.
    When ``A`` is empty the result is an empty dict.
    """
    C = {}
    for bp in B:
        closestDist = None  # no candidate seen yet for this bp
        for ap in A:
            dist = sum(((bp[0]-ap[0])**2, (bp[1]-ap[1])**2, (bp[2]-ap[2])**2))
            if closestDist is None or dist < closestDist:
                C[bp] = ap
                closestDist = dist
    return C
That will return the closest coordinate between the two lists.
Output:
{(1, 2, 3): (2, 4, 3), (2, 3, 4): (2, 3, 5)}
However, I also want the indices in array B of the points that matched with array A (check the output) in a separate list. Any ideas?
Return
idx=[0,2]
A=[(1,2,3),(2,3,4)]
B=[(2,4,3),(1,8,1),(2,3,5),(1,5,3)]
C={(1, 2, 3): (2, 4, 3), (2, 3, 4): (2, 3, 5)}
C is a dictionary whose values correspond to points in B.
# For every matched point stored in C, look up its position in B.
idx = [B.index(x) for x in C.values()]
print(idx)
#[0, 2]
If you want to compute the closest point for each point of A, it is better to have A as the outer loop and B as the inner loop; that way you iterate over all of B for every point of A. You can also use enumerate to know which index you are at in the loop.
a = [(1, 2, 3), (2, 3, 4)]
b = [(2, 4, 3), (1, 8, 1), (2, 3, 5), (1, 5, 3)]
# index[i] is the position in b of the point closest to a[i]
index = []
# C maps every point of a to its closest point in b
C = {}
for ap in a:
    # Start from infinity so that arbitrarily distant points still match.
    closestDist = float("inf")
    closestIndex = None
    for indexB, bp in enumerate(b):
        dist = sum(((bp[0]-ap[0])**2, (bp[1]-ap[1])**2, (bp[2]-ap[2])**2))
        if dist < closestDist:
            closestDist = dist
            closestIndex = indexB
    # closestIndex stays None only when b is empty
    if closestIndex is not None:
        C[ap] = b[closestIndex]
        index.append(closestIndex)
print(index)

Create a list of pairs from two lists using recursion

I need to create a function that takes two lists as arguments and returns a list of the pairs of the elements in the two lists using recursion in python 3.x.
The input create_all_pairs([1,2], [3,4]) should give me :
[(1,3), (1,4), (2,3), (2,4)].
I have created this function in 3 different ways: using for-loops, using while-loops and using a list comprehension.
def create_all_pairs_for(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys using nested for-loops."""
    lst = []
    for x in xs:
        for y in ys:
            lst.append((x, y))
    return lst
def create_all_pairs_while(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys using nested while-loops."""
    lst = []
    xscount = 0
    yscount = 0
    while xscount < len(xs):
        while yscount < len(ys):
            lst.append((xs[xscount], ys[yscount]))
            yscount += 1
        xscount += 1
        # restart the inner scan for the next element of xs
        yscount = 0
    return lst
def create_all_pairs_listcomp(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys using a list comprehension."""
    lst = [(a, b) for a in xs for b in ys]
    return lst
How can I write this function using recursion? This is what I have got so far, but I feel completely lost.
def create_all_pairs_rec(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys, recursing over ``xs``.

    The first element of ``xs`` is paired with all of ``ys``; the remaining
    elements are handled by the recursive call on ``xs[1:]``, so the argument
    shrinks on every step and the recursion terminates.
    """
    if not xs:
        return []
    head = xs[0]
    return list(map(lambda y: (head, y), ys)) + create_all_pairs_rec(xs[1:], ys)
The following would be a recursive implementation:
def create_all_pairs(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys, recursing over ``xs`` only."""
    if not (xs and ys):
        return []
    # pair the head of xs with all of ys, then recurse on the tail of xs
    return [(xs[0], y) for y in ys] + create_all_pairs(xs[1:], ys)
While this is a bit of a cheat, as it only uses recursion to reduce xs, here is a true recursive divide-and-conquer solution that decreases the problem size recursively for both xs and ys:
def create_all_pairs(xs, ys):
    """Return every ``(x, y)`` pair of xs and ys by divide and conquer.

    Both lists are halved and the four quadrant sub-problems are solved
    recursively, so pairs come out in quadrant order rather than row order.
    """
    if not (xs and ys):  # base case 1: any empty list
        return []
    if len(xs) == len(ys) == 1:  # base case 2: two singleton lists
        return [(xs[0], ys[0])]
    mid_x, mid_y = len(xs) // 2, len(ys) // 2
    return (
        create_all_pairs(xs[:mid_x], ys[:mid_y])
        + create_all_pairs(xs[:mid_x], ys[mid_y:])
        + create_all_pairs(xs[mid_x:], ys[:mid_y])
        + create_all_pairs(xs[mid_x:], ys[mid_y:])
    )
>>> create_all_pairs([1, 2], [3, 4])
[(1, 3), (1, 4), (2, 3), (2, 4)]
>>> create_all_pairs([1, 2, 3], [3, 4, 5])
[(1, 3), (1, 4), (1, 5), (2, 3), (3, 3), (2, 4), (2, 5), (3, 4), (3, 5)]
"All pairs" is the same as the Cartesian product.
We can adapt this answer, which uses recursion to compute the Cartesian product: Cross product of sets using recursion (which has a good explanation)
An advantage of this function is that it works for an arbitrary number of lists (i.e. 1, 2, 3, etc.).
def create_all_pairs(*seqs):
    """Return the Cartesian product of any number of sequences as lists.

    With no arguments the result is ``[[]]`` (the single empty combination).
    """
    if not seqs:
        return [[]]
    # Compute the product of the remaining sequences once instead of once
    # per element of seqs[0]; this also keeps the result correct when a
    # later argument is a one-shot iterator.
    tails = create_all_pairs(*seqs[1:])
    return [[x] + p for x in seqs[0] for p in tails]
print(create_all_pairs([1,2], [3,4]))
Output
[[1, 3], [1, 4], [2, 3], [2, 4]]
Another recursive implementation, which also adds entries in a more sequential order to the final list of pairs, as compared to the answer above:
def create_all_pairs(list1, list2, resulting_list, index1=0, index2=0):
    """Fill ``resulting_list`` with every pair of list1 x list2, in row order.

    Each call returns the pair at ``(index1, index2)`` and lets its caller
    insert it at the front of ``resulting_list``; because the recursion walks
    forward and the insertions happen on the way back, the final list is in
    ascending index order.  The outermost call inserts its own pair itself.

    Returns the pair at ``(index1, index2)``, or ``None`` (leaving
    ``resulting_list`` untouched) when either input list is empty.
    """
    if not list1 or not list2:
        # nothing to pair; indexing below would raise IndexError
        return None
    if index1 < len(list1) and index2 < (len(list2)-1):
        # advance within the current row
        resulting_list.insert(0, create_all_pairs(list1, list2, resulting_list, index1, index2+1))
    elif index1 < (len(list1)-1) and index2 >= (len(list2)-1):
        # end of row reached: continue with the start of the next row
        resulting_list.insert(0, create_all_pairs(list1, list2, resulting_list, index1+1, 0))
    if index1 == 0 and index2 == 0:
        # the outermost call has no caller to insert its pair
        resulting_list.insert(0, (list1[index1], list2[index2]))
    return (list1[index1], list2[index2])
resulting_list = list()
create_all_pairs([1, 2, 3], [3, 4, 5], resulting_list)
print("Resulting list is:", resulting_list)
Result:
Resulting list is: [(1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 3), (3, 4), (3, 5)]
def all_pairs(x, y):
    """Return every pair of x and y as a new list, splitting recursively.

    The first pair is emitted, then the rest of the first row
    (``x[:1]`` with ``y[1:]``), then all remaining rows (``x[1:]`` with ``y``).
    """
    if not (x and y):
        # Always hand back a fresh list: returning the empty input itself
        # would alias the caller's object and break for non-list sequences.
        return []
    return [(x[0], y[0])] + all_pairs(x[:1], y[1:]) + all_pairs(x[1:], y)
Based on @schwobaseggl's "true recursive" solution, just splitting differently.
def find_all_pairs(xs, ys, ret):
    """Append every ``(x, y)`` pair of xs and ys to ``ret`` and return it.

    ``xs`` is consumed recursively through slices, so the caller's list is
    left intact and pairs are produced in the original order of ``xs``.
    """
    if not xs:  # base case
        return ret  # return the list we built
    left = xs[0]  # take the first element of the left list
    for right in ys:  # for every element in the right list
        ret.append((left, right))  # add a (left, right) tuple to the result
    # recurse on the remaining elements of xs with the extended ret
    return find_all_pairs(xs[1:], ys, ret)

quick way to do def pair_sum()

Does anyone know how to do this in a simple and effective way?
Thanks
Define a function called pair_sum() which takes two inputs: a list of integers and a total.
The function should return a list of tuples, where each value in the tuple is a unique value from the input list, and where the sum of the tuple elements equals the total. Each pair of values in the input list that sums to the total should only appear once in the output list. For example, if the input list is [3, 2, 1] and the total is 4, then the output list will only contain the tuple (3, 1) and not the tuple (1, 3). In other words, if (i, j) is a tuple in the output list, then i should appear to the left of j in the input list.
For example:
Test Result
print(pair_sum([4, 6, 2, 7, 3], 10))
[(4, 6), (7, 3)]
print(pair_sum([4, 7, 8, 9, 3, 2, 6, 11, 1, 5, 10], 14))
[(4, 10), (8, 6), (9, 5), (3, 11)]
#xaovnumwsercz, I proposed this version.
def pair_sum(numbers, target):
    """Return the pairs ``(i, j)`` with ``i + j == target`` and i left of j.

    Each unordered pair of values is reported once, at its first occurrence,
    even when the input contains repeated values.  Values must be hashable.
    """
    # Position of the last occurrence of every value: "partner appears
    # somewhere to the right of i" becomes an O(1) lookup instead of a slice.
    last_index = {value: i for i, value in enumerate(numbers)}
    answer = []
    seen = set()
    for i, num in enumerate(numbers):
        partner = target - num
        if last_index.get(partner, -1) <= i:
            continue
        key = frozenset((num, partner))
        if key not in seen:
            seen.add(key)
            answer.append((num, partner))
    return answer
def pair_sum(numbers, pairSum):
    """Return pairs of values from ``numbers`` that add up to ``pairSum``.

    Two pointers walk a sorted copy of the input from both ends.  A pair is
    recorded as ``(left, right)`` only when ``left`` currently precedes
    ``right`` in the working copy of the input; the pointers are allowed to
    cross so that each pair is also tried in the opposite orientation.
    Matched values are removed from the working copy only, so the caller's
    list is not modified.
    """
    resultSet = []
    remaining = list(numbers)  # working copy; the caller's list stays intact
    newNumbers = sorted(remaining)
    i = 0
    j = len(newNumbers) - 1
    while i < len(newNumbers) and j >= 0:
        left, right = newNumbers[i], newNumbers[j]
        if left + right == pairSum and i != j:
            # Skip pairs already recorded in the other orientation.
            if (right, left) not in resultSet and remaining.index(left) < remaining.index(right):
                resultSet.append((left, right))
                remaining.remove(left)
                remaining.remove(right)
            i = i + 1
            j = j - 1
        elif left + right < pairSum:
            i = i + 1
        else:
            j = j - 1
    return resultSet

Better ways to find pairs that sum to N

Is there a faster way to write this? The function takes a list and a value, and finds the pairs of numeric values in that list that sum to N, without duplicates. I tried to make it faster by using sets instead of the list itself (however, I used count(), which I know is linear time). Any suggestions? I know there is probably a way.
def pairsum_n(list1, value):
    """Return the set of ``(small, large)`` pairs from ``list1`` summing to ``value``.

    The pair ``(value/2, value/2)`` is only kept when ``value/2`` occurs at
    least twice in ``list1``.
    """
    set1 = set(list1)
    solution = {(min(i, value - i), max(i, value - i)) for i in set1 if value - i in set1}
    if list1.count(value / 2) < 2:
        # discard() rather than remove(): the half-pair is not in the
        # solution at all when value/2 does not occur in the input.
        solution.discard((value / 2, value / 2))
    return solution
"""
Example: value = 10, list1 = [1,2,3,4,5,6,7,8,9]
pairsum_n = { (1,9), (2,8), (3,7), (4,6) }
Example: value = 10, list2 = [5,6,7,5,7,5,3]
pairsum_n = { (5,5), (3,7) }
"""
Your approach is quite good, it just needs a few tweaks to make it more efficient. itertools is convenient, but it's not really suitable for this task because it produces so many unwanted pairs. It's ok if the input list is small, but it's too slow if the input list is large.
We can avoid producing duplicates by looping over the numbers in order, stopping when i >= value/2, after using a set to get rid of dupes.
def pairsum_n(list1, value):
    """Return the sorted list of ``(i, j)`` pairs with ``i < j`` and ``i + j == value``.

    Duplicates in ``list1`` are ignored, so ``(value/2, value/2)`` is never
    reported.  The caller's list is not modified.
    """
    set1 = set(list1)
    list1 = sorted(set1)
    solution = []
    maxi = value / 2
    for i in list1:
        # every pair has exactly one member below value/2; stop once past it
        if i >= maxi:
            break
        j = value - i
        if j in set1:
            solution.append((i, j))
    return solution
Note that the original list1 is not modified. The assignment in this function creates a new local list1. If you do actually want (value/2, value/2) in the output, just change the break condition.
Here's a slightly more compact version.
def pairsum_n(list1, value):
    """Return the sorted list of ``(i, j)`` pairs with ``i < j`` and ``i + j == value``.

    Duplicates in ``list1`` are ignored, so ``(value/2, value/2)`` is never
    reported.
    """
    set1 = set(list1)
    solution = []
    for i in sorted(set1):
        j = value - i
        # once i catches up with its partner all pairs have been seen
        if i >= j:
            break
        if j in set1:
            solution.append((i, j))
    return solution
It's possible to condense this further, eg using itertools.takewhile, but it will be harder to read and there won't be any improvement in efficiency.
Try this, running time O(nlogn):
v = [1, 2, 3, 4, 5, 6, 7, 8, 9]
l = 0
r = len(v)-1
def myFunc(v, value):
    """Return the pairs of distinct values in ``v`` that sum to ``value``.

    Uses the two-pointer technique on the sorted, de-duplicated values.  The
    pair ``(value//2, value//2)`` is added first when that value occurs at
    least twice in ``v``.  The caller's list is not modified.
    """
    ans = []
    # this block searches for the pair (value//2, value//2)
    if value % 2 == 0:
        c = [i for i in v if i == value // 2]
        if len(c) >= 2:
            ans.append((c[0], c[1]))
    v = sorted(set(v))
    l = 0
    r = len(v) - 1
    while l < r:
        if v[l] + v[r] == value:
            ans.append((v[l], v[r]))
            l += 1
            r -= 1
        elif v[l] + v[r] < value:
            l += 1
        else:
            r -= 1
    # The values are unique and l < r, so ans holds no duplicates and can be
    # returned in discovery order.
    return ans
It is called the Two pointers technique and it works as follows. First of all, sort the array. This imposes a minimum running time of O(nlogn). Then set two pointers, one pointing at the start of the array l and other pointing at its last element r (pointers name are for left and right).
Now, look at the list. If the sum of the values returned at position l and r is lower than the value we are looking for, then we need to increment l. If it's greater, we need to decrement r.
If v[l] + v[r] == value then we can increment l and decrement r at the same time, since in any case we want to skip the combination of values (v[l], v[r]) as we don't want duplicates.
Timings: this is actually slower than the other 2 solutions. Due to the number of combinations produced but not actually needed, it gets worse the bigger the lists are.
You can use itertools.combinations to produce the 2-tuple-combinations for you.
Put them into a set if they match your value, then return as set/list:
from itertools import combinations
def pairsum_n(list1, value):
    """Returns the unique list of pairs of combinations of numbers from
    list1 that sum up `value`. Reorders the values to (min_value,max_value)."""
    result = set()
    for n in combinations(list1, 2):
        if sum(n) == value:
            result.add((min(n), max(n)))
    return list(result)
# more ugly one-liner:
# return list(set(((min(n),max(n)) for n in combinations(list1,2) if sum(n)==value)))
data = [1,2,3,4,5,6,6,5,4,3,2,1]
print(pairsum_n(data,7))
Output:
[(1, 6), (2, 5), (3, 4)]
Fun little thing, with some sorting overhead you can get all at once:
def pairsum_n2(data, count_nums=2):
    """Generate a dict with all count_nums-tuples from data. Key into the
    dict is the sum of all tuple-values."""
    d = {}
    # sort each combination so equal multisets collapse to one tuple
    for n in (tuple(sorted(p)) for p in combinations(data, count_nums)):
        d.setdefault(sum(n), set()).add(n)
    return d
get_all = pairsum_n2(data, 2)  # 2 == number of numbers to combine
# print every reachable sum together with the tuples that produce it
for k in get_all:
    print(k, " -> ", get_all[k])
Output:
3 -> {(1, 2)}
4 -> {(1, 3), (2, 2)}
5 -> {(2, 3), (1, 4)}
6 -> {(1, 5), (2, 4), (3, 3)}
7 -> {(3, 4), (2, 5), (1, 6)}
2 -> {(1, 1)}
8 -> {(2, 6), (4, 4), (3, 5)}
9 -> {(4, 5), (3, 6)}
10 -> {(5, 5), (4, 6)}
11 -> {(5, 6)}
12 -> {(6, 6)}
And then just access the one you need via:
print(get_all.get(7,"Not possible")) # {(3, 4), (2, 5), (1, 6)}
print(get_all.get(17,"Not possible")) # Not possible
I have another solution; it's a lot faster than the one I just wrote, though not as fast as @PM 2Ring's answer:
def pairsum_n(list1, value):
    """Return the set of ``(small, large)`` pairs from ``list1`` summing to ``value``.

    ``value/2`` only takes part (as the pair ``(value/2, value/2)``) when it
    occurs at least twice in ``list1``.
    """
    set1 = set(list1)
    if list1.count(value / 2) < 2:
        # discard() rather than remove(): value/2 may not be present at all
        set1.discard(value / 2)
    return {(min(x, value - x), max(x, value - x)) for x in set1 if (value - x) in set1}

Convert a list of numbers to ranges

I have a bunch of numbers, say the following:
1 2 3 4 6 7 8 20 24 28 32
The information presented there could be represented in Python as ranges:
[range(1, 5), range(6, 9), range(20, 33, 4)]
In my output I'd write 1..4, 6..8, 20..32..4, but that is just a matter of presentation.
Another answer shows how one can do this for contiguous ranges. I don't see how I can easily do this for strided ranges like above. Is there a similar trick for this?
Here's a straight forward approach at the problem.
def get_ranges(ls):
    """Yield ``range`` objects that together reproduce the sequence ``ls``.

    Consecutive items with a constant difference are merged into one strided
    range, for increasing and decreasing runs alike.  A lone trailing item, or
    an item followed by an equal one (difference 0, which a range cannot
    express), is yielded as a one-element range.
    """
    N = len(ls)
    start = 0  # index of the first item not yet emitted
    while start < N:
        # single element remains, yield the trivial range
        if start + 1 == N:
            yield range(ls[start], ls[start] + 1)
            break
        diff = ls[start + 1] - ls[start]
        if diff == 0:
            yield range(ls[start], ls[start] + 1)
            start += 1
            continue
        # find the last index that satisfies the determined difference
        end = start + 1
        while end + 1 < N and ls[end + 1] - ls[end] == diff:
            end += 1
        # the exclusive stop must lie one past the last item *in the
        # direction of travel*, hence -1 for decreasing runs
        yield range(ls[start], ls[end] + (1 if diff > 0 else -1), diff)
        start = end + 1
def ranges(data):
    """Collapse runs of consecutive integers into ``(start, stop)`` tuples.

    ``stop`` is exclusive, i.e. each tuple can be passed to ``range``.
    Returns an empty list for empty input, including an exhausted iterator.
    """
    result = []
    idata = iter(data)
    try:
        first = prev = next(idata)
    except StopIteration:
        return result
    for following in idata:
        if following - prev == 1:
            prev = following
        else:
            result.append((first, prev + 1))
            first = prev = following
    # There was either exactly 1 element and the loop never ran,
    # or the loop just normally ended and we need to account
    # for the last remaining range.
    result.append((first, prev + 1))
    return result
Test:
>>> data = range(1, 5) + range(6, 9) + range(20, 24)
>>> print ranges(data)
[(1, 5), (6, 9), (20, 24)]
You can use groupby and count from itertools module along with Counter from collections module like this example:
Update: See the comments in order to understand the logic behind this solution and its limitations.
from itertools import groupby, count
from collections import Counter
def ranges_list(data=None, func=range, min_condition=1):
    """Group the numbers in ``data`` into strided ranges.

    Only step sizes that occur more than ``min_condition`` times between
    neighbouring (sorted) items are considered.  For each such step the items
    are grouped into runs with that stride; runs of a single item are dropped.

    ``func`` is called as ``func(first, last + 1, step)`` for every run and
    the results are returned ordered by ``first``.  The caller's ``data`` is
    not modified; omitting it yields an empty list.
    """
    # Work on a sorted copy instead of sorting the caller's list in place
    data = sorted(data) if data is not None else []
    # Find all the steps between neighbouring elements
    steps = [v - k for k, v in zip(data, data[1:])]
    # Keep the steps repeated more than min_condition times
    # (default: repeated more than once)
    repeated = [step for step, freq in Counter(steps).items() if freq > min_condition]
    # Group the items into runs for every repeated step: inside a run with
    # stride k the value n - k*position is constant, which is the groupby key
    groups = {
        k: [list(v) for _, v in groupby(data, lambda n, c=count(step=k): n - next(c))]
        for k in repeated
    }
    # Create a dict:
    # - keys are the steps
    # - values are the grouped elements (runs longer than one item)
    sub = {k: [j for j in v if len(j) > 1] for k, v in groups.items()}
    # Sort the (first, stop, step) triples so the output is ordered
    final = [(j[0], j[-1] + 1, k) for k, v in sub.items() for j in v]
    return [func(*k) for k in sorted(final, key=lambda x: x[0])]
ranges1 = [1, 2, 3, 4, 6, 7, 8, 20, 24, 28, 32]
ranges2 = [1, 2, 3, 4, 6, 7, 10, 20, 24, 28, 50,51,59,60]
print(ranges_list(ranges1))
print(ranges_list(ranges2))
Output:
[range(1, 5), range(6, 9), range(20, 33, 4)]
[range(1, 5), range(6, 8), range(20, 29, 4), range(50, 52), range(59, 61)]
Limitations:
With this kind of input:
ranges3 = [1,3,6,10]
print(ranges_list(ranges3))
print(ranges_list(ranges3, min_condition=0))
Will output:
# Steps are repeated <= 1 with the condition: min_condition = 1
# Will output an empty list
[]
# With min_condition = 0
# Will output the ranges using: zip(data, data[1:])
[range(1, 4, 2), range(3, 7, 3), range(6, 11, 4)]
Feel free to use this solution and adopt it or modify it in order to fill your needs.
It might not be super short or elegant, but it seems to work:
def ranges(ls):
    """Yield ``range`` objects that together reproduce the items of ``ls``.

    Items are read from an iterator over ``ls``.  A run with a constant
    difference becomes ``range(first, last + step, step)``; a lone item, or
    an item followed by an equal one (step 0, which a range cannot express),
    becomes ``range(item, item + 1)``.  Empty input yields nothing.
    """
    li = iter(ls)
    try:
        first = next(li)
    except StopIteration:
        # Empty input: a StopIteration escaping a generator body would be
        # turned into RuntimeError, so finish cleanly instead.
        return
    while True:
        try:
            element = next(li)
        except StopIteration:
            yield range(first, first + 1)
            return
        step = element - first
        if step == 0:
            yield range(first, first + 1)
            first = element
            continue
        last = element
        while True:
            try:
                element = next(li)
            except StopIteration:
                yield range(first, last + step, step)
                return
            if element - last != step:
                # the run is over; the current element starts the next one
                yield range(first, last + step, step)
                first = element
                break
            last = element
This iterates over an iterator of the list, and yields range objects:
>>> list(ranges([1, 2, 3, 4, 6, 7, 8, 20, 24, 28, 32]))
[range(1, 5), range(6, 9), range(20, 36, 4)]
It also handles negative ranges, and ranges that have just one element:
>>> list(ranges([9,8,7, 1,3,5, 99]))
[range(9, 6, -1), range(1, 7, 2), range(99, 100)]

Categories