Finding the longest interval with a decrease in value - python

I have a list of values for some metric, e.g.:
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
[50, 52, 58, 54, 57, 51, 55, 60, 62, 65, 68, 72, 62, 61, 59, 63, 72]
I need to find the longest interval over which the value has decreased. For the above list such interval is from index 7 to 14 (and it's length is 8). An O(n²) solution to this is simple:
def get_longest_len(values: list[int]) -> int:
longest = 0
for i in range(len(values)-1):
for j in range(len(values)-1, i, -1):
if values[i] > values[j] and j - i > longest:
longest = j - i
break
return longest + 1
Is there any way to improve it's time complexity?

O(n log n):
from itertools import accumulate
from bisect import bisect
def get_longest_len(values: list[int]) -> int:
maxi = list(accumulate(values, max))
return max(
i - bisect(maxi, value) + 1
for i, value in enumerate(values)
)
First I compute the prefix maxima. For your example:
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
values = [50, 52, 58, 54, 57, 51, 55, 60, 62, 65, 68, 72, 62, 61, 59, 63, 72]
maxi = [50, 52, 58, 58, 58, 58, 58, 60, 62, 65, 68, 72, 72, 72, 72, 72, 72]
Then for each value, I can use binary search in these maxima to find the earliest larger value. For example the 59 at index 14 in values: We find that the earliest number in maxi larger than 59 is the 60 at index 7.
Correctness testing with 100 lists of 1000 randomized ascending values (the two numbers for each test case are your result and mine, and the boolean says whether they match):
True 461 461
True 360 360
True 909 909
...
True 576 576
True 312 312
True 810 810
100 out of 100 correct
Code:
from itertools import accumulate
from bisect import bisect
from random import randint, sample
def get_longest_len0(values: list[int]) -> int:
longest = 0
for i in range(len(values)-1):
for j in range(len(values)-1, i, -1):
if values[i] > values[j] and j - i > longest:
longest = j - i
break
return longest + 1
def get_longest_len(values: list[int]) -> int:
maxi = list(accumulate(values, max))
return max(
i - bisect(maxi, value) + 1
for i, value in enumerate(values)
)
cases = 100
correct = 0
for _ in range(cases):
values = [i + randint(-10, 10) for i in range(1000)]
for _ in range(5):
i, j = sample(range(1000), 2)
values[i], values[j] = values[j], values[i]
expect = get_longest_len0(values)
result = get_longest_len(values)
correct += result == expect
print(result == expect, expect, result)
print(correct, 'out of', cases, 'correct')
Attempt This Online!

Related

while loop, try except and other functions having issues

I am creating a bingo/housie ticket creator in python. The tickets have 9 rows and 3 columns. It means 27 spaces on a 9*3 grid. 9 rows vertically and 3 columns horizontally. Each row can only hold 3 random numbers. The first row can hold a random number from 1,9. The second row can hold a random number from 10,19. The third row can hold a random number from 20,29. The fourth row can hold a random number from 30,39. The fifth row can hold a random number from 40,49. The sixth row can hold a random number from 50,59. The seventh row can hold a random number from 60,69. The eighth row can hold a random number from 70,79. The ninth row can hold a random number from 80,90. Here is a reference image of the ticket.
I am planning to create a random ticket in python. Here is the script I created for it. It has all the comments for better understanding.
import random
# print("Script is ready")
# Variables
global numOfNums # This tells that only 15 numbers can be added to the ticket
numOfNums = 15
global allNum # this is the list containing all 90 numbers
global numToBedone # this is just for adding nums to the list
allNum = []
numToBedone = 1
global row1, row2, row3, row4, row5, row6, row7, row8, row9 # all the rows
row1, row2, row3, row4, row5, row6, row7, row8, row9 = [], [], [], [], [], [], [], [], []
global row1Num, row2Num, row3Num, row4Num, row5Num, row6Num, row7Num, row8Num, row9Num # all the nums row can have
# Filling the List
while numToBedone<91:
allNum.append(numToBedone)
numToBedone = numToBedone+1
# Generating number for row 1
def ranRow1NumGen():
global numOfNums
row1Num = random.randint(1,9)
try:
allNum.remove(row1Num)
except:
ranRow1NumGen()
row1.append(row1Num)
numOfNums = numOfNums - 1
# Generating number for row 2
def ranRow2NumGen():
global numOfNums
row2Num = random.randint(10,19)
try:
allNum.remove(row2Num)
except:
ranRow2NumGen()
row2.append(row2Num)
numOfNums = numOfNums - 1
# Generating number for row 3
def ranRow3NumGen():
global numOfNums
row3Num = random.randint(20,29)
try:
allNum.remove(row3Num)
except:
ranRow3NumGen()
row3.append(row3Num)
numOfNums = numOfNums - 1
# Generating number for row 4
def ranRow4NumGen():
global numOfNums
row4Num = random.randint(30,39)
try:
allNum.remove(row4Num)
except:
ranRow4NumGen()
row4.append(row4Num)
numOfNums = numOfNums - 1
# Generating number for row 5
def ranRow5NumGen():
global numOfNums
row5Num = random.randint(40,49)
try:
allNum.remove(row5Num)
except:
ranRow5NumGen()
row5.append(row5Num)
numOfNums = numOfNums - 1
# Generating number for row 6
def ranRow6NumGen():
global numOfNums
row6Num = random.randint(50,59)
try:
allNum.remove(row6Num)
except:
ranRow6NumGen()
row6.append(row6Num)
numOfNums = numOfNums - 1
# Generating number for row 7
def ranRow7NumGen():
global numOfNums
row7Num = random.randint(60,69)
try:
allNum.remove(row7Num)
except:
ranRow7NumGen()
row7.append(row7Num)
numOfNums = numOfNums - 1
# Generating number for row 8
def ranRow8NumGen():
global numOfNums
row8Num = random.randint(70,79)
try:
allNum.remove(row8Num)
except:
ranRow8NumGen()
row8.append(row8Num)
numOfNums = numOfNums - 1
# Generating number for row 9
def ranRow9NumGen():
global numOfNums
row9Num = random.randint(80,90)
try:
allNum.remove(row9Num)
except:
ranRow9NumGen()
row9.append(row9Num)
numOfNums = numOfNums - 1
# Main function for creating a ticket
def ticketGen():
global row1, row2, row3, row4, row5, row6, row7, row8, row9
global numOfNums
#Adding 1 number to all the rows
ranRow1NumGen()
ranRow2NumGen()
ranRow3NumGen()
ranRow4NumGen()
ranRow5NumGen()
ranRow6NumGen()
ranRow7NumGen()
ranRow8NumGen()
ranRow9NumGen()
# After we put 1 number in each row we have to put the rest in the random rows
# I cant understand how to do that
# I used this way but it has some issues
# sometimes a row can get more than 3 num which isnt possible in the ticket
# also some numbers repeat but I cant understand how as I have a try except statement
u = 0
while u < 6:
randomNumGiver = random.randint(1,9)
if numOfNums > 0:
if randomNumGiver == 1 and len(row1) < 3:
ranRow1NumGen()
elif randomNumGiver == 2 and len(row2) < 3:
ranRow2NumGen()
elif randomNumGiver == 3 and len(row3) < 3:
ranRow3NumGen()
elif randomNumGiver == 4 and len(row4) < 3:
ranRow4NumGen()
elif randomNumGiver == 5 and len(row5) < 3:
ranRow5NumGen()
elif randomNumGiver == 6 and len(row6) < 3:
ranRow6NumGen()
elif randomNumGiver == 7 and len(row7) < 3:
ranRow7NumGen()
elif randomNumGiver == 8 and len(row8) < 3:
ranRow8NumGen()
elif randomNumGiver == 9 and len(row9) < 3:
ranRow9NumGen()
else:
pass
u = u+1
# printing all the rows and the remaining numbers in the list
print(row1, row2, row3, row4, row5, row6, row7, row8, row9)
pass
ticketGen()
I have gotten some mixed output :
Incorrect output given by the code :
[8] [14] [21, 25, 21] [32] [42, 47, 42] [55] [60, 66, 60] [76] [82]
Correct way and the output given by the code :
[7, 2] [16, 14] [28] [34] [40] [59, 56, 52] [68, 61, 64] [77] [83]
As you can see it has a mixed output. The issue with the first output is the numbers can't be repeated and also a row can't have more than 3 numbers. I can't find how this bug is happening as I have set a try-except statement for the repetition and an if statement for having less than 3 numbers. If you can find any issue and know the way to resolve it please do so. It would mean a lot to me. Thank you.
Edit - The code has now been changed according to the first answer but I got these results. Also at one point it clearly had more than 3 numbers in a row and they were duplicate numbers. Can't figure out a way please HELP
In your while loop, change this -
while u < 6:
randomNumGiver = random.randint(1,9)
if randomNumGiver == 1 and numOfNums > 0 and row1.__len__() < 3:
ranRow1NumGen()
elif randomNumGiver == 2 and numOfNums > 0 and row2.__len__() < 3:
ranRow2NumGen() ..........
To -
while u < 6:
randomNumGiver = random.randint(1,9)
if numOfNums > 0:
if randomNumGiver == 1 and len(row1) < 3:
ranRow1NumGen()
elif randomNumGiver == 2 and len(row2) < 3:
ranRow2NumGen()
elif randomNumGiver == 3 and len(row3) < 3:
ranRow3NumGen()
elif randomNumGiver == 4 and len(row4) < 3:
ranRow4NumGen()
elif randomNumGiver == 5 and len(row5) < 3:
ranRow5NumGen()
elif randomNumGiver == 6 and len(row6) < 3:
ranRow6NumGen()
elif randomNumGiver == 7 and len(row7) < 3:
ranRow7NumGen()
elif randomNumGiver == 8 and len(row8) < 3:
ranRow8NumGen()
elif randomNumGiver == 9 and len(row9) < 3:
ranRow9NumGen()
else:
pass
u = u+1
Make the line numOfNums common and then use len() instead because it is better practice not to use .__len__()
Result: (No repetition and more than 3 when I tried it many times for any wrong result)
Script is ready
2 is selected for row1
12 is selected for row2
22 is selected for row3
35 is selected for row4
48 is selected for row5
56 is selected for row6
64 is selected for row7
75 is selected for row8
90 is selected for row9
[2] [12] [22] [35] [48] [56] [64] [75] [90]
19 is selected for row2
60 is selected for row7
39 is selected for row4
55 is selected for row6
53 is selected for row6
56 is selected for row6
[2] [12, 19] [22] [35, 39] [48] [56, 55, 53] [64, 60] [75] [90]
[1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 54, 57, 58, 59, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89]

Python finding numbers from a list that satisfy a specific condition [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
This question does not appear to be about programming within the scope defined in the help center.
Closed 2 years ago.
Locked. There are disputes about this question’s content being resolved at this time. It is not currently accepting new answers or interactions.
Good evening, I am currently having some problems implementing a binary search algorithm, which extracts the amount of numbers in a list that satisfy this condition
Suppose Sorted list A = [1,2,4,5,6,7,8] I need to find the amount of numbers that satisfy this condition.
Absolute(A[i] - i) <= C where C is a number specified by the user for example
|A[i] -i|<= C <---- That is the condition I need to satisfy, i need to find the amount of numbers that are in the list that fulfil this condition.
Example:
A = [1, 2, 4, 8, 16, 32, 64]
c = 4
A[0] = | 1 - 0 | = 1.
A[1] = | 2 - 1 | = 1.
A[2] = | 4 - 2 | = 2.
A[3] = | 8 - 3 | = 5.
A[4] = | 16 - 4 | = 12.
A[5] = | 32 - 5 | = 27.
A[6] = | 64 - 6 | = 58.
Now I realise I need to use binary search to ensure my running time is in O(log N) time, but I am not sure where do I put the condition/if statement.
Can someone please show me how this would look in python code. Thank you so much for the assistance.
Using Python Bisect module
Use a key with binary_search module to allow function evaluations
from bisect import bisect_left
class KeyifyList(object):
" Allows specifying a key with binary search module"
def __init__(self, inner, key):
self.inner = inner
self.key = key
def __len__(self):
return len(self.inner)
def __getitem__(self, k):
return self.key((k, self.inner[k]))
def bin_search(a, c):
# Binary search for placement
# Using key function to allow binary search using a function
# Computes abs(a[i] - i) at places where binary search is evaluated
# key computes abs(a[k]-k)
# Binary search so O(log(n)) time complexity
i = bisect_left(KeyifyList(a, lambda kv: abs(kv[1]-kv[0])), c)
if i == len(a):
last_index = len(a) -1
if abs(a[last_index] - last_index) <= c:
return len(a) # all indices satisfy
else:
i = last_index
while i >= 0 and abs(a[i]-i) > c:
# this is normally a one point move over
# so O(1) rather than O(n) in time complexity
i -= 1
# number of points is one more than index to satisfy
return i + 1
Test
A = [1, 2, 4, 8, 16, 32, 64]
c = 4
Test c from 0 to 63
for c in range(65):
print(f'c = {c}, number of points = {bin_search(A, c)}')
Output
c = 0, number of points = 0
c = 1, number of points = 1
c = 2, number of points = 3
c = 3, number of points = 3
c = 4, number of points = 3
c = 5, number of points = 4
c = 6, number of points = 4
c = 7, number of points = 4
c = 8, number of points = 4
c = 9, number of points = 4
c = 10, number of points = 4
c = 11, number of points = 4
c = 12, number of points = 5
c = 13, number of points = 5
c = 14, number of points = 5
c = 15, number of points = 5
c = 16, number of points = 5
c = 17, number of points = 5
c = 18, number of points = 5
c = 19, number of points = 5
c = 20, number of points = 5
c = 21, number of points = 5
c = 22, number of points = 5
c = 23, number of points = 5
c = 24, number of points = 5
c = 25, number of points = 5
c = 26, number of points = 5
c = 27, number of points = 6
c = 28, number of points = 6
c = 29, number of points = 6
c = 30, number of points = 6
c = 31, number of points = 6
c = 32, number of points = 6
c = 33, number of points = 6
c = 34, number of points = 6
c = 35, number of points = 6
c = 36, number of points = 6
c = 37, number of points = 6
c = 38, number of points = 6
c = 39, number of points = 6
c = 40, number of points = 6
c = 41, number of points = 6
c = 42, number of points = 6
c = 43, number of points = 6
c = 44, number of points = 6
c = 45, number of points = 6
c = 46, number of points = 6
c = 47, number of points = 6
c = 48, number of points = 6
c = 49, number of points = 6
c = 50, number of points = 6
c = 51, number of points = 6
c = 52, number of points = 6
c = 53, number of points = 6
c = 54, number of points = 6
c = 55, number of points = 6
c = 56, number of points = 6
c = 57, number of points = 6
c = 58, number of points = 7
c = 59, number of points = 7
c = 60, number of points = 7
c = 61, number of points = 7
c = 62, number of points = 7
c = 63, number of points = 7
c = 64, number of points = 7
Performance Testing
Compare to list comprehension (O(n) algorithm)
def list_comprehension_method(a, c):
" Use list comprehension to find number of points "
return len([1 for i, v in enumerate(A) if abs(v - i) <= c])
Timing Test
Create a large random array
n = 10000 # number of points in array
c = n // 4 # c value
A = sorted([randint(1, n) for _ in range(n)])
print(timeit(lambda: bin_search(A, c), number=100))
# Time: 0.00173 seconds
print(timeit(lambda: list_comprehension_method(A, c), number=100))
# Time: 0.49982 seconds
Binary search ~289X faster for n = 10, 000
lambda and filter should get you there
A = [1, 2, 4, 8, 16, 32, 64]
c = 4
res = list(filter(lambda x: A[x] - x <= c, range(len(A))))
print(res)
[0, 1, 2]

Pandas - Interate over row and compare previous values -faster

I am trying to get my results faster (13 minutes for 800 rows). I asked a similar question here: pandas - iterate over rows and calculate - faster - but I not able to use the good solutions for my variation. The difference is that if the overlap of previous values in 'col2' is more than 'n=3', the value of 'col1' in the row is set to '0' and affect the following code.
import pandas as pd
d = {'col1': [20, 23, 40, 41, 46, 47, 48, 49, 50, 50, 52, 55, 56, 69, 70],
'col2': [39, 32, 42, 50, 63, 67, 64, 68, 68, 74, 59, 75, 58, 71, 66]}
df = pd.DataFrame(data=d)
df["overlap_count"] = "" #create new column
n = 3 #if x >= n, then value = 0
for row in range(len(df)):
x = (df["col2"].loc[0:row-1] > (df["col1"].loc[row])).sum()
df["overlap_count"].loc[row] = x
if x >= n:
df["col2"].loc[row] = 0
df["overlap_count"].loc[row] = 'x'
df
I obtain following result: replacing values in col1 if they are greater than 'n' and the column overlap_count
col1 col2 overlap_count
0 20 39 0
1 23 32 1
2 40 42 0
3 41 50 1
4 46 63 1
5 47 67 2
6 48 0 x
7 49 0 x
8 50 68 2
9 50 0 x
10 52 0 x
11 55 0 x
12 56 0 x
13 69 71 0
14 70 66 1
Thank you for your help and time!
I think you can use numba for improve performance, only is necessary working with numeric values, so instead x is added -1 and new column is filled by 0 instead empty string:
df["overlap_count"] = 0 #create new column
n = 3 #if x >= n, then value = 0
a = df[['col1','col2','overlap_count']].values
from numba import njit
#njit
def custom_sum(arr, n):
for row in range(arr.shape[0]):
x = (arr[0:row, 1] > arr[row, 0]).sum()
arr[row, 2] = x
if x >= n:
arr[row, 1] = 0
arr[row, 2] = -1
return arr
df1 = pd.DataFrame(custom_sum(a, n), columns=df.columns)
print (df1)
col1 col2 overlap_count
0 20 39 0
1 23 32 1
2 40 42 0
3 41 50 1
4 46 63 1
5 47 67 2
6 48 0 -1
7 49 0 -1
8 50 68 2
9 50 0 -1
10 52 0 -1
11 55 0 -1
12 56 0 -1
13 69 71 0
14 70 66 1
Performance:
d = {'col1': [20, 23, 40, 41, 46, 47, 48, 49, 50, 50, 52, 55, 56, 69, 70],
'col2': [39, 32, 42, 50, 63, 67, 64, 68, 68, 74, 59, 75, 58, 71, 66]}
df = pd.DataFrame(data=d)
#4500rows
df = pd.concat([df] * 300, ignore_index=True)
print (df)
In [115]: %%timeit
...: pd.DataFrame(custom_sum(a, n), columns=df.columns)
...:
8.11 ms ± 224 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [116]: %%timeit
...: for row in range(len(df)):
...: x = (df["col2"].loc[0:row-1] > (df["col1"].loc[row])).sum()
...: df["overlap_count"].loc[row] = x
...:
...: if x >= n:
...: df["col2"].loc[row] = 0
...: df["overlap_count"].loc[row] = 'x'
...:
...:
7.84 s ± 442 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
create a function and then just apply the function as shown below:
df['overlap_count'] = [fn(i) for i in df['overlap_count']]
Try this one, maybe it will be faster.
df['overlap_count'] = df.groupby('col1')['col2'].transform(lambda g: len((g >= g.name).index))

Consecutive Prime Sum

I am solving the 50th problem on Project Euler.
The question is to:
Find the prime number below one-million, which can be written as the sum of the most consecutive primes.
For example, 41 = 2 + 3 + 5 + 7 + 11 + 13
41 is the prime number that can be written as the sum of the most consecutive primes.
I wrote a code to find the prime numbers below 1000 that can be written as the sum of the most consecutive primes, to check if my code finds the prime number(953) which can be written as the sum of the most consecutive primes below 1000. This is what I came up with:
#!/usr/bin/python
import prime
p = prime.genprimes(1000)
prms = [i for i in p]
for prm in prms:
count = 0
p = prm
temp = []
for a in prms:
p -= a
temp.append(a)
count += 1
if p == 0:
print prm, '\t', count, '\t', temp
prime.py:
#!/usr/bin/python
def genprimes(limit):
"""
Returns the prime numbers(generator) until the limit(inclusive) given.
"""
D = {}
q = 2
while q <= limit:
if q not in D:
yield q
D[q * 2] = [q]
else:
for p in D[q]:
D.setdefault(p + q, []).append(p)
del D[q]
q += 1
Output when I run the code:
2 1 [2]
5 2 [2, 3]
17 4 [2, 3, 5, 7]
41 6 [2, 3, 5, 7, 11, 13] # Longest sum of consecutive primes that adds to a prime below 100
197 12 [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
281 14 [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]
The problem is it doesn't find the prime number 953 which is the longest sum of consecutice primes that adds to a prime below 1000.
So, I changed my code to troubleshoot what it does when prm is 953 in the for loop:
#!/usr/bin/python
import prime
p = prime.genprimes(1000)
prms = [i for i in p]
found = []
for prm in prms:
if prm == 953:
p = prm
for a in prms:
print p, '\t', a
p -= a
if p < -100:
break
Output:
953 2
951 3
948 5
943 7
936 11
925 13
912 17
895 19
876 23
853 29
824 31
793 37
756 41
715 43
672 47
625 53
572 59
513 61
452 67
385 71
314 73
241 79
162 83
79 89
-10 97
Any idea what I am doing wrong here? Thanks for any help.
Your loop always starts with the index 2. The consecutive primes it seems don't necessarily need to start with the prime 2. You will need to vary which prime the consecutive adding starts at.
A smaller example: if you were finding the largest sum of consecutive primes with sum less than 10, then that is 3 + 5 = 8, not 2 + 3 = 5.
It might not (and is not) the case that you always get the largest sum by adding all the primes starting at 2.
This Question was asked in TCS CodeVita 2016
#include<iostream>
using namespace std;
int main(){
long long int num=0;
cout<<"Enter the Size to count Prime number till NUM : ";
cin>>num;
long long int ary[num],j=2;
ary[0] =2,ary[1]=3;
for(int i=2;i<=num;i++){ // loop will add the prime number till num
if(i%2 != 0 && i%3 != 0){
ary[j] = i;
j++;
}
}
long long int k,sum=0,count=0;
cout<<"Sum of Consecutive Prime numbers from "<<2<<" to "<<num<<endl;
for(int i=0;i<=j;i++){
for(k=0;k<j;k++){
sum+= ary[k];
if(sum %2 !=0 && sum%3!=0 && sum<=num){
count++;
cout<<sum<<endl;
}
}
}
cout<<"Total Consecutive Count : "<<count<<endl;
}
OUTPUT
Sample Output 1
Enter the Size to count Prime number till NUM : 20
Sum of Consecutive Prime numbers from 2 to 20
5
17
Total Consecutive Count : 2
Sample Output 2
Enter the Size to count Prime number till NUM : 100
Sum of Consecutive Prime numbers from 2 to 100
5
17
41
77
Total Consecutive Count : 4

which numbers in list 2 are bigger and smaller than each number in list 1

I am using python. I have two lists, list 1 is 7000 integers long, list 2 is 25000 integers. I want to go through each number in list 1 and find the closest number in list 2 that is bigger and the closest number that is smaller than each number in list 1, and then calculate the difference between these two numbers in list 2. So far I have:
for i in list1:
for j in list 2:
if list2[j]<list1[i]:
a = max(list2)
elif list2[j]>list1[i]:
b = min(list2)
interval = b-a
This doesn't seem to work. I want to find the explicit numbers in list 2 that are less than a specific number in list 1 and know the maximum, and then find out the smallest number in list 2 that is bigger than the number in list 1. Does anyone have any ideas? Thanks
Here's a vectorized solution using NumPy. It should be extremely fast, as it has no loops in Python (apart from the printing stage at the end).
import numpy as np
# set up fake data
l1 = np.array([1.9, 2, 2.1]) # or whatever list you have
l2 = np.array([1, 2, 5, 10]) # as above
l2.sort() # remove this line if it's always sorted
# the actual algorithm
indexes = np.searchsorted(l2, l1, side='right')
lower = l2[indexes - 1]
upper = l2[indexes]
diffs = upper - lower
# print results for debugging
for value, diff in zip(l1, diffs):
print "value", value, "gap", diff
Here's the output with the hard-coded test data as above:
value 1.9 gap 1
value 2.0 gap 3
value 2.1 gap 3
You can use the bisect module, worst case complexity O(N * logN):
import bisect
lis1 = [4, 20, 26, 27, 30, 53, 57, 76, 89, 101]
lis2 = [17, 21, 40, 49, 53, 53, 53, 53, 70, 80, 81, 95, 99] #this must be sorted
#use lis2.sort() in case lis2 is not sorted
for x in lis1:
#returns the index where x can be placed in lis2, keeping lis2 sorted
ind=bisect.bisect(lis2,x)
if not (x >= lis2[-1] or x <= lis2[0]):
sm, bi = lis2[ind-1], lis2[ind]
if sm == x:
""" To handle the case when an item present in lis1 is
repeated multiple times in lis2, for eg 53 in this case"""
ind -= 1
while lis2[ind] == x:
ind -= 1
sm = lis2[ind]
print "{} <= {} <= {}".format(sm ,x, bi)
output:
17 <= 20 <= 21
21 <= 26 <= 40
21 <= 27 <= 40
21 <= 30 <= 40
49 <= 53 <= 70
53 <= 57 <= 70
70 <= 76 <= 80
81 <= 89 <= 95
Though this will not output anything for 4 and 101, as 4 is smaller than any element in lis2 and 101 is greater than any element in lis2. But that can be fixed if required.
First of all, your example is not valid code, or at least it doesn't do what you want it to do. If you have
for i in list1:
then i is not the index, but an element of list1. So first of all you would compare i and j, not list[i] and list[j].
It should be easier to use list comprehensions>
for i in list1:
a = max([n for n in list2 if n < i])
b = min([n for n in list2 if n > i])
You might have to add an if or two to make sure a and b exist, but it should work like this.
Here's a solution not using numpy, bisect module or list comprehensions!
Enjoy
list1=[1,2,4,8,16,32,64]
list2=[3,6,9,12,15,18,21]
correct={4:3, 8:3, 16:3}
lower=0
for t in list1:
print t
difference = 0
index = lower
while (difference == 0 and index<len(list2)-1):
print "consider %d < %d and %d > %d" % (list2[index],t,list2[index+1],t)
if list2[index]<t and list2[index+1] > t:
lower = index
upper = index + 1
difference = list2[upper] - list2[lower]
print "%d difference %d" % (t,list2[upper] - list2[lower])
break
index = index +1
if t in correct.keys():
assert(difference == correct[t])

Categories