What is the most efficient way to rotate a list in python?
Right now I have something like this:
>>> def rotate(l, n):
... return l[n:] + l[:n]
...
>>> l = [1,2,3,4]
>>> rotate(l,1)
[2, 3, 4, 1]
>>> rotate(l,2)
[3, 4, 1, 2]
>>> rotate(l,0)
[1, 2, 3, 4]
>>> rotate(l,-1)
[4, 1, 2, 3]
Is there a better way?
A collections.deque is optimized for pulling and pushing on both ends. It even has a dedicated rotate() method.
from collections import deque
items = deque([1, 2])
items.append(3) # deque == [1, 2, 3]
items.rotate(1) # The deque is now: [3, 1, 2]
items.rotate(-1) # Returns deque to original state: [1, 2, 3]
item = items.popleft() # deque == [2, 3]
What about just using pop(0)?
list.pop([i])
Remove the item at the given position in the list, and return it. If
no index is specified, a.pop() removes and returns the last item in
the list. (The square brackets around the i in the method signature
denote that the parameter is optional, not that you should type square
brackets at that position. You will see this notation frequently in
the Python Library Reference.)
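To sketch how that could look for a rotation (rotate_with_pop is just a name I made up here, and note that pop(0) is itself linear in the list length):
def rotate_with_pop(lst, n):
    """Rotate lst left by n steps in place using pop(0) and append."""
    n %= len(lst)
    for _ in range(n):
        lst.append(lst.pop(0))  # each pop(0) shifts the remaining items, so this loop is O(n * len(lst))
    return lst

items = [1, 2, 3, 4]
rotate_with_pop(items, 1)  # items is now [2, 3, 4, 1]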
Numpy can do this using the roll command:
>>> import numpy
>>> a=numpy.arange(1,10) #Generate some data
>>> numpy.roll(a,1)
array([9, 1, 2, 3, 4, 5, 6, 7, 8])
>>> numpy.roll(a,-1)
array([2, 3, 4, 5, 6, 7, 8, 9, 1])
>>> numpy.roll(a,5)
array([5, 6, 7, 8, 9, 1, 2, 3, 4])
>>> numpy.roll(a,9)
array([1, 2, 3, 4, 5, 6, 7, 8, 9])
It depends on what you want to have happen when you do this:
>>> shift([1,2,3], 14)
You might want to change your:
def shift(seq, n):
return seq[n:]+seq[:n]
to:
def shift(seq, n):
n = n % len(seq)
return seq[n:] + seq[:n]
Simplest way I can think of:
a.append(a.pop(0))
Just some notes on timing:
If you're starting with a list, l.append(l.pop(0)) is the fastest method you can use. This can be shown with time complexity alone:
deque.rotate is O(k) (k=number of elements)
list to deque conversion is O(n)
list.append and list.pop are both O(1)
So if you are starting with deque objects, you can deque.rotate() at the cost of O(k). But, if the starting point is a list, the time complexity of using deque.rotate() is O(n). l.append(l.pop(0)) is faster at O(1).
Just for the sake of illustration, here are some sample timings on 1M iterations:
Methods which require type conversion:
deque.rotate with deque object: 0.12380790710449219 seconds (fastest)
deque.rotate with type conversion: 6.853878974914551 seconds
np.roll with nparray: 6.0491721630096436 seconds
np.roll with type conversion: 27.558452129364014 seconds
List methods mentioned here:
l.append(l.pop(0)): 0.32483696937561035 seconds (fastest)
"shiftInPlace": 4.819645881652832 seconds
...
Timing code used is below.
collections.deque
Showing that creating deques from lists is O(n):
from collections import deque
import big_o
def create_deque_from_list(l):
return deque(l)
best, others = big_o.big_o(create_deque_from_list, lambda n: big_o.datagen.integers(n, -100, 100))
print(best)
# --> Linear: time = -2.6E-05 + 1.8E-08*n
If you need to create deque objects:
1M iterations # 6.853878974914551 seconds
import timeit

setup_deque_rotate_with_create_deque = """
from collections import deque
import random
l = [random.random() for i in range(1000)]
"""
test_deque_rotate_with_create_deque = """
dl = deque(l)
dl.rotate(-1)
"""
timeit.timeit(test_deque_rotate_with_create_deque, setup_deque_rotate_with_create_deque)
If you already have deque objects:
1M iterations # 0.12380790710449219 seconds
setup_deque_rotate_alone = """
from collections import deque
import random
l = [random.random() for i in range(1000)]
dl = deque(l)
"""
test_deque_rotate_alone= """
dl.rotate(-1)
"""
timeit.timeit(test_deque_rotate_alone, setup_deque_rotate_alone)
np.roll
If you need to create nparrays:
1M iterations # 27.558452129364014 seconds
setup_np_roll_with_create_npa = """
import numpy as np
import random
l = [random.random() for i in range(1000)]
"""
test_np_roll_with_create_npa = """
np.roll(l,-1) # implicit conversion of l to np.nparray
"""
If you already have nparrays:
1M iterations # 6.0491721630096436 seconds
setup_np_roll_alone = """
import numpy as np
import random
l = [random.random() for i in range(1000)]
npa = np.array(l)
"""
test_roll_alone = """
np.roll(npa,-1)
"""
timeit.timeit(test_roll_alone, setup_np_roll_alone)
"Shift in place"
Requires no type conversion
1M iterations # 4.819645881652832 seconds
setup_shift_in_place="""
import random
l = [random.random() for i in range(1000)]
def shiftInPlace(l, n):
n = n % len(l)
head = l[:n]
l[:n] = []
l.extend(head)
return l
"""
test_shift_in_place="""
shiftInPlace(l,-1)
"""
timeit.timeit(test_shift_in_place, setup_shift_in_place)
l.append(l.pop(0))
Requires no type conversion
1M iterations # 0.32483696937561035 seconds
setup_append_pop="""
import random
l = [random.random() for i in range(1000)]
"""
test_append_pop="""
l.append(l.pop(0))
"""
timeit.timeit(test_append_pop, setup_append_pop)
I also got interested in this and compared some of the suggested solutions with perfplot (a small project of mine).
It turns out that Kelly Bundy's suggestion
tmp = data[shift:]
tmp += data[:shift]
performs very well for all shifts.
Essentially, perfplot performs the shift for increasingly large arrays and measures the time. Here are the results for shift = 1 and shift = 100 (plots not reproduced here):
Code to reproduce the plot:
import numpy
import perfplot
import collections
shift = 100
def list_append(data):
return data[shift:] + data[:shift]
def list_append2(data):
tmp = data[shift:]
tmp += data[:shift]
return tmp
def shift_concatenate(data):
return numpy.concatenate([data[shift:], data[:shift]])
def roll(data):
return numpy.roll(data, -shift)
def collections_deque(data):
items = collections.deque(data)
items.rotate(-shift)
return items
def pop_append(data):
data = data.copy()
for _ in range(shift):
data.append(data.pop(0))
return data
b = perfplot.bench(
setup=lambda n: numpy.random.rand(n).tolist(),
kernels=[
list_append,
list_append2,
roll,
shift_concatenate,
collections_deque,
pop_append,
],
n_range=[2 ** k for k in range(7, 20)],
xlabel="len(data)",
)
b.show()
b.save("shift100.png")
If you just want to iterate over these sets of elements rather than construct a separate data structure, consider using iterators to construct a generator expression:
import itertools

def shift(l,n):
return itertools.islice(itertools.cycle(l),n,n+len(l))
>>> list(shift([1,2,3],1))
[2, 3, 1]
This also depends on whether you want to shift the list in place (mutating it), or whether you want the function to return a new list. Because, according to my tests, something like this is at least twenty times faster than your implementation that adds two lists:
def shiftInPlace(l, n):
n = n % len(l)
head = l[:n]
l[:n] = []
l.extend(head)
return l
In fact, even adding a l = l[:] to the top of that to operate on a copy of the list passed in is still twice as fast.
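For illustration, the copying variant described above might look like this (shiftCopy is just a placeholder name):
def shiftCopy(l, n):
    l = l[:]  # work on a shallow copy so the caller's list is left untouched
    n = n % len(l)
    head = l[:n]
    l[:n] = []
    l.extend(head)
    return l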
Various implementations with some timing at http://gist.github.com/288272
For a non-mutating implementation that returns a new list, you could use something like this:
def shift(seq, n):
shifted_seq = []
for i in range(len(seq)):
shifted_seq.append(seq[(i-n) % len(seq)])
return shifted_seq
print(shift([1, 2, 3, 4], 1))
Possibly a ringbuffer is more suitable. It is not a list, although it is likely that it can behave enough like a list for your purposes.
The problem is that the efficiency of a shift on a list is O(n), which becomes significant for large enough lists.
Shifting in a ringbuffer simply updates the head location, which is O(1).
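A rough sketch of that idea (the class and its methods are my own invention, not a standard library type): store the data once and make rotation just move a head offset.
class RingView:
    """Minimal ring-buffer-style wrapper: rotating only moves a head index."""
    def __init__(self, data):
        self._data = list(data)
        self._head = 0

    def rotate(self, n):
        self._head = (self._head + n) % len(self._data)  # O(1)

    def __getitem__(self, i):
        return self._data[(self._head + i) % len(self._data)]

    def __len__(self):
        return len(self._data)

    def to_list(self):
        return [self[i] for i in range(len(self))]

r = RingView([1, 2, 3, 4])
r.rotate(1)
r.to_list()  # [2, 3, 4, 1]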
If efficiency is your goal, (cycles? memory?) you may be better off looking at the array module: http://docs.python.org/library/array.html
Arrays do not have the overhead of lists.
As far as pure lists go though, what you have is about as good as you can hope to do.
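To illustrate the array suggestion: the slicing approach from the question carries over to array.array unchanged (a small sketch, not a benchmark):
from array import array

def rotate_array(a, n):
    n %= len(a)
    return a[n:] + a[:n]  # slicing and concatenation work on arrays too

a = array('i', [1, 2, 3, 4])
rotate_array(a, 1)  # array('i', [2, 3, 4, 1])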
I think you are looking for this:
a.insert(0, x)
Another alternative:
def move(arr, n):
return [arr[(idx-n) % len(arr)] for idx,_ in enumerate(arr)]
def solution(A, K):
if len(A) == 0:
return A
K = K % len(A)
return A[-K:] + A[:-K]
# use case
A = [1, 2, 3, 4, 5, 6]
K = 3
print(solution(A, K))
For example, given
A = [3, 8, 9, 7, 6]
K = 3
the function should return [9, 7, 6, 3, 8]. Three rotations were made:
[3, 8, 9, 7, 6] -> [6, 3, 8, 9, 7]
[6, 3, 8, 9, 7] -> [7, 6, 3, 8, 9]
[7, 6, 3, 8, 9] -> [9, 7, 6, 3, 8]
For another example, given
A = [0, 0, 0]
K = 1
the function should return [0, 0, 0]
Given
A = [1, 2, 3, 4]
K = 4
the function should return [1, 2, 3, 4]
I take this cost model as a reference:
http://scripts.mit.edu/~6.006/fall07/wiki/index.php?title=Python_Cost_Model
Your method of slicing the list and concatenating two sub-lists consists of linear-time operations. I would suggest using pop, which is a constant-time operation, e.g.:
def shift(list, n):
for i in range(n):
temp = list.pop()
list.insert(0, temp)
I don't know if this is 'efficient', but it also works:
x = [1,2,3,4]
x.insert(0,x.pop())
EDIT: Hello again, I just found a big problem with this solution!
Consider the following code:
class MyClass():
def __init__(self):
self.classlist = []
def shift_classlist(self): # right-shift-operation
self.classlist.insert(0, self.classlist.pop())
if __name__ == '__main__':
otherlist = [1,2,3]
x = MyClass()
# this is where kind of a magic link is created...
x.classlist = otherlist
for ii in xrange(2): # just to do it 2 times
print '\n\n\nbefore shift:'
print ' x.classlist =', x.classlist
print ' otherlist =', otherlist
x.shift_classlist()
print 'after shift:'
print ' x.classlist =', x.classlist
print ' otherlist =', otherlist, '<-- SHOULD NOT HAVE BIN CHANGED!'
The shift_classlist() method executes the same code as my x.insert(0, x.pop()) solution, and otherlist is a list independent of the class. After assigning otherlist to MyClass.classlist, calling shift_classlist() also changes the otherlist list:
CONSOLE OUTPUT:
before shift:
x.classlist = [1, 2, 3]
otherlist = [1, 2, 3]
after shift:
x.classlist = [3, 1, 2]
otherlist = [3, 1, 2] <-- SHOULD NOT HAVE BIN CHANGED!
before shift:
x.classlist = [3, 1, 2]
otherlist = [3, 1, 2]
after shift:
x.classlist = [2, 3, 1]
otherlist = [2, 3, 1] <-- SHOULD NOT HAVE BIN CHANGED!
I use Python 2.7. I don't know if that's a bug, but I think it's more likely that I misunderstood something here.
Does anyone of you know why this happens?
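One likely explanation: an assignment like x.classlist = otherlist doesn't copy anything; it binds both names to the same list object, so the in-place insert/pop is visible through both names. A small sketch of the difference (my own example, not from the original thread):
otherlist = [1, 2, 3]
alias = otherlist               # both names refer to the same list object
alias.insert(0, alias.pop())
print(otherlist)                # [3, 1, 2] -- mutated through the alias

independent = list(otherlist)   # or otherlist[:] -- a shallow copy
independent.insert(0, independent.pop())
print(otherlist)                # still [3, 1, 2]; only the copy was shifted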
The following method is O(n) in place with constant auxiliary memory:
def rotate(arr, shift):
pivot = shift % len(arr)
dst = 0
src = pivot
while (dst != src):
arr[dst], arr[src] = arr[src], arr[dst]
dst += 1
src += 1
if src == len(arr):
src = pivot
elif dst == pivot:
pivot = src
Note that in Python, this approach is horribly inefficient compared to others, as it can't take advantage of native implementations of any of the pieces.
I have a similar thing. For example, to shift by two...
def Shift(*args):
return args[len(args)-2:]+args[:len(args)-2]
I think you've got the most efficient way
def shift(l,n):
n = n % len(l)
return l[-n:] + l[:-n]
Jon Bentley in Programming Pearls (Column 2) describes an elegant and efficient algorithm for rotating an n-element vector x left by i positions:
Let's view the problem as transforming the array ab into the array
ba, but let's also assume that we have a function that reverses the
elements in a specified portion of the array. Starting with ab, we
reverse a to get arb, reverse b to get
arbr, and then reverse the whole
thing to get (arbr)r,
which is exactly ba. This results in the following code for
rotation:
reverse(0, i-1)
reverse(i, n-1)
reverse(0, n-1)
This can be translated to Python as follows:
def rotate(x, i):
i %= len(x)
x[:i] = reversed(x[:i])
x[i:] = reversed(x[i:])
x[:] = reversed(x)
return x
Demo:
>>> def rotate(x, i):
... i %= len(x)
... x[:i] = reversed(x[:i])
... x[i:] = reversed(x[i:])
... x[:] = reversed(x)
... return x
...
>>> rotate(list('abcdefgh'), 1)
['b', 'c', 'd', 'e', 'f', 'g', 'h', 'a']
>>> rotate(list('abcdefgh'), 3)
['d', 'e', 'f', 'g', 'h', 'a', 'b', 'c']
>>> rotate(list('abcdefgh'), 8)
['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
>>> rotate(list('abcdefgh'), 9)
['b', 'c', 'd', 'e', 'f', 'g', 'h', 'a']
I was looking for an in-place solution to this problem. This solves the purpose in O(k).
def solution(self, list, k):
r=len(list)-1
i = 0
while i<k:
temp = list[0]
list[0:r] = list[1:r+1]
list[r] = temp
i+=1
return list
What is the use case? Often, we don't actually need a fully shifted array -- we just need to access a handful of elements in the shifted array.
Getting Python slices is O(k) where k is the slice length, so a sliced rotation is O(n). The deque rotation command is also O(k). Can we do better?
Consider an array that is extremely large (let's say, so large it would be computationally slow to slice it). An alternative solution would be to leave the original array alone and simply calculate the index of the item that would have existed in our desired index after a shift of some kind.
Accessing a shifted element thus becomes O(1).
def get_shifted_element(original_list, shift_to_left, index_in_shifted):
# back calculate the original index by reversing the left shift
idx_original = (index_in_shifted + shift_to_left) % len(original_list)
return original_list[idx_original]
my_list = [1, 2, 3, 4, 5]
print(get_shifted_element(my_list, 1, 2))   # outputs 4
print(get_shifted_element(my_list, -2, 3))  # outputs 2
The following function copies the given list to a temporary list, so that the pop calls do not affect the original list:
def shift(lst, n, toreverse=False):
templist = []
for i in lst: templist.append(i)
if toreverse:
for i in range(n): templist = [templist.pop()]+templist
else:
for i in range(n): templist = templist+[templist.pop(0)]
return templist
Testing:
lst = [1,2,3,4,5]
print("lst=", lst)
print("shift by 1:", shift(lst,1))
print("lst=", lst)
print("shift by 7:", shift(lst,7))
print("lst=", lst)
print("shift by 1 reverse:", shift(lst,1, True))
print("lst=", lst)
print("shift by 7 reverse:", shift(lst,7, True))
print("lst=", lst)
Output:
lst= [1, 2, 3, 4, 5]
shift by 1: [2, 3, 4, 5, 1]
lst= [1, 2, 3, 4, 5]
shift by 7: [3, 4, 5, 1, 2]
lst= [1, 2, 3, 4, 5]
shift by 1 reverse: [5, 1, 2, 3, 4]
lst= [1, 2, 3, 4, 5]
shift by 7 reverse: [4, 5, 1, 2, 3]
lst= [1, 2, 3, 4, 5]
For a list X = ['a', 'b', 'c', 'd', 'e', 'f'] and a desired shift value shift less than the list length, we can define the function list_shift() as below:
def list_shift(my_list, shift):
assert shift < len(my_list)
return my_list[shift:] + my_list[:shift]
Examples:
list_shift(X,1) returns ['b', 'c', 'd', 'e', 'f', 'a']
list_shift(X,3) returns ['d', 'e', 'f', 'a', 'b', 'c']
I'm "old school" I define efficiency in lowest latency, processor time and memory usage, our nemesis are the bloated libraries. So there is exactly one right way:
def rotatel(nums):
back = nums.pop(0)
nums.append(back)
return nums
Below is an efficient algorithm that doesn't require the use of any additional data structure:
from typing import List

def rotate(nums: List[int], k: int):
k = k%len(nums)
l, r = 0, len(nums)-1
while (l<r):
nums[l], nums[r]= nums[r], nums[l]
l,r=l+1,r-1
l,r = 0, k-1
while (l<r):
nums[l], nums[r]=nums[r], nums[l]
l,r=l+1,r-1
l,r=k,len(nums)-1
while (l<r):
nums[l], nums[r]=nums[r], nums[l]
l,r=l+1,r-1
Related
I have a list of elements, N, that I want to run through repeatedly until it has M elements, where M can be any integer, not necessarily an integer multiple of N.
I found two ways to do this but neither seems as direct as I would expect is possible. Is there a more native way to do this in Python?
Here is what I came up with. Note that the first method was faster in limited testing.
from math import ceil
def repeat_list_1(values, n):
repeated = values * ceil(n / len(values))
return repeated[:n]
from itertools import cycle
def repeat_list_2(values, n):
values_cycler = cycle(values)
repeated = [next(values_cycler) for _ in range(n)]
return repeated
values = list(range(5))
n = 12
repeated1 = repeat_list_1(values, n)
repeated2 = repeat_list_2(values, n)
assert repeated1 == repeated2
print(repeated1)
[0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2]
As already stated, there are multiple ways to achieve that, you have to pick your favorite one based on some criteria (speed, code simplicity, ...).
Here's one that I find simple (although it's old style - it doesn't use generators):
>>> def repeat_list_4(values, n):
... d, m = divmod(n, len(values))
... return values * d + values[:m]
...
>>>
>>> repeat_list_4(list(range(5)), 12)
[0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1]
>>> repeat_list_4(list(range(5)), 5)
[0, 1, 2, 3, 4]
>>> repeat_list_4(list(range(5)), 1)
[0]
>>> repeat_list_4(list(range(5)), 0)
[]
As a side note, in your example there's n = 12, but the resulting list has 13 elements.
You can use islice to get the first n values of an iterable (called take in some other languages):
from itertools import cycle, islice
def repeat_list_3(values, n):
return list(islice(cycle(values), n))
I don't know whether this is efficient or not but you can do this.
l = [0, 1, 2, 3, 4]
N = 12
k = (l * (N // len(l) + len(l) - N % len(l)))[:N]
print(k)
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1]
First you extend the list up to N + k terms, where N + k is a multiple of len(l). Finally you slice the resulting list to get the first N elements.
numpy has a handy function:
np.resize(values, n)
but you would need to convert the result back from np.ndarray to a list
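For example, a small sketch using the values and n from the question:
import numpy as np

values = [0, 1, 2, 3, 4]
n = 12
repeated = np.resize(values, n).tolist()
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1]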
I've done this a couple of times using zip() to define a limit while iterating cyclically through a list.
An example that uses itertools:
from itertools import cycle
values = ['a', 'b', 'c', 1]
n = 10
pool = cycle(values)
for item, limit in zip(pool, range(n)):
print(item)
A way to do this without importing is to iterate cyclically without using itertools, which can be done easily using a generator.
values = ['a', 'b', 'c', 1]
def circular():
while True:
for connection in values:
yield connection
n = 10
for item, limit in zip(circular(), range(n)):
print(item)
I do think however that this solution is clunky and doesn't use zip() as it is intended to be used.
I also took the code from your second example and replaced cycle with a generator to avoid having to import anything.
values = ['a', 'b', 'c', 1]
n = 10
def circular():
while True:
for connection in values:
yield connection
def repeat_list_2(values, n):
values_cycler = circular()
repeated = [next(values_cycler) for _ in range(n)]
return repeated
print(repeat_list_2(values, n))
I'm supposed to create a function, which input is a list and two numbers, the function reverses the sublist which its place is indicated by the two numbers.
for example this is what it's supposed to do:
>>> lst = [1, 2, 3, 4, 5]
>>> reverse_sublist (lst,0,4)
>>> lst
[4, 3, 2, 1, 5]
I created a function and it works, but I'm not sure if it's in place.
This is my code:
def reverse_sublist(lst,start,end):
sublist=lst[start:end]
sublist.reverse()
lst[start:end]=sublist
print(lst)
def reverse_sublist(lst,start,end):
lst[start:end] = lst[start:end][::-1]
return lst
Partial reverse with no temporary list (replace range with xrange if you use Python 2):
def partial_reverse(list_, from_, to):
for i in range(0, int((to - from_)/2)):
(list_[from_+i], list_[to-i]) = (list_[to-i], list_[from_+i])
list_ = [1, 2, 3, 4, 5, 6, 7, 8]
partial_reverse(list_, 3, 7)
print(list_)
Easiest way to reverse a list in a partial or complete manner.
listVar = ['a','b','c','d']
def listReverse(list,start,end):
while(start<end):
temp = list[start]
list[start] = list[end] # Swapping
list[end]=temp
start+=1
end-=1
print(list)
listReverse(listVar,1,3)
Output : - ['a', 'd', 'c', 'b']
Not sure if you have a similar problem as mine, but I needed to reverse a list in place.
The only piece I was missing was [:]
exStr = "String"
def change(var):
var[:] = var[::-1] # This line here
print(exStr) #"String"
change(exStr)
print(exStr) #"gnirtS"
... I'm not sure if it's in place.
...
lst[start:end]=sublist
Yes, it's in place. lst is never rebound, only its object mutated.
Just use a slice:
>>> lst = [1, 2, 3, 4, 5]
>>> lst[0:len(lst[3::-1])]=lst[3::-1]
>>> lst
[4, 3, 2, 1, 5]
Or, perhaps easier to understand:
>>> lst = [1, 2, 3, 4, 5]
>>> sl=lst[3::-1]
>>> lst[0:len(sl)]=sl
>>> lst
[4, 3, 2, 1, 5]
lst[::-1] is the idiomatic way to reverse a list in Python; the following shows how to use it for an in-place reversal and that it really is in place:
>>> lst = [1, 2, 3, 4, 5]
>>> id(lst)
12229328
>>> lst[:] = lst[::-1]
>>> lst
[5, 4, 3, 2, 1]
>>> id(lst)
12229328
Try some crazy slicing, see Explain Python's slice notation and http://docs.python.org/2.3/whatsnew/section-slices.html
x = [1, 2, 3, 4, 5, 6, 7, 8]
def sublist_reverse(start_rev, end_rev, lst):
    return lst[:start_rev] + lst[start_rev:end_rev][::-1] + lst[end_rev:]
print(sublist_reverse(0, 4, x))
[out]:
[4, 3, 2, 1, 5, 6, 7, 8]
I have two ways for in-place reversal, the simple way is to loop through the list half-way, swapping the elements with the respective mirror-elements. By mirror-element I mean (first, last), (2nd, 2nd-last), (3rd, 3rd-last), etc.
def reverse_list(A):
for i in range(len(A) // 2): # half-way
A[i], A[len(A) - i - 1] = A[len(A) - i - 1], A[i] #swap
return A
The other way is similar to the above but using recursion as opposed to a "loop":
def reverse_list(A):
def rev(A, start, stop):
A[start], A[stop] = A[stop], A[start] # swap
if stop - start > 1: # until halfway
rev(A, start + 1, stop - 1)
return A
return rev(A, 0, len(A) - 1)
I've conducted a tiny experiment, and it seems that any assignment to a list slice causes memory allocation:
import resource
resource.setrlimit(resource.RLIMIT_AS, (64 * 1024, 64 * 1024))
try:
# Python 2
zrange = xrange
arr_size = 3 * 1024
except NameError:
# Python 3
zrange = range
arr_size = 4 * 1024
arr = list(zrange(arr_size))
# We could allocate additional 100 integers, so there should be enough free memory
# to allocate a couple of variables for indexes in the statement below
# additional_memory = list(zrange(100))
# MemoryError is raised here
arr[:] = zrange(arr_size)
So you have to use a for loop to reverse a sublist in place.
PS: If you want to repeat this test, you should ensure that setrlimit RLIMIT_AS works fine on your platform. Also arr_size may vary for different python implementations.
Two methods in-place and constant memory:
def reverse_swap(arr, start=None, end=None):
"""
Swap two edge pointers until meeting in the center.
"""
if start is None:
start = 0
if end is None:
end = len(arr)
i = start
j = end - 1
while i < j:
arr[i], arr[j] = arr[j], arr[i]
i += 1
j -= 1
def reverse_slice(arr, start=None, end=None):
"""
Use python slice assignment but use a generator on the right-hand-side
instead of slice notation to prevent allocating another list.
"""
if start is None:
start = 0
if end is None:
end = len(arr)
arr[start:end] = (arr[i] for i in range(end - 1, start - 1, -1))
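A quick usage check, using the same 0-based, end-exclusive convention as the functions above (sample data is my own):
data = [1, 2, 3, 4, 5]
reverse_swap(data, 0, 4)
print(data)   # [4, 3, 2, 1, 5]

data = [1, 2, 3, 4, 5]
reverse_slice(data, 0, 4)
print(data)   # [4, 3, 2, 1, 5]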
The simplest way is probably to use slice assignment and reversed():
lst[start:end] = reversed(lst[start:end])
You could also slice and reverse at the same time, however this generally requires either using negative indexes or specially handling the case when start = 0, i.e.:
lst[start:end] = lst[end-len(lst)-1:start-len(lst)-1:-1]
or
lst[start:end] = lst[end-1::-1] if start == 0 else lst[end-1:start-1:-1]
OTOH, using slicing alone is faster than the reversed() solution.
A much cleaner way to do this:
a = [1,2,3,4,5,6,7,8,9]
i = 2
j = 7
while (i<j):
a[i],a[j]=a[j],a[i]
j -= 1
i += 1
print(a)
lst = [1,2,3,4,5,6,7,8]
Suppose you have to reverse 2nd position to 4th position in place.
lst[2:5] = lst[2:5][::-1]
Output:
[1,2,5,4,3,6,7,8]
So here's what I want to do: I have a list that contains several equivalence relations:
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
And I want to union the sets that share one element. Here is a sample implementation:
def union(lis):
lis = [set(e) for e in lis]
res = []
while True:
for i in range(len(lis)):
a = lis[i]
if res == []:
res.append(a)
else:
pointer = 0
while pointer < len(res):
if a & res[pointer] != set([]) :
res[pointer] = res[pointer].union(a)
break
pointer +=1
if pointer == len(res):
res.append(a)
if res == lis:
break
lis,res = res,[]
return res
And it prints
[set([1, 2, 3, 6, 7]), set([4, 5])]
This does the right thing, but it is way too slow when the list of equivalence relations is large. I looked up the description of the union-find algorithm: http://en.wikipedia.org/wiki/Disjoint-set_data_structure
but I am still having trouble coding a Python implementation.
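For reference, here is one minimal sketch of the disjoint-set (union-find) structure from that link, with path compression; the function names are my own, and it is applied to the pairs from the question:
def union_sets(pairs):
    parent = {}

    def find(x):
        parent.setdefault(x, x)
        while parent[x] != x:              # walk up to the root,
            parent[x] = parent[parent[x]]  # compressing the path as we go
            x = parent[x]
        return x

    def union(a, b):
        parent[find(a)] = find(b)

    for a, b in pairs:
        union(a, b)

    groups = {}
    for x in parent:
        groups.setdefault(find(x), set()).add(x)
    return list(groups.values())

print(union_sets([[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]))
# e.g. [{1, 2, 3, 6, 7}, {4, 5}]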
Solution that runs in O(n) time
from collections import defaultdict

def indices_dict(lis):
d = defaultdict(list)
for i,(a,b) in enumerate(lis):
d[a].append(i)
d[b].append(i)
return d
def disjoint_indices(lis):
d = indices_dict(lis)
sets = []
while len(d):
que = set(d.popitem()[1])
ind = set()
while len(que):
ind |= que
que = set([y for i in que
for x in lis[i]
for y in d.pop(x, [])]) - ind
sets += [ind]
return sets
def disjoint_sets(lis):
return [set([x for i in s for x in lis[i]]) for s in disjoint_indices(lis)]
How it works:
>>> lis = [(1,2),(2,3),(4,5),(6,7),(1,7)]
>>> indices_dict(lis)
defaultdict(<class 'list'>, {1: [0, 4], 2: [0, 1], 3: [1], 4: [2], 5: [2], 6: [3], 7: [3, 4]})
indices_dict gives a map from an equivalence # to an index in lis. E.g. 1 is mapped to indices 0 and 4 in lis.
>>> disjoint_indices(lis)
[set([0, 1, 3, 4]), set([2])]
disjoint_indices gives a list of disjoint sets of indices. Each set corresponds to indices in an equivalence. E.g. lis[0] and lis[3] are in the same equivalence but not lis[2].
>>> disjoint_sets(lis)
[set([1, 2, 3, 6, 7]), set([4, 5])]
disjoint_sets converts the disjoint indices into their proper equivalences.
Time complexity
The O(n) time complexity is difficult to see but I'll try to explain. Here I will use n = len(lis).
indices_dict certainly runs in O(n) time because it has only one for-loop.
disjoint_indices is the hardest to see. It certainly runs in O(len(d)) time, since the outer loop stops when d is empty and the inner loop removes an element of d each iteration. Now, len(d) <= 2n, since d is a map from equivalence #'s to indices and there are at most 2n different equivalence #'s in lis. Therefore, the function runs in O(n).
disjoint_sets is difficult to see because of the 3 combined for-loops. However, you'll notice that at most i can run over all n indices in lis and x runs over the 2-tuple, so the total complexity is 2n = O(n).
I think this is an elegant solution, using the built-in set functions:
#!/usr/bin/python3
def union_find(lis):
lis = map(set, lis)
unions = []
for item in lis:
temp = []
for s in unions:
if not s.isdisjoint(item):
item = s.union(item)
else:
temp.append(s)
temp.append(item)
unions = temp
return unions
if __name__ == '__main__':
l = [[1, 2], [2, 3], [4, 5], [6, 7], [1, 7]]
print(union_find(l))
It returns a list of sets.
Perhaps something like this?
#!/usr/local/cpython-3.3/bin/python
import copy
import pprint
import collections
def union(list_):
dict_ = collections.defaultdict(set)
for sublist in list_:
dict_[sublist[0]].add(sublist[1])
dict_[sublist[1]].add(sublist[0])
change_made = True
while change_made:
change_made = False
for key, values in dict_.items():
for value in copy.copy(values):
for element in dict_[value]:
if element not in dict_[key]:
dict_[key].add(element)
change_made = True
return dict_
list_ = [ [1, 2], [2, 3], [4, 5], [6, 7], [1, 7] ]
pprint.pprint(union(list_))
This works by completely exhausting one equivalence at a time. When an element finds its equivalence, it is removed from the original set and no longer searched.
def equiv_sets(lis):
s = set(lis)
sets = []
#loop while there are still items in original set
while len(s):
s1 = set(s.pop())
length = 0
#loop while there are still equivalences to s1
while( len(s1) != length):
length = len(s1)
for v in list(s):
if v[0] in s1 or v[1] in s1:
s1 |= set(v)
s -= set([v])
sets += [s1]
return sets
print(equiv_sets([(1,2),(2,3),(4,5),(6,7),(1,7)]))
OUTPUT: [set([1, 2, 3, 6, 7]), set([4, 5])]
I want to take the difference between lists x and y:
>>> x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> y = [1, 3, 5, 7, 9]
>>> x - y
# should return [0, 2, 4, 6, 8]
Use a list comprehension to compute the difference while maintaining the original order from x:
[item for item in x if item not in y]
If you don't need list properties (e.g. ordering), use a set difference, as the other answers suggest:
list(set(x) - set(y))
To allow x - y infix syntax, override __sub__ on a class inheriting from list:
class MyList(list):
def __init__(self, *args):
super(MyList, self).__init__(args)
def __sub__(self, other):
return self.__class__(*[item for item in self if item not in other])
Usage:
x = MyList(1, 2, 3, 4)
y = MyList(2, 5, 2)
z = x - y
Use set difference
>>> z = list(set(x) - set(y))
>>> z
[0, 8, 2, 4, 6]
Or you might just have x and y be sets so you don't have to do any conversions.
If duplicates and ordering are a concern:
[i for i in a if not i in b or b.remove(i)]
a = [1,2,3,3,3,3,4]
b = [1,3]
result: [2, 3, 3, 3, 4]
That is a "set subtraction" operation. Use the set data structure for that.
In Python 2.7:
x = {1,2,3,4,5,6,7,8,9,0}
y = {1,3,5,7,9}
print x - y
Output:
>>> print x - y
set([0, 8, 2, 4, 6])
For many use cases, the answer you want is:
ys = set(y)
[item for item in x if item not in ys]
This is a hybrid between aaronasterling's answer and quantumSoup's answer.
aaronasterling's version does len(y) item comparisons for each element in x, so it takes quadratic time. quantumSoup's version uses sets, so it does a single constant-time set lookup for each element in x—but, because it converts both x and y into sets, it loses the order of your elements.
By converting only y into a set, and iterating x in order, you get the best of both worlds—linear time, and order preservation.*
However, this still has a problem from quantumSoup's version: It requires your elements to be hashable. That's pretty much built into the nature of sets.** If you're trying to, e.g., subtract a list of dicts from another list of dicts, but the list to subtract is large, what do you do?
If you can decorate your values in some way that they're hashable, that solves the problem. For example, with a flat dictionary whose values are themselves hashable:
ys = {tuple(item.items()) for item in y}
[item for item in x if tuple(item.items()) not in ys]
If your types are a bit more complicated (e.g., often you're dealing with JSON-compatible values, which are hashable, or lists or dicts whose values are recursively the same type), you can still use this solution. But some types just can't be converted into anything hashable.
If your items aren't, and can't be made, hashable, but they are comparable, you can at least get log-linear time (O(N*log M), which is a lot better than the O(N*M) time of the list solution, but not as good as the O(N+M) time of the set solution) by sorting and using bisect:
import bisect
ys = sorted(y)
def bisect_contains(seq, item):
    index = bisect.bisect_left(seq, item)
    return index < len(seq) and seq[index] == item
[item for item in x if not bisect_contains(ys, item)]
If your items are neither hashable nor comparable, then you're stuck with the quadratic solution.
* Note that you could also do this by using a pair of OrderedSet objects, for which you can find recipes and third-party modules. But I think this is simpler.
** The reason set lookups are constant time is that all it has to do is hash the value and see if there's an entry for that hash. If it can't hash the value, this won't work.
If the lists allow duplicate elements, you can use Counter from collections:
from collections import Counter
result = list((Counter(x)-Counter(y)).elements())
If you need to preserve the order of elements from x:
result = [ v for c in [Counter(y)] for v in x if not c[v] or c.subtract([v]) ]
The other solutions have one of a few problems:
They don't preserve order, or
They don't remove a precise count of elements, e.g. for x = [1, 2, 2, 2] and y = [2, 2] they convert y to a set, and either remove all matching elements (leaving [1] only) or remove one of each unique element (leaving [1, 2, 2]), when the proper behavior would be to remove 2 twice, leaving [1, 2], or
They do O(m * n) work, where an optimal solution can do O(m + n) work
Alain was on the right track with Counter to solve #2 and #3, but that solution will lose ordering. The solution that preserves order (removing the first n copies of each value for n repetitions in the list of values to remove) is:
from collections import Counter
x = [1,2,3,4,3,2,1]
y = [1,2,2]
remaining = Counter(y)
out = []
for val in x:
if remaining[val]:
remaining[val] -= 1
else:
out.append(val)
# out is now [3, 4, 3, 1], having removed the first 1 and both 2s.
To make it remove the last copies of each element, just change the for loop to for val in reversed(x): and add out.reverse() immediately after exiting the for loop.
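Spelled out with the same x, y and Counter import as above, that last-copies variant might look like:
remaining = Counter(y)
out = []
for val in reversed(x):
    if remaining[val]:
        remaining[val] -= 1
    else:
        out.append(val)
out.reverse()
# out is now [1, 3, 4, 3], having removed the last 1 and both 2s.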
Constructing the Counter is O(n) in terms of y's length, iterating x is O(n) in terms of x's length, and Counter membership testing and mutation are O(1), while list.append is amortized O(1) (a given append can be O(n), but for many appends, the overall big-O averages O(1) since fewer and fewer of them require a reallocation), so the overall work done is O(m + n).
You can also determine whether there were any elements in y that were not removed from x by testing:
remaining = +remaining # Removes all keys with zero counts from Counter
if remaining:
# remaining contained elements with non-zero counts
Looking up values in a set is faster than looking them up in a list:
[item for item in x if item not in set(y)]
I believe this will scale slightly better than:
[item for item in x if item not in y]
Both preserve the order of the lists.
We can use set methods as well to find the difference between two lists:
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
y = [1, 3, 5, 7, 9]
list(set(x).difference(y))
[0, 2, 4, 6, 8]
Try this.
def subtract_lists(a, b):
""" Subtracts two lists. Throws ValueError if b contains items not in a """
# Terminate if b is empty, otherwise remove b[0] from a and recurse
return a if len(b) == 0 else [a[:i] + subtract_lists(a[i+1:], b[1:])
for i in [a.index(b[0])]][0]
>>> x = [1,2,3,4,5,6,7,8,9,0]
>>> y = [1,3,5,7,9]
>>> subtract_lists(x,y)
[2, 4, 6, 8, 0]
>>> x = [1,2,3,4,5,6,7,8,9,0,9]
>>> subtract_lists(x,y)
[2, 4, 6, 8, 0, 9] #9 is only deleted once
>>>
The answer provided by aaronasterling looks good; however, it is not compatible with the default interface of list: x = MyList(1, 2, 3, 4) vs. x = MyList([1, 2, 3, 4]). Thus, the code below can be used as a more list-friendly alternative:
class MyList(list):
def __init__(self, *args):
super(MyList, self).__init__(*args)
def __sub__(self, other):
return self.__class__([item for item in self if item not in other])
Example:
x = MyList([1, 2, 3, 4])
y = MyList([2, 5, 2])
z = x - y
from collections import Counter
y = Counter(y)
x = Counter(x)
print(list(x-y))
Let:
>>> xs = [1, 2, 3, 4, 3, 2, 1]
>>> ys = [1, 3, 3]
Keep each unique item only once xs - ys == {2, 4}
Take the set difference:
>>> set(xs) - set(ys)
{2, 4}
Remove all occurrences xs - ys == [2, 4, 2]
>>> [x for x in xs if x not in ys]
[2, 4, 2]
If ys is large, convert only ys into a set for better performance [1]:
>>> ys_set = set(ys)
>>> [x for x in xs if x not in ys_set]
[2, 4, 2]
Only remove same number of occurrences xs - ys == [2, 4, 2, 1]
from collections import Counter
def diff(xs, ys):
counter = Counter(ys)
for x in xs:
if counter[x] > 0:
counter[x] -= 1
continue
yield x
>>> list(diff(xs, ys))
[2, 4, 2, 1]
[1] Converting xs to a set and taking the set difference is unnecessary (and slower, as well as order-destroying), since we only need to iterate over xs once.
This example subtracts two lists:
# List of pairs of points
list = []
list.append([(602, 336), (624, 365)])
list.append([(635, 336), (654, 365)])
list.append([(642, 342), (648, 358)])
list.append([(644, 344), (646, 356)])
list.append([(653, 337), (671, 365)])
list.append([(728, 13), (739, 32)])
list.append([(756, 59), (767, 79)])
itens_to_remove = []
itens_to_remove.append([(642, 342), (648, 358)])
itens_to_remove.append([(644, 344), (646, 356)])
print("Initial List Size: ", len(list))
for a in itens_to_remove:
for b in list:
if a == b :
list.remove(b)
print("Final List Size: ", len(list))
list1 = ['a', 'c', 'a', 'b', 'k']
list2 = ['a', 'a', 'a', 'a', 'b', 'c', 'c', 'd', 'e', 'f']
for e in list1:
try:
list2.remove(e)
except ValueError:
print(f'{e} not in list')
list2
# ['a', 'a', 'c', 'd', 'e', 'f']
This will change list2. If you want to protect list2, just copy it and use the copy in this code.
def listsubtraction(parent,child):
answer=[]
for element in parent:
if element not in child:
answer.append(element)
return answer
I think this should work. I am a beginner, so pardon me for any mistakes.