How to improve the following code?
1 )-> pass a 1-dimensional array of length 2**n
2 )-> for every index of the array get the binary representation
3 )-> reverse the binary representation and use it as the new integer index for the corresponding value
EXAMPLE:
[56,209,81,42]
[00,01,10,11] (binary representation of the indices)
-> REVERSE:[00,10,01,11]
-> BECOMES:[56,81,209,42]
Code:
def order_in_reversed_bits(data: np.ndarray) -> np.ndarray:
    """Return a copy of *data* permuted by bit-reversed indices.

    Each value at index i is written to the position obtained by
    reversing the binary representation of i (width = number of bits
    needed for the last valid index).
    """
    width = len(np.binary_repr(data.shape[0] - 1))
    reverse_bits = np.vectorize(
        lambda idx: np.binary_repr(idx, width=width)[::-1]
    )
    reversed_keys = reverse_bits(np.arange(data.shape[0]))
    result = np.zeros(data.shape, dtype='float64')
    for source_index, key in enumerate(reversed_keys):
        result[int(key, 2)] = data[source_index]
    return result
Which built-in functionality of Numpy or Python is handy?
You could use sorted with custom key, for example (thanks to #hpaulj for improved key function with bin()):
lst = [56,209,81,42]
def order_in_reversed_bits_python(lst):
    """Reorder *lst* so the element at index i lands at bit-reversed(i).

    Sorting by the reversed binary string (prefix '0b' stripped by the
    [:1:-1] slice) yields the bit-reversal permutation order.
    """
    indexed = sorted(enumerate(lst), key=lambda pair: bin(pair[0])[:1:-1])
    return [value for _, value in indexed]
print(order_in_reversed_bits_python(lst))
Prints:
[56, 81, 209, 42]
Timings:
import timeit
from random import randint
def order_in_reversed_bits_python(lst):
    """Sort the values of *lst* by the reversed bits of their index."""
    def reversed_bits_key(pair):
        # bin() yields '0b…'; the [:1:-1] slice reverses the digits while
        # dropping the '0b' prefix in one step.
        return bin(pair[0])[:1:-1]
    return [v for _, v in sorted(enumerate(lst), key=reversed_bits_key)]
def order_in_reversed_bits(data):
    """NumPy variant: scatter every value to its bit-reversed index."""
    n = data.shape[0]
    width = len(np.binary_repr(n - 1))
    as_reversed_binary = np.vectorize(
        lambda i: np.binary_repr(i, width=width)[::-1]
    )
    keys = as_reversed_binary(np.arange(n))
    out = np.zeros(data.shape, dtype='float64')
    for src, bits in enumerate(keys):
        out[int(bits, 2)] = data[src]
    return out
# create some large array:
# 2**16 random ints — a power-of-two length, as a bit-reversal
# permutation requires.
lst = np.array([randint(1, 100) for _ in range(2**16)])
# Time a single run of each implementation (number=1: one-shot, not averaged).
t1 = timeit.timeit(lambda: order_in_reversed_bits_python(lst), number=1)
t2 = timeit.timeit(lambda: order_in_reversed_bits(lst), number=1)
print(t1)
print(t2)
Prints:
0.05821935099811526
0.22723246600071434
which is an improvement of ~3.9x
This problem is known as bit-reversal permutation. The only difficult part is to reverse the binary representation of an index. Here you will find ways to do it. I choose the simplest one:
def bit_reversal_permutation(n):
    """Return the bit-reversal permutation of range(2**n) as an int array."""
    def reverse_bits(index):
        # zero-pad to n bits, reverse the string, parse it back as binary
        return int(format(index, f'0{n}b')[::-1], 2)
    return np.fromiter((reverse_bits(i) for i in range(2**n)), dtype=int)
A faster version, based on the observation that:
Each permutation in this sequence can be generated by concatenating two sequences of numbers: the previous permutation, doubled, and the same sequence with each value increased by one.
def bit_reversal_permutation(n):
    """Bit-reversal permutation of range(2**n), built from the half-size one.

    The n-bit permutation is the (n-1)-bit permutation doubled, followed
    by the same doubled sequence with every value increased by one.
    """
    half = 2 ** (n - 1)
    rev = lambda i: int(format(i, f'0{n-1}b')[::-1], 2)
    prev = np.fromiter(map(rev, range(half)), dtype=int)
    return np.concatenate([2 * prev, 2 * prev + 1])
Example:
# Apply the permutation to a random vector of length 2**n.
n = 4
a = np.random.randn(2**n)
inds_rev = bit_reversal_permutation(n)
# fancy indexing gathers a[inds_rev[j]] into position j
a[inds_rev]
Related
I need to find a city with the highest population using regex, data is presented in such way:
data = ["id,name,poppulation,is_capital",
"3024,eu_kyiv,24834,y",
"3025,eu_volynia,20231,n",
"3026,eu_galych,23745,n",
"4892,me_medina,18038,n",
"4401,af_cairo,18946,y",
"4700,me_tabriz,13421,n",
"4899,me_bagdad,22723,y",
"6600,af_zulu,09720,n"]
I've done this so far:
def max_population(data):
    """Return (city_name, population) for the row with the largest population.

    Rows look like "id,name,population,is_capital".  The header row never
    matches the regex (its last field has no digits), so it is skipped
    automatically.

    Returns a (str, int) tuple, as requested, instead of a list of strings.
    """
    matches = []
    for row in data:
        # ',<region>_<city>,<digits>' — one hit per data row
        for hit in re.findall(r',\S+_\S+,[0-9]+', row):
            name, pop = [field for field in hit.split(',') if field]
            # convert here so the caller gets (str, int) without re-iterating
            matches.append((name, int(pop)))
    return max(matches, key=lambda pair: pair[1])
But function should return (str, int) tuple, is it possible to change my code in a way that it will return tuple without iterating list once again?
All your strings are separated by a comma. You could get the max value using split and check if the third value is a digit and is greater than the first value of the tuple.
If it is, set it as the new highest value.
def max_population(data):
    """Scan the CSV rows and return the (name, population) of the largest city."""
    best = None
    for line in data:
        fields = line.split(",")
        # header row: the population column is not numeric, so skip it
        if not fields[2].isdigit():
            continue
        candidate = (fields[1], int(fields[2]))
        if best is None or candidate[1] > best[1]:
            best = candidate
    return best
print(max_population(items))
Output
('eu_kyiv', 24834)
Python demo
The following long line get the wanted result (str, int) tuple:
def max_population(data):
    """Return the (name, population) pair with the greatest population.

    The first row (index 0) is treated as a header and skipped.
    """
    candidates = []
    for n, row in enumerate(data):
        if n == 0:
            continue  # skip the header row
        # group 1: the 'xx_city' name sitting before ',<digits>,<flag>'
        name = re.findall(r"(\w*),\d*,\w$", row)[0]
        # group 1: the digits just before the trailing ',<flag>'
        population = int(re.findall(r"(\d*),\w$", row)[0])
        candidates.append((name, population))
    return max(candidates, key=lambda pair: int(pair[1]))
In this line, enumerate(data) and n>0 were used to skip the header "id,name,poppulation,is_capital". But if data has no header, the line would be:
def max_population(data):
    """Header-less variant: every row is parsed; largest population wins."""
    pairs = [
        (
            re.findall(r"(\w*),\d*,\w$", row)[0],
            int(re.findall(r"(\d*),\w$", row)[0]),
        )
        for row in data
    ]
    return max(pairs, key=lambda item: int(item[1]))
The result for both is ('eu_kyiv', 24834)
Create a list of tuples instead of a list of lists.
import re
# CSV-style rows: id,name,population,is_capital; the first entry is the
# header.  (The header spells the column "poppulation" — left untouched
# because these strings are runtime data, not documentation.)
data = ["id,name,poppulation,is_capital",
        "3024,eu_kyiv,24834,y",
        "3025,eu_volynia,20231,n",
        "3026,eu_galych,23745,n",
        "4892,me_medina,18038,n",
        "4401,af_cairo,18946,y",
        "4700,me_tabriz,13421,n",
        "4899,me_bagdad,22723,y",
        "6600,af_zulu,09720,n"]
def max_population(data):
    """Return the (name, population) tuple with the greatest population.

    Both tuple fields remain strings; only the comparison key converts
    the population to int.
    """
    tuples = []
    for row in data:
        for match in re.findall(r',\S+_\S+,[0-9]+', row):
            tuples.append(tuple(part for part in match.split(',') if part))
    return max(tuples, key=lambda t: int(t[1]))
print(max_population(data))
You could create a mapping function to map the types to the data and use the operator.itemgetter function as your key in max:
from operator import itemgetter
def f(row):
    """Map a CSV row to a typed (city_name, population) tuple."""
    # drop the id column; keep name and population only
    name_and_pop = row.split(',')[1:3]
    # pair each field with the type it should be cast to
    casters = (str, int)
    return tuple(cast(value) for cast, value in zip(casters, name_and_pop))
# Have max consume a map on the data excluding the
# header row (hence the slice)
max(map(f, data[1:]), key=itemgetter(1))
('eu_kyiv', 24834)
np.unique() can return indices of first occurrence, indices to reconstruct, and occurrence count. Is there any function/library that can do the same for any Python object?
Not as such. You can get similar functionality using different classes depending on your needs.
unique with no extra flags has a similar result to set:
unique_value = set(x)
collections.Counter simulates return_counts:
counts = collections.Counter(x)
unique_values = list(counts.keys())
unique_counts = list(counts.values())
To mimic return_index, use list.index on a set or Counter. This assumes that the container is a list
first_indices = [x.index(k) for k in counts]
To simulate return_inverse, we look at how unique is actually implemented. unique sorts the input to get the runs of elements. A similar technique can be achieved via sorted (or in-place list.sort) and itertools.groupby:
# Pair each value with its original position, then sort by (value, position).
s = sorted(zip(x, itertools.count()))
inverse = [0] * len(x)
# Consecutive equal values form one group; i is the rank of the unique value.
for i, (k, g) in enumerate(itertools.groupby(s, operator.itemgetter(0))):
    for v in g:
        # v == (value, original_index): store the rank at the original position
        inverse[v[1]] = i
In fact, the groupby approach encodes all the options:
# One sorted pass that reproduces all of np.unique's optional outputs.
# s holds (value, original_index) pairs, sorted by value then index.
s = sorted(zip(x, itertools.count()))
unique_values = []
first_indices = []
unique_counts = []
inverse = [0] * len(x)
for i, (k, g) in enumerate(itertools.groupby(s, operator.itemgetter(0))):
    unique_values.append(k)
    count = 1
    v = next(g)
    inverse[v[1]] = i
    # BUG FIX: v is a (value, original_index) pair, so the index of the
    # first occurrence is v[1]; the original appended v[0], the value itself.
    # Because s is sorted by (value, index), the group's first pair carries
    # the smallest original index, matching np.unique(..., return_index=True).
    first_indices.append(v[1])
    for v in g:
        inverse[v[1]] = i
        count += 1
    unique_counts.append(count)
You can use Counter:
> from collections import Counter
> bob = ['bob','bob','dob']
> Counter(bob)
Counter({'bob': 2, 'dob': 1})
> Counter(bob).keys()
dict_keys(['bob', 'dob'])
I have a list:
ll = [1,2,3]
I have a function:
def hello(x, v):
    """Mark position *v* of container *x* as True; return the mutated container."""
    container, key = x, v
    container[key] = True
    return container
I have an iterable ii = [1,2]
Currently, I do:
for i in ii:
ll = hello(x=ll, v=i)
How to do this with reduce and partial?
EDIT:
I solved it with:
reduce(lambda x,y: hello(x,y), ii, ll)
I have a list which is a million items long of random, repeatable integers. I need to sort that list, and then find the index of the first iteration of every unique element in the list. When I do this, I am running into run time >5 minutes long. Can anyone give me any suggestions to speed up my code? An example of my process is shown below.
import random
# Build a million random ints in [1, 10000].
a = []
for x in range(1000000):
    a.append(random.randint(1,10000))
# Distinct values present in a.
unique_a = set(a)
inds=[0]
# NOTE(review): while the comprehension runs, `inds` on the right still
# refers to the previous binding [0], so the `if` only filters out the
# value 0 — and a.index(i) is O(n) per call, making this quadratic overall.
inds = [a.index(i) for i in sorted(unique_a) if i not in inds]
inds = [a.index(i) for i in sorted(unique_a) if i not in inds] is implicitly quadratic since a.index(i) is linear. Use a dictionary to grab the indices in one pass over the sorted list:
a =sorted([0,4,3,5,21,5,6,3,1,23,4,6,1,93,34,10])
unique_a = set(a)
first_inds = {}
# Single pass: remember only the first position at which each value appears.
for i,x in enumerate(a):
    if not x in first_inds:
        first_inds[x] = i
# sorted(unique_a) yields the first-occurrence indices in ascending-value order.
my_inds = [first_inds[x] for x in sorted(unique_a)]
Just store the first position for every unique element:
first_position = {}
# Keep only the first index seen for each value (later duplicates ignored).
for i, value in enumerate(a):
    if value not in first_position:
        first_position[value] = i
And then replace a.index(i) with first_position[i]
Or just use:
_, indices = zip(*sorted(first_position.items()))
You can use the bisect_left function from the standard library's bisect module to do this. On a sorted list, a bisection search is faster than searching through the list as index does.
>>> L = [random.randint(0, 10) for _ in range(100)]
>>> L.sort()
>>> L.index(9)
83
>>> bisect.bisect_left(L, 9)
83
>>> timeit.timeit(setup="from __main__ import L", stmt="L.index(9)")
2.1408978551626205
>>> timeit.timeit(setup="from __main__ import L;from bisect import bisect_left", stmt="bisect_left(L, 9)")
0.5187544231303036
On my machine, using bisect.bisect_left is faster than iterating over the list and accumulating indexes on the way:
>>> L = [random.randint(0, 100) for _ in range(10000)]
>>> L.sort()
>>> def iterative_approach(list_):
... unique = set(list_)
... first_inds = {}
... for i, x in enumerate(list_):
... if x not in first_inds:
... first_inds[x] = i
... return [first_inds[x] for x in sorted(unique)]
...
>>> ia = iterative_approach(L)
>>> bisect_left = bisect.bisect_left
>>> def bisect_approach(list_):
... unique = set(list_)
... out = {}
... for x in unique:
... out[x] = bisect_left(list_, x)
... return [out[x] for x in sorted(unique)]
...
>>> ba = bisect_approach(L)
>>> ia == ba
True
>>> timeit.timeit(setup="from __main__ import L, iterative_approach", stmt="iterative_approach(L)")
1488.956467495067
>>> timeit.timeit(setup="from __main__ import L, bisect_approach", stmt="bisect_approach(L)")
407.6803469741717
I have a few arrays containing integer and strings. For example:
myarray1 = [1,2,3,"ab","cd",4]
myarray2 = [1,"a",2,3,"bc","cd","e",4]
I'm trying to combine only the strings in an array that are next to each other. So I want the result to be:
newarray1= [1,2,3,"abcd",4]
newarray2= [1,"a",2,3,"bccde",4]
Does anyone know how to do this? Thank you!
The groupby breaks the list up into runs of strings and runs of integers. The ternary operation joins the groups of strings and puts them into a temporary sequence. The chain re-joins the strings and the runs of integers.
from itertools import groupby, chain
def joinstrings(iterable):
    """Collapse each run of adjacent strings in *iterable* into one string.

    Non-string elements pass through unchanged and order is preserved,
    e.g. [1, 2, "ab", "cd", 4] -> [1, 2, "abcd", 4].
    """
    return list(chain.from_iterable(
        # a run of strings becomes a one-element tuple holding the join;
        # a run of non-strings is passed through as-is
        (''.join(group),) if key else group
        for key, group in
        # BUG FIX for Python 3: `basestring` no longer exists; `str`
        # covers all text (the rest of this file already uses f-strings).
        groupby(iterable, key=lambda elem: isinstance(elem, str))))
>>> myarray1 = [1,2,3,"ab","cd",4]
>>> newarray1 = [myarray1[0]]
>>> for item in myarray1[1:]:
... if isinstance(item, str) and isinstance(newarray1[-1], str):
... newarray1[-1] = newarray1[-1] + item
... else:
... newarray1.append(item)
>>> newarray1
[1, 2, 3, 'abcd', 4]
reduce(lambda x, (tp, it): tp and x + ["".join(it)] or x+list(it), itertools.groupby( myarray1, lambda x: isinstance(x, basestring) ), [])
a = [1,2,3,"ab","cd",4]
# NOTE(review): `a` here is the list defined just above, so b embeds that
# list as an element — presumably the string "a" was intended (compare
# myarray2 in the question); verify before relying on this fixture.
b = [1,a,2,3,"bc","cd","e",4]
def func(a):
    """Merge adjacent string elements of *a* into single concatenated strings.

    Non-string elements are appended unchanged; each maximal run of
    consecutive strings becomes one element.
    """
    ret = []
    buffer = ""
    for x in a:
        # Python 3: `basestring` was removed; `str` is the text type
        if isinstance(x, str):
            buffer = buffer + x
        else:
            if buffer:
                ret.append(buffer)
                buffer = ""
            ret.append(x)
    # BUG FIX: the original flushed the buffer only when a non-string
    # followed, silently dropping a trailing run of strings.
    if buffer:
        ret.append(buffer)
    return ret
# Python 3: print is a function, not a statement.
print(func(a))
print(func(b))