How can I avoid a Numba error with recursion? - python

I have set up a function that iterates over combinations of characters to form strings.
It calls itself recursively; the recursive call looks like this:
testG(charNum - 1, arr2)
But when I call the function, I get this error:
>>> testSpeedGPU()
Traceback (most recent call last):
  File "<pyshell#9>", line 1, in <module>
    testSpeedGPU()
  File "F:\Script Projects#\HASHFinder.py", line 90, in testSpeedGPU
    testG(4, [''])
  File "D:\Python\lib\site-packages\numba\cuda\dispatcher.py", line 40, in __call__
    return self.compiled(*args, **kws)
  File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 758, in __call__
    kernel = self.specialize(*args)
  File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 769, in specialize
    kernel = self.compile(argtypes)
  File "D:\Python\lib\site-packages\numba\cuda\compiler.py", line 784, in compile
    kernel = compile_kernel(self.py_func, argtypes,
  File "D:\Python\lib\site-packages\numba\core\compiler_lock.py", line 32, in _acquire_compile_lock
    return func(*args, **kwargs)
TypeError: compile_kernel() got an unexpected keyword argument 'boundscheck'
Here is the function's body:
@jit(target="cuda")
def testG(charNum, inpArray) -> None:
    if charNum == 1:
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        return
    else:
        print("more than 1")
        arr2 = []
        for s in range(len(inpArray)):
            for i in range(len(alp)):
                arr2.append(alp[i] + inpArray[s])
        testG(charNum - 1, arr2)
I think it has to do with the recursion, but I really don't know.
Thanks for your help!
PS: The function works when not marked with @jit(target="cuda").
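For reference, the combination generation itself does not need recursion. Below is a minimal CPU-only sketch using itertools.product; alp here is a hypothetical alphabet standing in for the one in the question, and this only illustrates the equivalent iteration, not a fix for the CUDA compilation error:
import itertools

alp = 'abcdefghijklmnopqrstuvwxyz'  # hypothetical stand-in for the question's alp

def all_strings(char_num):
    # Yield every string of length char_num over alp, iteratively.
    for chars in itertools.product(alp, repeat=char_num):
        yield ''.join(chars)

# Count all 4-character combinations, as testG(4, ['']) would build.
print(sum(1 for _ in all_strings(4)))  # 26**4 = 456976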

Related

Jax vmap, in_axes doesn't work if keyword argument is passed

The parameter in_axes in vmap seems to work only for positional arguments; it throws an AssertionError (with no message) when called with a keyword argument.
from jax import vmap
import numpy as np

def foo(a, b, c):
    return a * b + c

foo = vmap(foo, in_axes=(0, 0, None))
aj, bj = np.random.rand(2, 100, 1)
foo(aj, bj, 10)    # works
foo(aj, bj, c=10)  # throws error
Console output:
Traceback (most recent call last):
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\api_util.py", line 300, in flatten_axes
    tree_map(add_leaves, _replace_nones(proxy, axis_tree), dummy)
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\tree_util.py", line 183, in tree_map
    all_leaves = [leaves] + [treedef.flatten_up_to(r) for r in rest]
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\tree_util.py", line 183, in <listcomp>
    all_leaves = [leaves] + [treedef.flatten_up_to(r) for r in rest]
ValueError: Tuple arity mismatch: 2 != 3; tuple: (<object object at 0x00000187F7BF4380>, <object object at 0x00000187F7BF4380>).

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-20500a2f8a08>", line 1, in <module>
    runfile('C:\\Users\\Amith\\PycharmProjects\\nntp\\tests\\test2.py', wdir='C:\\Users\\Amith\\PycharmProjects\\nntp\\tests')
  File "C:\Program Files\JetBrains\PyCharm 2022.2\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Program Files\JetBrains\PyCharm 2022.2\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "C:\Users\Amith\PycharmProjects\nntp\tests\test2.py", line 11, in <module>
    foo(aj, bj, c=10)
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\traceback_util.py", line 162, in reraise_with_filtered_traceback
    return fun(*args, **kwargs)
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\api.py", line 1481, in vmap_f
    in_axes_flat = flatten_axes("vmap in_axes", in_tree, (in_axes, 0), kws=True)
  File "C:\Users\Amith\PycharmProjects\nntp\venv\lib\site-packages\jax\_src\api_util.py", line 306, in flatten_axes
    assert treedef_is_leaf(leaf)
AssertionError
How would one go about calling foo as foo(aj, bj, c=10) without provoking the error?
Yes, it's true that vmap in_axes only works for positional arguments. If you want to make a more general vmapped function, the best option currently is probably to use a wrapper function. For example:
def _foo(a, b, c):
    return a * b + c

def foo(a, b, c):
    return vmap(_foo, in_axes=(0, 0, None))(a, b, c)
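With this wrapper in place, the keyword-argument call from the question works, because foo is now an ordinary Python function that forwards c positionally to the vmapped _foo:
aj, bj = np.random.rand(2, 100, 1)
print(foo(aj, bj, c=10).shape)  # (100, 1)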

Unable to identify the issue here

I am currently working on portfolio code, and I am trying to print an array but I keep getting hit with an error:
w = np.random.random((1000, len(symbols)))
w = (w.T / w.sum(axis=1)).T
print(w[:5])
Traceback (most recent call last):
  File "<ipython-input-23-246da7acc0b7>", line 3, in <module>
    print(w[:5])
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 1506, in _array_str_implementation
    return array2string(a, max_line_width, precision, suppress_small, ' ', "")
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 712, in array2string
    return _array2string(a, options, separator, prefix)
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 484, in wrapper
    return f(self, *args, **kwargs)
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 510, in _array2string
    format_function = _get_format_function(data, **options)
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 431, in _get_format_function
    formatdict = _get_formatdict(data, **options)
  File "C:\Users\godso\anaconda3\lib\site-packages\numpy\core\arrayprint.py", line 403, in _get_formatdict
    fkeys = [k for k in formatter.keys() if formatter[k] is not None]
AttributeError: 'set' object has no attribute 'keys'
I have symbols defined, and all I am trying to do is print the array with the code below. Note that I have much more code, but I didn't include it because everything worked fine up until now.
w = np.random.random((1000, len(symbols)))
w = (w.T / w.sum(axis=1)).T
print(w[:5])
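The traceback shows numpy's printing machinery calling formatter.keys(), which suggests np.set_printoptions was called earlier in the session with a set rather than a dict for its formatter argument. Here is a minimal sketch that reproduces the same error; the formatter={'float'} call is a hypothetical stand-in for whatever ran earlier:
import numpy as np

# formatter must be a dict mapping kind names to callables:
np.set_printoptions(formatter={'float': lambda x: f'{x:.4f}'})
print(np.random.random(3))  # prints fine

# A set (e.g. a dict literal missing its values) only fails later, at print time:
np.set_printoptions(formatter={'float'})
print(np.random.random(3))  # AttributeError: 'set' object has no attribute 'keys'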

Pyspark UDF unable to use large dictionary

I have a dictionary whose keys are words and whose values are arrays of 300 floats.
I am unable to use this dictionary in my PySpark UDF.
When the dictionary has 2 million keys it does not work, but when I reduce it to 200K keys it works.
This is the function to be converted to a UDF:
def get_sentence_vector(sentence, dictionary_containing_word_vectors):
    cleanedSentence = list(clean_text(sentence))
    words_vector_list = np.zeros(300)  # 300-dimensional vector
    for x in cleanedSentence:
        try:
            words_vector_list = np.add(words_vector_list, dictionary_containing_word_vectors[str(x)])
        except Exception as e:
            print("Exception caught while finding word vector from Fast text pretrained model Dictionary: ", e)
    return words_vector_list.tolist()
This is my UDF
get_sentence_vector_udf = F.udf(lambda val: get_sentence_vector(val, fast_text_dictionary), ArrayType(FloatType()))
This is how I'm calling the UDF to add the result as a column to my dataframe:
dmp_df_with_vectors = df.filter(df.item_name.isNotNull()).withColumn("sentence_vector", get_sentence_vector_udf(df.item_name))
And this is the stack trace for the error
Traceback (most recent call last):
  File "/usr/lib/spark/python/pyspark/broadcast.py", line 83, in dump
    pickle.dump(value, f, 2)
SystemError: error return without exception set

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/spark/python/pyspark/sql/functions.py", line 1957, in wrapper
    return udf_obj(*args)
  File "/usr/lib/spark/python/pyspark/sql/functions.py", line 1916, in __call__
    judf = self._judf
  File "/usr/lib/spark/python/pyspark/sql/functions.py", line 1900, in _judf
    self._judf_placeholder = self._create_judf()
  File "/usr/lib/spark/python/pyspark/sql/functions.py", line 1909, in _create_judf
    wrapped_func = _wrap_function(sc, self.func, self.returnType)
  File "/usr/lib/spark/python/pyspark/sql/functions.py", line 1866, in _wrap_function
    pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
  File "/usr/lib/spark/python/pyspark/rdd.py", line 2377, in _prepare_for_python_RDD
    broadcast = sc.broadcast(pickled_command)
  File "/usr/lib/spark/python/pyspark/context.py", line 799, in broadcast
    return Broadcast(self, value, self._pickled_broadcast_vars)
  File "/usr/lib/spark/python/pyspark/broadcast.py", line 74, in __init__
    self._path = self.dump(value, f)
  File "/usr/lib/spark/python/pyspark/broadcast.py", line 90, in dump
    raise pickle.PicklingError(msg)
cPickle.PicklingError: Could not serialize broadcast: SystemError: error return without exception set
How big is your fast_text_dictionary in the 2M case? It might be too big to pickle into the UDF's closure.
Try broadcasting it first before running the UDF, e.g.:
broadcastVar = sc.broadcast(fast_text_dictionary)
Then use broadcastVar.value inside your UDF.
See the documentation for broadcast.
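A minimal sketch of the suggested wiring, reusing the names from the question (it assumes the same imports, the SparkContext sc, and the get_sentence_vector function above):
from pyspark.sql import functions as F
from pyspark.sql.types import ArrayType, FloatType

# Ship the dictionary to executors once instead of pickling it into the UDF closure.
broadcastVar = sc.broadcast(fast_text_dictionary)

get_sentence_vector_udf = F.udf(
    lambda val: get_sentence_vector(val, broadcastVar.value),
    ArrayType(FloatType()),
)

dmp_df_with_vectors = df.filter(df.item_name.isNotNull()).withColumn(
    "sentence_vector", get_sentence_vector_udf(df.item_name)
)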

(Casting) errors using extract_(relevant_)features from tsfresh

Trying out the Python package tsfresh, I run into issues at the very first steps. Given a series, how do I (automatically) make features for it? This snippet produces different errors depending on which part I run.
import tsfresh
import pandas as pd
import numpy as np

# tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind='float64', max_timeshift=50, rolling_direction=1)
# rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind=1, max_timeshift=50, rolling_direction=1)
rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
The error in the first (commented-out) case is:
""" Traceback (most recent call last): File "C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 350, in _f
result = [("", func(data))] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 193, in variance_larger_than_standard_deviation
y = np.var(x) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core\fromnumeric.py",
line 3157, in var
**kwargs) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core_methods.py",
line 110, in _var
arrmean, rcount, out=arrmean, casting='unsafe', subok=False) TypeError: unsupported operand type(s) for /: 'str' and 'int' """
and in the second case
""" Traceback (most recent call last): File
"C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 345, in _f
result = func(data, param=parameter_list) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 1752, in friedrich_coefficients
coeff = _estimate_friedrich_coefficients(x, m, r) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 145, in _estimate_friedrich_coefficients
result.dropna(inplace=True) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\frame.py",
line 4598, in dropna
result = self.loc(axis=axis)[mask] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1500, in getitem
return self._getitem_axis(maybe_callable, axis=axis) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1859, in _getitem_axis
if is_iterator(key): File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\dtypes\inference.py",
line 157, in is_iterator
return hasattr(obj, 'next') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\generic.py",
line 5065, in getattr
if self._info_axis._can_hold_identifiers_and_holds_name(name): File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 3984, in _can_hold_identifiers_and_holds_name
return name in self File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\category.py",
line 327, in contains
return contains(self, key, container=self._engine) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\arrays\categorical.py",
line 188, in contains
loc = cat.categories.get_loc(key) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 770, in get_loc
start, stop = self._find_non_overlapping_monotonic_bounds(key) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 717, in _find_non_overlapping_monotonic_bounds
start = self._searchsorted_monotonic(key, 'left') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 681, in _searchsorted_monotonic
return sub_idx._searchsorted_monotonic(label, side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 4755, in _searchsorted_monotonic
return self.searchsorted(label, side=side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\base.py",
line 1501, in searchsorted
return self._values.searchsorted(value, side=side, sorter=sorter) TypeError: Cannot cast array data from dtype('float64') to
dtype('
np.__version__ and tsfresh.__version__ are ('1.15.4', 'unknown'). I installed tsfresh using conda, probably from conda-forge. I am on Windows 10. Using another kernel with np.__version__, tsfresh.__version__ of ('1.15.4', '0.11.2') leads to the same results.
Trying the first couple of cells from timeseries_forecasting_basic_example.ipynb yields the casting error as well.
Fixed it. Either the version on conda(-forge) or one of its dependencies was the issue. Using "conda uninstall tsfresh", "conda install patsy future six tqdm", and "pip install tsfresh" together did the trick.

AttributeError: 'int' object has no attribute 'name'

Following is my code:
def cos_dist(self, net_1, net_2, sess):
    # result
    result = tf.div(product_norm, denom)
    r = tf.cond(result > 0.2, self.truef, self.falsef)
    return r

def truef(self):
    return 1

def falsef(self):
    return 0
Here I am applying a threshold to result: if its value is greater than 0.2, assign 1; otherwise assign 0. But I keep getting this error. Kindly tell me what I am doing wrong.
Traceback:
Traceback (most recent call last):
  File "f.py", line 326, in <module>
    vgg = vgg16(imgs1,imgs2, 'vgg16_weights.npz', sess)
  File "f.py", line 39, in __init__
    self.cd=self.cos_dist(self.o1,self.o2,sess)
  File "f.py", line 312, in cos_dist
    r=tf.cond(result>0.2,self.truef,self.falsef)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1776, in cond
    orig_res, res_t = context_t.BuildCondBranch(fn1)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1703, in BuildCondBranch
    real_v = self._ProcessOutputTensor(v)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1661, in _ProcessOutputTensor
    if val.name not in self._values:
AttributeError: 'int' object has no attribute 'name'
The callbacks should return tensors, not ints. Try:
one = tf.constant(1, dtype=tf.int32, name='one')
zero = tf.constant(0, dtype=tf.int32, name='zero')
and inside the class:
def truef(self):
    return one

def falsef(self):
    return zero
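For reference, a minimal self-contained sketch of the pattern the answer describes, using the TF1-style API from the question (the constant 0.35 is a hypothetical stand-in for tf.div(product_norm, denom), and the session usage is an assumption about the surrounding code):
import tensorflow as tf

# Both branches of tf.cond must be callables returning tensors, not Python ints.
result = tf.constant(0.35)  # hypothetical stand-in for tf.div(product_norm, denom)
one = tf.constant(1, dtype=tf.int32, name='one')
zero = tf.constant(0, dtype=tf.int32, name='zero')
r = tf.cond(result > 0.2, lambda: one, lambda: zero)

with tf.Session() as sess:
    print(sess.run(r))  # prints 1, since 0.35 > 0.2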
