Newton-Raphson method using DataFrames - Python

I am using the newton() function from SciPy to solve a particular non-linear equation, which works for a single value:
import numpy as np
import scipy.optimize as opt

def iv(p):
    I = p
    # single-diode equation residual; Il, Io, RS, a, Rsh and v are defined elsewhere
    diode = Il - (Io * (np.exp((v + (I * RS)) / a) - 1)) - ((v + (I * RS)) / Rsh) - I
    return diode

I = [opt.newton(iv, 2) for v in np.arange(0, 44.5, 0.1)]
I am trying to do the same for 8760 values of (IL, Io, RS, a, Rsh), each of which is an individual DataFrame:
def elec():
    I = DataFrame(zeros(3898200).reshape((8760, 445)),
                  index=pd.date_range('1/1/2001 00:00', '12/31/2001 23:59', freq='1h'),
                  dtype=float)
    for i in np.arange(0, 8761, 1):
        def power(u):
            I = u
            diode = DataFrame(zeros(3898200).reshape((8760, 445)),
                              index=pd.date_range('1/1/2001 00:00', '12/31/2001 23:59', freq='1h'),
                              dtype=float)
            diode[i] = IL[i] - (Io[i] * (np.exp((v + (I * RS[i])) / a[i] - 1))) - ((v + (I * RS[i])) / Rsh[i]) - I
            return diode
        I[i] = [opt.newton(power, 2) for v in np.arange(0, 44.5, 0.1)]
    return I

I = elec()
I am getting an error: IndexError: index out of bounds.
I should get 8760 × 445 values of I.
Traceback (most recent call last):
File "<ipython-input-176-18d774ef131a>", line 5, in <module>
elec()
File "<ipython-input-174-996ccd566a20>", line 18, in elec
I[i]=[opt.newton(power,2)for v in np.arange(0,44.5,0.1)]
File "C:\Python27\lib\site-packages\scipy\optimize\zeros.py", line 143, in newton
q0 = func(*((p0,) + args))
File "<ipython-input-174-996ccd566a20>", line 14, in power
diode_east[i]=IL_east[0][i]-(Io_east[0][i]*(np.exp((v+(I*RS_ref[i]))/a_east[0][i]-1)))- ((v+(I*RS_ref[i]))/Rsh_east[0][i])-I
File "C:\Python27\lib\site-packages\pandas\core\series.py", line 613, in __getitem__
return self.index.get_value(self, key)
File "C:\Python27\lib\site-packages\pandas\tseries\index.py", line 1132, in get_value
return Index.get_value(self, series, key)
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 769, in get_value
return tslib.get_value_box(series, key)
File "tslib.pyx", line 364, in pandas.tslib.get_value_box (pandas\tslib.c:8228)
File "tslib.pyx", line 379, in pandas.tslib.get_value_box (pandas\tslib.c:8075)
IndexError: index out of bounds
<type 'exceptions.IndexError'>
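For reference, a minimal sketch of one way the per-hour, per-voltage solve could be structured, assuming IL, Io, RS, a and Rsh are pandas Series sharing the same hourly index; the diode equation and the starting guess of 2 come from the code above, everything else here is an assumption, not a confirmed fix:

import numpy as np
import pandas as pd
import scipy.optimize as opt

hours = pd.date_range('1/1/2001 00:00', '12/31/2001 23:00', freq='1h')  # 8760 timestamps
voltages = np.arange(0, 44.5, 0.1)                                      # 445 voltage steps

def solve_current(il, io, rs, a_, rsh, v):
    # Residual of the single-diode equation for one hour and one voltage
    f = lambda i: il - io * (np.exp((v + i * rs) / a_) - 1) - (v + i * rs) / rsh - i
    return opt.newton(f, 2)

result = pd.DataFrame(index=hours, columns=range(len(voltages)), dtype=float)
for ts in hours:                       # iterate over timestamps, not integer positions 0..8760
    for j, v in enumerate(voltages):
        result.loc[ts, j] = solve_current(IL[ts], Io[ts], RS[ts], a[ts], Rsh[ts], v)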

Related

I am trying to use quantstats, but when I load intraday data it gives me the error below.

Traceback (most recent call last):
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py", line 1008, in _cmp_method
other = self._validate_comparison_value(other)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py", line 539, in _validate_comparison_value
raise InvalidComparison(other) from err
pandas.core.arrays.datetimelike.InvalidComparison: 2022-08-01 00:00:00
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:\Algorithmic Trading Proejct\Project\stock_strategies\Test\pyfolio_test.py", line 20, in
qs.reports.full(df['pct'])
File "C:\Users\shiri\anaconda3\lib\site-packages\quantstats\reports.py", line 302, in full
metrics(returns=returns, benchmark=benchmark,
File "C:\Users\shiri\anaconda3\lib\site-packages\quantstats\reports.py", line 518, in metrics
metrics['MTD %'] = comp_func(df[df.index >= _dt(today.year, today.month, 1)]) * pct
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\ops\common.py", line 70, in new_method
return method(self, other)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\arraylike.py", line 60, in ge
return self._cmp_method(other, operator.ge)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 6685, in _cmp_method
result = op(self._values, other)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\ops\common.py", line 70, in new_method
return method(self, other)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\arraylike.py", line 60, in ge
return self._cmp_method(other, operator.ge)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\arrays\datetimelike.py", line 1010, in _cmp_method
return invalid_comparison(self, other, op)
File "C:\Users\shiri\anaconda3\lib\site-packages\pandas\core\ops\invalid.py", line 34, in invalid_comparison
raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
TypeError: Invalid comparison between dtype=datetime64[ns, America/Sao_Paulo] and datetime
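A minimal sketch of a possible workaround, assuming the failure comes from quantstats comparing the tz-aware intraday index (datetime64[ns, America/Sao_Paulo]) with naive datetimes internally; dropping the timezone before the call is an assumption, not a confirmed fix:

import pandas as pd
import quantstats as qs

returns = df['pct'].copy()                        # df is the intraday frame from the post
returns.index = returns.index.tz_localize(None)   # make the DatetimeIndex tz-naive
qs.reports.full(returns)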

Sympy matrix exponential raises ValueError

I have a 3x3 symbolic matrix and I want to calculate the matrix exponential of it using Sympy.
from sympy import *

a0, a1, y0, y1, k, t = symbols('a0 a1 y0 y1 k t')
A = Matrix([[-y1*t - a1*t, -y0*t, 0],
            [t, -a1*t, -k*t],
            [0, t, -a1*t]])
res = exp(A)
print(res)
This gives the following error:
Warning (from warnings module):
File "/usr/lib/python3/dist-packages/apport/report.py", line 13
import fnmatch, glob, traceback, errno, sys, atexit, locale, imp, stat
DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 94, in wrapper
retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'MutableDenseMatrix'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 94, in wrapper
retval = cfunc(*args, **kwargs)
TypeError: unhashable type: 'MutableDenseMatrix'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/sympy/core/compatibility.py", line 419, in as_int
raise TypeError
TypeError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/idlelib/run.py", line 559, in runcode
exec(code, self.locals)
File "/home/kaguro/Documents/PROJECT/Paper-simulation/Finite-time/Model B/extra.py", line 7, in <module>
res = exp(At)
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 96, in wrapper
retval = func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/sympy/core/function.py", line 473, in __new__
result = super(Function, cls).__new__(cls, *args, **options)
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 96, in wrapper
retval = func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/sympy/core/function.py", line 288, in __new__
evaluated = cls.eval(*args)
File "/usr/lib/python3/dist-packages/sympy/functions/elementary/exponential.py", line 320, in eval
return arg.exp()
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 3208, in exp
P, J = self.jordan_form()
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 1811, in jordan_form
jordan_basis = [eig_mat(eig, 1).nullspace()[0] for eig in blocks]
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 1811, in <listcomp>
jordan_basis = [eig_mat(eig, 1).nullspace()[0] for eig in blocks]
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 1011, in nullspace
reduced, pivots = self.rref(iszerofunc=iszerofunc, simplify=simplify)
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 941, in rref
ret, pivot_cols = self._eval_rref(iszerofunc=iszerofunc,
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 593, in _eval_rref
reduced, pivot_cols, swaps = self._row_reduce(iszerofunc, simpfunc,
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 713, in _row_reduce
assumed_nonzero, newly_determined = _find_reasonable_pivot(
File "/usr/lib/python3/dist-packages/sympy/matrices/matrices.py", line 5327, in _find_reasonable_pivot
if x.equals(S.Zero):
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 757, in equals
constant = diff.is_constant(simplify=False, failing_number=True)
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 683, in is_constant
if b is not None and b is not S.NaN and b.equals(a) is False:
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 834, in equals
mp = minimal_polynomial(diff)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 655, in minimal_polynomial
result = _minpoly_compose(ex, x, domain)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 534, in _minpoly_compose
res = _minpoly_add(x, dom, *ex.args)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 362, in _minpoly_add
mp = _minpoly_op_algebraic_element(Add, a[0], a[1], x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 242, in _minpoly_op_algebraic_element
mp1 = _minpoly_compose(ex1, x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 547, in _minpoly_compose
mp1 = minimal_polynomial(ex1, x)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 655, in minimal_polynomial
result = _minpoly_compose(ex, x, domain)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 557, in _minpoly_compose
res = _minpoly_mul(x, dom, *ex.args)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 374, in _minpoly_mul
mp = _minpoly_op_algebraic_element(Mul, a[0], a[1], x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 244, in _minpoly_op_algebraic_element
mp2 = _minpoly_compose(ex2, y, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 559, in _minpoly_compose
res = _minpoly_pow(ex.base, ex.exp, x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 337, in _minpoly_pow
mp = _minpoly_compose(ex, x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 534, in _minpoly_compose
res = _minpoly_add(x, dom, *ex.args)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 362, in _minpoly_add
mp = _minpoly_op_algebraic_element(Add, a[0], a[1], x, dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 279, in _minpoly_op_algebraic_element
res = _choose_factor(factors, x, op(ex1, ex2), dom)
File "/usr/lib/python3/dist-packages/sympy/polys/numberfields.py", line 70, in _choose_factor
if abs(f.as_expr().evalf(prec1, points)) < eps:
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 171, in __abs__
return Abs(self)
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 94, in wrapper
retval = cfunc(*args, **kwargs)
File "/usr/lib/python3/dist-packages/sympy/core/function.py", line 473, in __new__
result = super(Function, cls).__new__(cls, *args, **options)
File "/usr/lib/python3/dist-packages/sympy/core/cache.py", line 94, in wrapper
retval = cfunc(*args, **kwargs)
File "/usr/lib/python3/dist-packages/sympy/core/function.py", line 288, in __new__
evaluated = cls.eval(*args)
File "/usr/lib/python3/dist-packages/sympy/functions/elementary/complexes.py", line 473, in eval
arg = signsimp(arg, evaluate=False)
File "/usr/lib/python3/dist-packages/sympy/simplify/simplify.py", line 383, in signsimp
e = sub_post(sub_pre(expr))
File "/usr/lib/python3/dist-packages/sympy/simplify/cse_opts.py", line 16, in sub_pre
adds = [a for a in e.atoms(Add) if a.could_extract_minus_sign()]
File "/usr/lib/python3/dist-packages/sympy/simplify/cse_opts.py", line 16, in <listcomp>
adds = [a for a in e.atoms(Add) if a.could_extract_minus_sign()]
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 2386, in could_extract_minus_sign
(negative_self).extract_multiplicatively(-1) is not None)
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 2208, in extract_multiplicatively
newarg = arg.extract_multiplicatively(c)
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 2221, in extract_multiplicatively
newarg = arg.extract_multiplicatively(c)
File "/usr/lib/python3/dist-packages/sympy/core/expr.py", line 2134, in extract_multiplicatively
elif c == self:
File "/usr/lib/python3/dist-packages/sympy/core/numbers.py", line 2248, in __eq__
return Rational.__eq__(self, other)
File "/usr/lib/python3/dist-packages/sympy/core/numbers.py", line 1906, in __eq__
integer_log(self.p//m, 2) == (t, True)
File "/usr/lib/python3/dist-packages/sympy/core/power.py", line 147, in integer_log
y = as_int(y)
File "/usr/lib/python3/dist-packages/sympy/core/compatibility.py", line 425, in as_int
raise ValueError('%s is not an integer' % (n,))
ValueError: 1 is not an integer
I don't understand why it is not working. Earlier I used a 2x2 matrix of a similar form, and it worked perfectly (albeit producing a very complicated-looking result).
The eigenvalues of this matrix can be found, but asking for the eigenvectors gives the same error, so I can't use the diagonalisation method manually either (I didn't expect that to work anyway, because then SymPy would have done it itself).
Any help would be greatly appreciated.
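One thing that may be worth trying, as a sketch only: the failure happens deep inside SymPy's zero-testing while building the Jordan form, and declaring the symbols with assumptions (for example positive=True) sometimes lets those pivot tests resolve. This is an assumption, not a confirmed fix for this particular matrix:

from sympy import symbols, Matrix, simplify

# Assumption: positivity assumptions can help SymPy decide whether pivots are zero
a0, a1, y0, y1, k, t = symbols('a0 a1 y0 y1 k t', positive=True)
A = Matrix([[-y1*t - a1*t, -y0*t, 0],
            [t, -a1*t, -k*t],
            [0, t, -a1*t]])
res = simplify(A.exp())   # Matrix.exp() computes the matrix exponential
print(res)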

(Casting) errors using extract_(relevant_)features from tsfresh

Trying out the Python package tsfresh, I run into issues at the very first steps. Given a series, how do I (automatically) make features for it? This snippet produces different errors depending on which part I run.
import tsfresh
import pandas as pd
import numpy as np
#tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind='float64', max_timeshift=50, rolling_direction=1)
#rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind=1, max_timeshift=50, rolling_direction=1)
rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
The error in the first case is:
""" Traceback (most recent call last): File "C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 350, in _f
result = [("", func(data))] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 193, in variance_larger_than_standard_deviation
y = np.var(x) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core\fromnumeric.py",
line 3157, in var
**kwargs) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core_methods.py",
line 110, in _var
arrmean, rcount, out=arrmean, casting='unsafe', subok=False) TypeError: unsupported operand type(s) for /: 'str' and 'int' """
and in the second case
""" Traceback (most recent call last): File
"C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 345, in _f
result = func(data, param=parameter_list) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 1752, in friedrich_coefficients
coeff = _estimate_friedrich_coefficients(x, m, r) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 145, in _estimate_friedrich_coefficients
result.dropna(inplace=True) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\frame.py",
line 4598, in dropna
result = self.loc(axis=axis)[mask] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1500, in getitem
return self._getitem_axis(maybe_callable, axis=axis) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1859, in _getitem_axis
if is_iterator(key): File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\dtypes\inference.py",
line 157, in is_iterator
return hasattr(obj, 'next') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\generic.py",
line 5065, in getattr
if self._info_axis._can_hold_identifiers_and_holds_name(name): File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 3984, in _can_hold_identifiers_and_holds_name
return name in self File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\category.py",
line 327, in contains
return contains(self, key, container=self._engine) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\arrays\categorical.py",
line 188, in contains
loc = cat.categories.get_loc(key) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 770, in get_loc
start, stop = self._find_non_overlapping_monotonic_bounds(key) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 717, in _find_non_overlapping_monotonic_bounds
start = self._searchsorted_monotonic(key, 'left') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 681, in _searchsorted_monotonic
return sub_idx._searchsorted_monotonic(label, side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 4755, in _searchsorted_monotonic
return self.searchsorted(label, side=side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\base.py",
line 1501, in searchsorted
return self._values.searchsorted(value, side=side, sorter=sorter) TypeError: Cannot cast array data from dtype('float64') to
dtype('
np.__version__ and tsfresh.__version__ are ('1.15.4', 'unknown'). I installed tsfresh using conda, probably from conda-forge. I am on Windows 10. Using another kernel with np.__version__, tsfresh.__version__ ('1.15.4', '0.11.2') leads to the same results.
Trying the first couple of cells from timeseries_forecasting_basic_example.ipynb yields the casting error as well.
Fixed it. Either the version on conda(-forge) or one of the dependencies was the issue. So using "conda uninstall tsfresh", "conda install patsy future six tqdm" and "pip install tsfresh" combined did the trick.
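For completeness, a minimal sketch of re-running the first (commented-out) variant after reinstalling; the assumption here is that kind is only a descriptive label for the series, so any string works:

import numpy as np
import pandas as pd
import tsfresh

series = pd.Series(np.random.randn(1000) / 50)
# kind is assumed to be a descriptive label; 'value' is a placeholder name
tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(
    series, kind='value', max_timeshift=50, rolling_direction=1)
rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')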

TypeError in Dask dataframe while converting to pandas using compute()

I can't figure out what the problem is in the given code.
I am using Dask to merge several dataframes. After merging, I want to find the unique values from one of the columns. I am getting a TypeError while converting from Dask to pandas using unique().compute(), but I cannot seem to find what the problem actually is. It says that a str cannot be interpreted as an int, yet the code passes through some of the files and fails on others. I also cannot find a problem with the data structure.
Any suggestions?
import pandas as pd
import dask.dataframe as dd
# Everything is fine until merging
# I have put several print(markers) to find the problem code
print('dask cols')
print(df_by_dask_merged.columns)
print()
print(dask_cols)
print()
print('find unique contigs values in dask dataframe')
pd_df = df_by_dask_merged['contig']
print(pd_df)
print()
print('mark 02')
# this is the problem code ??
pd_df_contig = pd_df.unique().compute()
print(pd_df_contig)
print('mark 03')
Output on Terminal:
dask cols
Index(['contig', 'pos', 'ref', 'all-alleles', 'ms01e_PI', 'ms01e_PG_al',
'ms02g_PI', 'ms02g_PG_al', 'all-freq'],
dtype='object')
['contig', 'pos', 'ref', 'all-alleles', 'ms01e_PI', 'ms01e_PG_al', 'ms02g_PI', 'ms02g_PG_al', 'all-freq']
find unique contigs values in dask dataframe
Dask Series Structure:
npartitions=1
int64
...
Name: contig, dtype: int64
Dask Name: getitem, 52 tasks
mark 02
Traceback (most recent call last):
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2145, in get_value
return tslib.get_value_box(s, key)
File "pandas/tslib.pyx", line 880, in pandas.tslib.get_value_box (pandas/tslib.c:17368)
File "pandas/tslib.pyx", line 889, in pandas.tslib.get_value_box (pandas/tslib.c:17042)
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "merge_haplotype.py", line 305, in <module>
main()
File "merge_haplotype.py", line 152, in main
pd_df_contig = pd_df.unique().compute()
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/base.py", line 155, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/base.py", line 404, in compute
results = get(dsk, keys, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/threaded.py", line 75, in get
pack_exception=pack_exception, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 521, in get_async
raise_exception(exc, tb)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/compatibility.py", line 67, in reraise
raise exc
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 290, in execute_task
result = _execute_task(task, data)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 271, in _execute_task
return func(*args2)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/dataframe/core.py", line 3404, in apply_and_enforce
df = func(*args, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/utils.py", line 687, in __call__
return getattr(obj, self.method)(*args, **kwargs)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/frame.py", line 4133, in apply
return self._apply_standard(f, axis, reduce=reduce)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/frame.py", line 4229, in _apply_standard
results[i] = func(v)
File "merge_haplotype.py", line 249, in <lambda>
apply(lambda row : update_cols(row, sample_name), axis=1, meta=(int))
File "merge_haplotype.py", line 278, in update_cols
if 'N|N' in df_by_dask[sample_name + '_PG_al']:
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/series.py", line 601, in __getitem__
result = self.index.get_value(self, key)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2153, in get_value
raise e1
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2139, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas/index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas/index.c:3338)
File "pandas/index.pyx", line 113, in pandas.index.IndexEngine.get_value (pandas/index.c:3041)
File "pandas/index.pyx", line 161, in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)
File "pandas/src/hashtable_class_helper.pxi", line 732, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)
File "pandas/src/hashtable_class_helper.pxi", line 740, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)
KeyError: ('ms02g_PG_al', 'occurred at index 0')
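The KeyError at the bottom points at update_cols rather than at unique() itself. A hedged sketch of what a row-wise version might look like, assuming the function is applied with axis=1 so each call receives a single row Series; the body below is illustrative only, not the original code:

sample_name = 'ms02g'   # placeholder; the original script builds this name elsewhere

def update_cols(row, sample_name):
    # axis=1 passes one row at a time, so look the column up on the row itself,
    # not on the whole df_by_dask frame (that whole-frame lookup raises the KeyError)
    value = row[sample_name + '_PG_al']
    if 'N|N' in value:
        return 0          # placeholder for the original update logic
    return value

df_by_dask['updated'] = df_by_dask.apply(
    lambda row: update_cols(row, sample_name), axis=1,
    meta=('updated', 'object'))   # meta describes the returned column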

Pandas hashtable gives KeyError: 0

I am trying to get the matching elements of two pandas data tables by indexing the data and merging them. I use this for a very large amount of data (millions of rows). The first table (df) is constant, and the second (d2) changes in every loop; its new elements are merged with the first table.
Here is my code for this process:
df = pd.read_csv("inputfile.csv", header=None)
d1 = pd.DataFrame(df).set_index(0)
for i in range(0, len(df)):
    try:
        follower_id = twitter.get_followers_ids(user_id=df.iloc[i][0], cursor=next_cursor)
        f = follower_id['ids']
        json.dumps(f)
        d2 = pd.DataFrame(f).set_index(0)
        match_result = pd.merge(d1, d2, left_index=True, right_index=True)
        fk = [df.iloc[i][0] for number in range(len(match_result))]
        DF = pd.DataFrame(fk)
        DF.to_csv(r'output1.csv', header=None, sep=' ', index=None)
        match_result.to_csv(r'output2.csv', header=None, sep=' ')
I have found that this code runs well for a while, but after that (probably related to the size of the second DataFrame, which changes in every loop) the program gives me the following error message and stops running:
Traceback (most recent call last):
File "halozat3.py", line 39, in <module>
d2 = pd.DataFrame(f).set_index(0) #1Trump koveto kovetolistaja
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 2372, in set_index
level = frame[col].values
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1678, in __getitem__
return self._getitem_column(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1685, in _getitem_column
return self._get_item_cache(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/generic.py", line 1052, in _get_item_cache
values = self._data.get(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/internals.py", line 2565, in get
loc = self.items.get_loc(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/index.py", line 1181, in get_loc
return self._engine.get_loc(_values_from_object(key))
File "index.pyx", line 129, in pandas.index.IndexEngine.get_loc (pandas/index.c:3656)
File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3534)
File "hashtable.pyx", line 381, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7035)
File "hashtable.pyx", line 387, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6976)
KeyError: 0
What could be the problem?
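A hedged guess at the failure mode, based on the traceback ending in set_index: if follower_id['ids'] comes back empty for some user, pd.DataFrame(f) has no column 0, and set_index(0) raises KeyError: 0. A small guard inside the loop, as a sketch only:

for i in range(0, len(df)):
    follower_id = twitter.get_followers_ids(user_id=df.iloc[i][0], cursor=next_cursor)
    f = follower_id['ids']
    if not f:        # no followers returned: DataFrame has no column 0, set_index(0) would fail
        continue
    d2 = pd.DataFrame(f).set_index(0)
    # ... rest of the loop unchanged ...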
Do you have only one row in your dataframe?
You must write as many rows as you like. Look:
