How to fix "Length of value doesnt match index" in python? - python

I've been trying to get the list of addresses of a franchise in Brazil, but when I run the code it starts, runs through two cities, and then stops with a ValueError.
The code I've been trying to run is this:
import requests
import json
import pandas as pd

dMun = pd.read_json('https://servicodados.ibge.gov.br/api/v1/localidades/municipios')
dEndTotal = pd.DataFrame()

for iMun in range(len(dMun)):
    sCidade = dMun.loc[iMun, 'nome']
    print(str(iMun) + ' - ' + dMun.loc[iMun, 'nome'])
    sSigla = dMun.loc[iMun, 'microrregiao']['mesorregiao']['UF']['sigla']
    r = requests.post('https://www.5asec.com.br/busca-lojas-endereco',
                      data={'endereco': 'A, 1 {}/{}'.format(sCidade, sSigla)})
    jEnd = json.loads(r.text)
    dEnd = pd.DataFrame.from_records(jEnd['lojas'])
    print(dEnd)
    if len(dEnd) > 0:
        for sChave in jEnd['lojas'][0]['Endereco'].keys():
            dEnd[sChave] = []
        for i in range(len(dEnd)):
            for sChave in jEnd['lojas'][i]['Endereco'].keys():
                dEnd[sChave][i] = jEnd['lojas'][i]['Endereco'][sChave]
    dEndTotal = pd.concat([dEndTotal, dEnd], ignore_index=False).drop_duplicates().reset_index(drop=True)
But it results in this error:
0 - Alta Floresta D'Oeste
Empty DataFrame
Columns: []
Index: []
1 - Ariquemes
CEP Codigo CodigoExterno ... Telefone TemEcommerce Url
0 76870512 675 69004P ... 35366864 False ariquemes
[1 rows x 16 columns]
Traceback (most recent call last):
File "<ipython-input-1-cd9a35514f7e>", line 1, in <module>
runfile('C:/Users/vinis/OneDrive/Área de Trabalho/5aSec.py', wdir='C:/Users/vinis/OneDrive/Área de Trabalho')
File "C:\Users\vinis\Anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "C:\Users\vinis\Anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 95, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/vinis/OneDrive/Área de Trabalho/5aSec.py", line 38, in <module>
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3370, in __setitem__
self._set_item(key, value)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3445, in _set_item
value = self._sanitize_column(key, value)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3630, in _sanitize_column
value = sanitize_index(value, self.index, copy=False)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\internals\construction.py", line 519, in sanitize_index
raise ValueError('Length of values does not match length of index')
ValueError: Length of values does not match length of index
How can I fix this one?
Thanks for the help, guys, and I'm sorry if the post isn't entirely correct.
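For what it's worth, the traceback ends in DataFrame.__setitem__, and dEnd[sChave] = [] assigns a zero-length list to a frame that already has rows, which is the kind of assignment that raises "Length of values does not match length of index". Below is a minimal sketch of one way around it, building the address columns straight from the JSON instead of filling empty columns row by row. The keys 'lojas' and 'Endereco' come from the question; the rest is an assumption about the response shape, not a confirmed fix.

dEnd = pd.DataFrame.from_records(jEnd['lojas'])
if len(dEnd) > 0:
    # one row per store, same length as dEnd, so there is no length mismatch
    dEndereco = pd.DataFrame([loja['Endereco'] for loja in jEnd['lojas']])
    dEnd = pd.concat([dEnd, dEndereco], axis=1)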

Related

Backtrader giving IndexError: array assignment index out of range

I am trying to run the following strategy:
import numpy as np
import backtrader as bt


def max_n(array, n):
    return np.argpartition(array, -n)[-n:]


class CrossSectionalMR(bt.Strategy):
    params = (
        ('num_positions', 100),
    )

    def __init__(self, temp):
        self.inds = {}
        for d in self.datas:
            self.inds[d] = {}
            self.inds[d]["pct"] = bt.indicators.PercentChange(d.close, period=1)

    def prenext(self):
        self.next()

    def next(self):
        available = list(filter(lambda d: len(d), self.datas))  # only look at data that existed yesterday
        rets = np.zeros(len(available))
        for i, d in enumerate(available):
            rets[i] = self.inds[d]['pct'][0]

        market_ret = np.mean(rets)
        weights = -(rets - market_ret)
        max_weights_index = max_n(np.abs(weights), self.params.num_positions)
        max_weights = weights[max_weights_index]
        weights = weights / np.sum(np.abs(max_weights))

        for i, d in enumerate(available):
            if i in max_weights_index:
                self.order_target_percent(d, target=weights[i])
            else:
                self.order_target_percent(d, target=0)
The full error is:
Traceback (most recent call last):
File "/home/poblivsig/Software/pycharm-2020.3.1/plugins/python/helpers/pydev/pydevd.py", line 1477, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "/home/poblivsig/Software/pycharm-2020.3.1/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/home/poblivsig/Dropbox/meanrev/main.py", line 190, in <module>
dd, cagr, sharpe = backtest(datas, CrossSectionalMR, plot=True, num_positions=100)
File "/home/poblivsig/Dropbox/meanrev/main.py", line 181, in backtest
results = cerebro.run()
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/cerebro.py", line 1127, in run
runstrat = self.runstrategies(iterstrat)
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/cerebro.py", line 1293, in runstrategies
self._runonce(runstrats)
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/cerebro.py", line 1652, in _runonce
strat._once()
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/lineiterator.py", line 297, in _once
indicator._once()
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/lineiterator.py", line 297, in _once
indicator._once()
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/linebuffer.py", line 630, in _once
self.oncestart(self._minperiod - 1, self._minperiod)
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/lineroot.py", line 165, in oncestart
self.once(start, end)
File "/home/poblivsig/Dropbox/meanrev/venv/lib/python3.8/site-packages/backtrader/linebuffer.py", line 672, in once
dst[i] = src[i + ago]
IndexError: array assignment index out of range
python-BaseException
Any help would be greatly appreciated.
I grab the data from Yahoo and store it in CSV files, which are then loaded up and added to Cerebro. Sometimes the code cannot get the full SPY list, but I don't think that is the problem here.
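Not a definitive fix, but one commonly reported trigger for this IndexError in backtrader's run-once path is a data feed that is empty or much shorter than the others (for example a CSV that only received a few rows from Yahoo). A small diagnostic sketch that checks the files before handing them to Cerebro; the data/ directory and the Date column name are assumptions, not taken from the question.

import glob
import pandas as pd

lengths = {}
for path in glob.glob('data/*.csv'):
    bars = pd.read_csv(path, parse_dates=['Date'])
    lengths[path] = len(bars)

# Print the shortest feeds first; empty or very short files are the usual suspects.
for path, n in sorted(lengths.items(), key=lambda kv: kv[1]):
    print(n, path)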

How to read json format from binance api using pandas?

I want to get live prices of a cryptocurrency from the Binance REST API.
I am using:
import pandas as pd

def inCoin(coin):
    url = 'https://api.binance.com/api/v3/ticker/price?symbol=' + coin + 'USDT'
    df = pd.read_json(url)
    df.columns = ["symbol", "price"]
    return df
It gives the following error when this function is called:
Traceback (most recent call last):
File "ee2.py", line 201, in <module>
aa = inCoin('BTC')
File "ee2.py", line 145, in inCoin
df = pd.read_json(url, orient='index')
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/io/json/json.py", line 422, in read_json
result = json_reader.read()
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/io/json/json.py", line 529, in read
obj = self._get_object_parser(self.data)
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/io/json/json.py", line 546, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/io/json/json.py", line 638, in parse
self._parse_no_numpy()
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/io/json/json.py", line 861, in _parse_no_numpy
loads(json, precise_float=self.precise_float), dtype=None).T
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 348, in __init__
mgr = self._init_dict(data, index, columns, dtype=dtype)
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 459, in _init_dict
return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 7356, in _arrays_to_mgr
index = extract_index(arrays)
File "/home/hspace/.local/lib/python3.6/site-packages/pandas/core/frame.py", line 7393, in extract_index
raise ValueError('If using all scalar values, you must pass'
ValueError: If using all scalar values, you must pass an index
Previously, I used this function to fetch historical data from the Binance API:
def Cryptodata2(symbol, tick_interval='1m'):
    url = 'https://api.binance.com/api/v1/klines?symbol=' + symbol + '&interval=' + tick_interval
    df = pd.read_json(url)
    df.columns = ["date", "open", "high", "low", "close", "volume",
                  "close time", "quote asset volume", "number of trades",
                  "taker buy base asset volume", "Taker buy quote asset volume", "ignore"]
    df['date'] = pd.to_datetime(df['date'], dayfirst=True, unit='ms')
    df.set_index('date', inplace=True)
    del df['ignore']
    return df
And this works fine.
I just want the price of that coin, shown as a number or a dataframe, from this URL:
https://api.binance.com/api/v3/ticker/price?symbol=BTCUSDT
Thanks for helping me.
Also, it would be great if you could provide more detail on debugging these kinds of ValueErrors.
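One possible workaround, sketched here rather than a definitive answer: the /ticker/price endpoint returns a single JSON object made of scalars, which is exactly what triggers "If using all scalar values, you must pass an index" in pd.read_json / the DataFrame constructor. Fetching it with requests and wrapping the dict in a list gives a one-row frame; the astype(float) step is my assumption that the price string should become numeric.

import requests
import pandas as pd

def inCoin(coin):
    url = 'https://api.binance.com/api/v3/ticker/price?symbol=' + coin + 'USDT'
    data = requests.get(url).json()           # e.g. {"symbol": "BTCUSDT", "price": "..."}
    df = pd.DataFrame([data])                 # wrap the dict in a list -> one-row frame
    df['price'] = df['price'].astype(float)   # the API returns the price as a string
    return df

print(inCoin('BTC'))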

TypeError: unhashable type: 'slice'

I am trying to run a regression using the following dataframe, dfMyRoll. The head of the dataframe looks like:
                SCORE  SCORE_LAG
date
2007-10-29  -0.031551        NaN
2007-10-30   0.000100  -0.031551
2007-10-31   0.000100   0.000100
2007-11-01   0.000100   0.000100
2007-11-02   0.000100   0.000100
The code that I am using is:
import glob
import pandas as pd
import os.path
import scipy
from scipy.stats import linregress

def main():
    dataPath = "C:/Users/Stacey/Documents/data/Roll"
    roll = 4
    1ID = "BBG.XNGS.AAPL.S"
    2ID = "BBG.XNGS.AMAT.S"
    print(1ID, 1ID)
    cointergration = getCointergration(dataPath, 1ID, 2ID, roll)
    return

def getCointergration(dataPath, 1ID, 2ID, roll):
    for myRoll in range((roll-4), roll, 1):
        path = dataPath + str(myRoll) + '/'
        filename = 'PairData_' + 1ID + '_' + 2ID + '.csv'
        for fname in glob.iglob(path + filename):
            dfMyRoll = pd.read_csv(fname, header=0, usecols=[0, 31], parse_dates=[0],
                                   dayfirst=True, index_col=[0], names=['date', 'SCORE'])
            dfMyRoll['SCORE_LAG'] = dfMyRoll['SCORE'].shift(1)
            print('cointergration', dfMyRoll.head())
            X = dfMyRoll[1:, 'SCORE']
            Y = dfMyRoll[1:, 'SCORE_LAG']
            slope, intercept, _, _, stderr = linregress(dfMyRoll[1:, 'SCORE'], dfMyRoll[1:, 'SCORE_LAG'])

if __name__ == "__main__":
    print("CointergrationTest...19/05/17")
    try:
        main()
    except KeyboardInterrupt:
        print("Ctrl+C pressed. Stopping...")
I get the error: TypeError: unhashable type: 'slice'. I have looked at previous posts on this subject and tried adding iloc to the X and Y time series in the following way:
X = dfMyRoll.iloc[1:,'SCORE']
Y = dfMyRoll.iloc[1:,'SCORE_LAG']
but unfortunately I can't seem to find a solution. Please see below for a stack trace:
Traceback (most recent call last):
File "<ipython-input-3-431422978139>", line 1, in <module>
runfile('C:/Users/Stacey/Documents/scripts/cointergrationTest.py', wdir='C:/Users/Stacey/Documents/scripts')
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 64, in <module>
main()
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 23, in main
cointergration = getCointergration(dataPath,1ID,2ID,roll)
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 42, in getCointergration
X = dfMyRoll[1:,'SCORE']
File "C:\Anaconda\lib\site-packages\pandas\core\frame.py", line 2059, in __getitem__
return self._getitem_column(key)
File "C:\Anaconda\lib\site-packages\pandas\core\frame.py", line 2066, in _getitem_column
return self._get_item_cache(key)
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 1384, in _get_item_cache
res = cache.get(item)
TypeError: unhashable type: 'slice'
You need to use loc rather than iloc:
X = dfMyRoll.loc[1:,'SCORE']
Y = dfMyRoll.loc[1:,'SCORE_LAG']
iloc is read as "integer location" and only accepts integer positions. loc is label-based and somewhat more forgiving here. (The older .ix accessor also allowed mixed label/position indexing, but it has since been deprecated and removed from pandas.)
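For illustration, a tiny made-up frame shaped like dfMyRoll above (not the asker's data) showing the difference:

import pandas as pd

df = pd.DataFrame({'SCORE': [0.1, 0.2, 0.3]},
                  index=pd.to_datetime(['2007-10-29', '2007-10-30', '2007-10-31']))

print(df.loc[:, 'SCORE'])   # label-based: column picked by name
print(df.iloc[1:, 0])       # position-based: rows from position 1, first column
# df.iloc[1:, 'SCORE']      # raises an error: iloc only takes integer positions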

use a shift for new column value in pandas

Within an IP scope table I have the name of each location and the starting IP address of that location.
The rule is: if the following row falls in the same address range, the ending IP address of the location is the next row's starting address minus 1; otherwise it is the last address of its own range.
Here is some sample data:
Name  StartRange
loc1  172.28.10.15
loc2  172.28.10.128
loc3  172.28.12.0
loc4  172.28.12.58
Expected result is:
Name  StartRange     EndIP
loc1  172.28.10.15   172.28.10.127
loc2  172.28.10.128  172.28.10.255
loc3  172.28.12.0    172.28.12.57
loc4  172.28.12.58   172.28.12.255
Here is the code I was trying:
from socket import inet_aton
from struct import unpack
import pandas as pd

mask = unpack(">L", inet_aton('255.255.255.0'))[0]

def getEndIP(startIP, endIP):
    hi = (startIP['StartIP'] & mask) + 255
    return hi if hi < endIP['StartIP'] else endIP['StartIP'] - 1

xls = pd.read_excel("E:\\TEMP\\AllScope.xlsx")
xls['StartIP'] = xls['StartRange'].map(lambda a: unpack(">L", inet_aton(a))[0])
xls = xls.sort_values('StartIP')
xls['EndIP'] = getEndIP(xls['StartIP'], xls['StartIP'].shift(-1))
print xls[['Name', 'StartRange', 'StartIP', 'EndIP']]
But I get a KeyError:
KeyError: 'StartIP'
What am I doing wrong? (I'm not too familiar yet with pandas)
Update:
Here is the trace:
runfile('E:/Documents/Projects/Python/Egyéb progik/Network/network.py', wdir='E:/Documents/Projects/Python/Egyéb progik/Network')
Traceback (most recent call last):
File "<ipython-input-67-6caaa536457c>", line 1, in <module>
runfile('E:/Documents/Projects/Python/Egyéb progik/Network/network.py', wdir='E:/Documents/Projects/Python/Egyéb progik/Network')
File "C:\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "E:/Documents/Projects/Python/Egyéb progik/Network/network.py", line 15, in <module>
File "E:/Documents/Projects/Python/Egyéb progik/Network/network.py", line 9, in getEndIP
File "C:\Anaconda2\lib\site-packages\pandas\core\series.py", line 603, in __getitem__
result = self.index.get_value(self, key)
File "C:\Anaconda2\lib\site-packages\pandas\indexes\base.py", line 2169, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas\index.pyx", line 98, in pandas.index.IndexEngine.get_value (pandas\index.c:3557)
File "pandas\index.pyx", line 106, in pandas.index.IndexEngine.get_value (pandas\index.c:3240)
File "pandas\index.pyx", line 156, in pandas.index.IndexEngine.get_loc (pandas\index.c:4363)
KeyError: 'StartIP'
Here is a pandas solution. Assuming dfl is:
      StartRange
Name
loc1  172.28.10.15
loc2  172.28.10.128
loc3  172.28.12.0
loc4  172.28.12.58
We first translate the strings into lists of ints for arithmetic:
dfl['StartRange'] = dfl.StartRange.apply(lambda s: [int(x) for x in s.split('.')])
dfl['EndIP'] = dfl.StartRange.shift(-1)
dfl.ix[-1, 'EndIP'] = [255, 255, 255, 255]

def adjust(row):
    start, end = row
    return min(start[:3] + [255], end[:3] + [end[3] - 1])

dfl['EndIP'] = dfl.apply(adjust, axis=1)
dfend = dfl.applymap(lambda l: '.'.join([str(x) for x in l]))
Then dfend is
      StartRange     EndIP
Name
loc1  172.28.10.15   172.28.10.127
loc2  172.28.10.128  172.28.10.255
loc3  172.28.12.0    172.28.12.57
loc4  172.28.12.58   172.28.12.255
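Note that .ix has been deprecated and removed in newer pandas, so the snippet above only runs on older versions. As a variant of the same shift-based idea against current pandas, here is a sketch using the standard-library ipaddress module and Series.where; the column names and the /24 assumption (the 255.255.255.0 mask) come from the question, everything else is illustrative rather than the original answer's method.

import ipaddress
import pandas as pd

df = pd.DataFrame({
    'Name': ['loc1', 'loc2', 'loc3', 'loc4'],
    'StartRange': ['172.28.10.15', '172.28.10.128', '172.28.12.0', '172.28.12.58'],
})

# Work on integer representations of the addresses.
start = df['StartRange'].map(lambda s: int(ipaddress.IPv4Address(s)))
next_start = start.shift(-1)

# Last address of the /24 that the start address belongs to.
subnet_end = (start // 256) * 256 + 255

# If the next row starts inside the same /24, stop one address before it;
# otherwise (or for the last row) stop at the end of the /24.
end = subnet_end.where(next_start.isna() | (next_start > subnet_end),
                       next_start - 1)

df['EndIP'] = end.astype('int64').map(lambda n: str(ipaddress.IPv4Address(int(n))))
print(df)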

Pandas hashtable gives KeyError: 0

I am trying to get the common elements of two pandas tables by indexing the data and merging them. I use this for a very large amount of data (millions of rows). The first table (df) is constant, and the second (d2) changes in every loop; its new elements are merged with the first table.
Here is my code for this process:
df = pd.read_csv("inputfile.csv", header=None)
d1 = pd.DataFrame(df).set_index(0)

for i in range(0, len(df)):
    try:
        follower_id = twitter.get_followers_ids(user_id=df.iloc[i][0], cursor=next_cursor)
        f = follower_id['ids']
        json.dumps(f)
        d2 = pd.DataFrame(f).set_index(0)
        match_result = pd.merge(d1, d2, left_index=True, right_index=True)
        fk = [df.iloc[i][0] for number in range(len(match_result))]
        DF = pd.DataFrame(fk)
        DF.to_csv(r'output1.csv', header=None, sep=' ', index=None)
        match_result.to_csv(r'output2.csv', header=None, sep=' ')
In my experience this code runs well for a while, but after that (probably related to the size of the second database, which changes every loop) the program gives me the following error message and stops running:
Traceback (most recent call last):
File "halozat3.py", line 39, in <module>
d2 = pd.DataFrame(f).set_index(0)  # follower list of Trump follower #1
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 2372, in set_index
level = frame[col].values
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1678, in __getitem__
return self._getitem_column(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1685, in _getitem_column
return self._get_item_cache(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/generic.py", line 1052, in _get_item_cache
values = self._data.get(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/internals.py", line 2565, in get
loc = self.items.get_loc(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/index.py", line 1181, in get_loc
return self._engine.get_loc(_values_from_object(key))
File "index.pyx", line 129, in pandas.index.IndexEngine.get_loc (pandas/index.c:3656)
File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3534)
File "hashtable.pyx", line 381, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7035)
File "hashtable.pyx", line 387, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6976)
KeyError: 0
What could be the problem?
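One hedged guess at the failure mode, based only on the traceback: pd.DataFrame(f).set_index(0) raises KeyError: 0 when f is an empty list, because the resulting frame then has no column 0 (for example when a user has no followers). A small self-contained sketch with a hypothetical safe_index helper that guards against that case:

import pandas as pd

def safe_index(ids):
    """Return an indexed frame, or None when ids is empty (an empty list
    yields a frame with no column 0, so set_index(0) raises KeyError: 0)."""
    if not ids:
        return None
    return pd.DataFrame(ids).set_index(0)

print(safe_index([111, 222, 333]))   # works: column 0 exists
print(safe_index([]))                # None instead of KeyError: 0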
Do you have only one row in your dataframe?
You can write as many rows as you like.
Look
