I am trying to create different python file where the code is given below. While calling the method, I pass the mydata as data frame with these columns
['wage', 'educ', 'exper', 'tenure'].
import pandas as pd
import numpy as np
from prettytable import PrettyTable as pt
def LinearRegressionOLS(mydata,target_column):
if(not isinstance(mydata,pd.DataFrame)):
raise TypeError("Data must be of type Data Frame")
if(not isinstance(target_column,str)):
raise TypeError("target_column must be String")
if(target_column not in mydata.columns):
raise KeyError("target_column doesn't exist in Data Frame")
data=mydata.copy()
data["one"]=np.ones(data.count()[target_column])
column_list=["one"]
for i in data.columns:
column_list.append(i)
Y=data[target_column].as_matrix()
data.drop(target_column,inplace=True,axis=1)
X=data[column_list].as_matrix()
del data
beta = np.matmul(np.matmul(np.linalg.inv(np.matmul(X.T,X)),X.T),Y)
predY = np.matmul(X,beta)
total = np.matmul((Y-np.mean(Y)).T,(Y-np.mean(Y)))
residual = np.matmul((Y-predY).T,(Y-predY))
sigma = np.matmul((Y-predY).T,(Y-predY))/(X.shape[0]-X.shape[1])
omega = np.square(sigma)*np.linalg.inv(np.matmul(X.T,X))
SE = np.sqrt(np.diag(omega))
tstat = beta/SE
Rsq = 1-(residual/total)
final = pt()
final.add_column(" ",column_list)
final.add_column("Coefficients",beta)
final.add_column("Standard Error",SE)
final.add_column("t-stat",tstat)
print(final)
print("Residual: ",residual)
print("Total: ",total)
print("Standard Error: ",sigma)
print("R Square: ",Rsq)
After running the above code, by calling the function given below,
>>> c
['wage', 'educ', 'exper', 'tenure']
>>> import LR_OLS as inf
>>> inf.LinearRegressionOLS(file[c],"wage")
, i get some error like this
Traceback (most recent call last):
File "<pyshell#182>", line 1, in <module>
inf.LinearRegressionOLS(file[c],"wage")
File "E:\python\LR_OLS.py", line 29, in LinearRegressionOLS
File "C:\Program Files\Python35\lib\site-packages\pandas\core\frame.py", line 2133, in __getitem__
return self._getitem_array(key)
File "C:\Program Files\Python35\lib\site-packages\pandas\core\frame.py", line 2177, in _getitem_array
indexer = self.loc._convert_to_indexer(key, axis=1)
File "C:\Program Files\Python35\lib\site-packages\pandas\core\indexing.py", line 1269, in _convert_to_indexer
.format(mask=objarr[mask]))
KeyError: "['wage'] not in index"
Can anyone help me as to why i am getting this error. How can i resolve it?
The problem is that you still have 'wage' in 'column_list. So in order to never let it get in there do the following adaptation:
for i in data.columns:
if i != 'wage': # add this line to your code
column_list.append(i)
Related
SHORT DESCRIPTION:
The Main issue is that whenever i run the following code, i get the error below that:
import statsmodels.api as sm
from statsmodels.formula.api import ols
def onewayanaova (csv, vars, x="x-axis", y="y-axis"):
df = pd.read_csv(csv, delimiter=",")
df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=vars)
df_melt.columns = ['index', {x}, {y}]
model = ols(f'{y} ~ C({x})', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("The One-Way Anova Test Values are:\n")
print(anova_table)
onewayanaova("Book1.csv", ["a","b","c"])
The error is:
Traceback (most recent call last):
File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'set'
Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'set'
Traceback (most recent call last):
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\compat.py", line 36, in call_and_wrap_exc
return f(*args, **kwargs)
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 165, in eval
return eval(code, {}, VarLookupDict([inner_namespace]
File "<string>", line 1, in <module>
NameError: name 'axis' is not defined
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "c:\Users\mghaf\Desktop\Python Codes\ReMan Edu\test.py", line 3, in <module>
mn.onewayanaova("Book1.csv", ["a","b","c"])
File "c:\Users\mghaf\Desktop\Python Codes\ReMan Edu\maincode.py", line 154, in onewayanaova
model = ols(f'{y} ~ C({x})', data=df_melt).fit()
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\statsmodels\base\model.py", line 200, in from_formula
tmp = handle_formula_data(data, None, formula, depth=eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\statsmodels\formula\formulatools.py", line 63, in handle_formula_data
result = dmatrices(formula, Y, depth, return_type='dataframe',
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 309, in dmatrices
(lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 164, in _do_highlevel_design
design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 66, in _try_incr_builders
return design_matrix_builders([formula_like.lhs_termlist,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\build.py", line 693, in design_matrix_builders
cat_levels_contrasts) = _examine_factor_types(all_factors,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\build.py", line 443, in _examine_factor_types
value = factor.eval(factor_states[factor], data)
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 564, in eval
return self._eval(memorize_state["eval_code"],
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 547, in _eval
return call_and_wrap_exc("Error evaluating factor",
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\compat.py", line 43, in call_and_wrap_exc
exec("raise new_exc from e")
File "<string>", line 1, in <module>
patsy.PatsyError: Error evaluating factor: NameError: name 'axis' is not defined
y-axis ~ C(x-axis)
^^^^^^^^^
I think it is the X and Y variables I set in def onewayanaova (csv, vars, x="x-axis", y="y-axis"):. Maybe I need to change that so I don't get the error?
If you want a more detailed description, read below.
LONG DESCRIPTION:
I am trying to do a One Way Anova test. However, the main issue is that python keeps saying that there is a NameError, and that one of my values are not defined.
I am running the following code:
import statsmodels.api as sm
from statsmodels.formula.api import ols
def onewayanaova (csv, vars, x="x-axis", y="y-axis"):
df = pd.read_csv(csv, delimiter=",")
df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=vars)
df_melt.columns = ['index', {x}, {y}]
model = ols(f'{y} ~ C({x})', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("The One-Way Anova Test Values are:\n")
print(anova_table)
And:
import maincode as mn
mn.onewayanaova("Book1.csv", ["a","b","c"])
I get the following error (The first code is saved to a file named manicode.py, and the second code is saved to a file named test.py. "Book1.csv" is in the same folder as them). The error is:
Traceback (most recent call last):
File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'set'
Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'set'
Traceback (most recent call last):
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\compat.py", line 36, in call_and_wrap_exc
return f(*args, **kwargs)
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 165, in eval
return eval(code, {}, VarLookupDict([inner_namespace]
File "<string>", line 1, in <module>
NameError: name 'axis' is not defined
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "c:\Users\mghaf\Desktop\Python Codes\ReMan Edu\test.py", line 3, in <module>
mn.onewayanaova("Book1.csv", ["a","b","c"])
File "c:\Users\mghaf\Desktop\Python Codes\ReMan Edu\maincode.py", line 154, in onewayanaova
model = ols(f'{y} ~ C({x})', data=df_melt).fit()
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\statsmodels\base\model.py", line 200, in from_formula
tmp = handle_formula_data(data, None, formula, depth=eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\statsmodels\formula\formulatools.py", line 63, in handle_formula_data
result = dmatrices(formula, Y, depth, return_type='dataframe',
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 309, in dmatrices
(lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 164, in _do_highlevel_design
design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\highlevel.py", line 66, in _try_incr_builders
return design_matrix_builders([formula_like.lhs_termlist,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\build.py", line 693, in design_matrix_builders
cat_levels_contrasts) = _examine_factor_types(all_factors,
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\build.py", line 443, in _examine_factor_types
value = factor.eval(factor_states[factor], data)
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 564, in eval
return self._eval(memorize_state["eval_code"],
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\eval.py", line 547, in _eval
return call_and_wrap_exc("Error evaluating factor",
File "C:\Users\mghaf\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\patsy\compat.py", line 43, in call_and_wrap_exc
exec("raise new_exc from e")
File "<string>", line 1, in <module>
patsy.PatsyError: Error evaluating factor: NameError: name 'axis' is not defined
y-axis ~ C(x-axis)
^^^^^^^^^
The main error that I see is that I named the X and Y variables as: x="x-axis", y="y-axis". But i do not get why that gives me an error, as I made a very neat looking boxplot from it (but I know that X and Y are used as the axis titles):
def boxplot (csv, vars, x="x-axis", y="y-axis"):
#https://www.reneshbedre.com/blog/anova.html
df = pd.read_csv(csv, delimiter=",")
df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=vars)
df_melt.columns = ['index', x, y]
ax = sns.boxplot(x=x, y=y, data=df_melt, color='#99c2a2')
ax = sns.swarmplot(x=x, y=y, data=df_melt, color='#7d0013')
plt.show()
BUT, whenever I write this code from someone else, it gives the output I want:
import statsmodels.api as sm
from statsmodels.formula.api import ols
import pandas as pd
df = pd.read_csv("https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt", sep="\t")
df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=['A', 'B', 'C', 'D'])
df_melt.columns = ['index', 'treatments', 'value']
model = ols('value ~ C(treatments)', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)
The output that i get with the above code:
sum_sq df F PR(>F)
C(treatments) 3010.95 3.0 17.49281 0.000026
Residual 918.00 16.0 NaN NaN
The main issue is that i need to change values of model = ols('value ~ C(treatments)', data=df_melt).fit() and df_melt.columns = ['index', 'treatments', 'value'] because most datasets do not have 'treatments', 'value' as their database. If your wondering what my .csv file has is this:
Column headers of a, b and c
A list of equal amount of numbers in each of them
My main issue is:
Please try and help me understand why I cannot replace 'value ~ C(treatments)' with X and Y!
Source of the code: https://www.reneshbedre.com/blog/anova.html
In statsmodels formulae, you need to quote your variables (i.e. columns in your dataframe) when they contain special characters such as -. Have a look at the documentation, your term "x-axis" is interpreted as "x" - "axis". Quoting variable can be done with the Q() transformation. Make sure to quote the variable name inside with different (single/double) quotes that you use for the string:
model = ols(f'Q("{y}") ~ C(Q("{x}"))', data=df_melt).fit()
It seems that model = ols('value ~ C(treatments)', data=df_melt).fit() cannot have a variable subsitute (as i had in model = ols(f'{y} ~ C({x})', data=df_melt).fit()). This is also the case if i use model = ols(f'Q("{y}") ~ C(Q("{x}"))', data=df_melt).fit(), as mentioned by #Rob.
Therefore, to make it work and have my own names, i just have to rename df_melt.columns = ['index', 'treatments', 'value'] in relation to model = ols('value ~ C(treatments)', data=df_melt).fit() (where 'treatments', 'value' are the same thing in teh two lines of code).
here is the mismatch error I keep getting. I'm inputting "202710".
Traceback (most recent call last):
File "nbastatsrecieveit.py", line 29, in <module>
df.columns = headers
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\generic.py", line 5149, in __setattr__
return object.__setattr__(self, name, value)
File "pandas\_libs\properties.pyx", line 66, in pandas._libs.properties.AxisProperty.__set__
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\generic.py", line 564, in _set_axis
self._mgr.set_axis(axis, labels)
File "C:\Users\*\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\pandas\core\internals\managers.py", line 226, in set_axis
raise ValueError(
ValueError: Length mismatch: Expected axis has 0 elements, new values have 24 elements
To be honest, I'm not sure as to how to go about fixing this problem as it works with specific player IDs but not all of then. Here is the rest of my code:
from nba_api.stats.endpoints import shotchartdetail
import pandas as pd
import json
from openpyxl import Workbook
print('Player ID?')
playerid = input()
filename = str(playerid) + '.xlsx'
response = shotchartdetail.ShotChartDetail(
team_id= 0,
context_measure_simple = 'FGA',
#last_n_games = numGames,
game_id_nullable = '0041900403',
player_id= playerid
)
content = json.loads(response.get_json())
# transform contents into dataframe
results = content['resultSets'][0]
headers = results['headers']
rows = results['rowSet']
#df = pd.DataFrame(rows)
df = pd.DataFrame(rows)
df.columns = headers
# write to excel file
df.to_excel(filename, index=False)
This is because your df is empty for ID 202710. Exception handling will resolve the issue here-
df = pd.DataFrame(rows)
try:
df.columns = headers
except:
pass
I can't get data into excel using this code
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
driver = webdriver.Chrome()
mainurl = 'https://offerup.com/explore/sck/tx/austin/cars-trucks/'
driver.get(mainurl)
res = driver.execute_script("return document.documentElement.outerHTML")
page_soup = BeautifulSoup(res, 'html.parser')
# btn = driver.find_element_by_xpath('//*[#id="react-container"]/div/div[2]/div[2]/div[2]/div[3]/button').click()
records = []
for a in page_soup.find_all('span', class_='_nn5xny4 _y9ev9r'):
title = a.text
print(title)
records.append(title)
prices = []
for b in page_soup.find_all('span', class_='_s3g03e4'):
price = b.text
print(price)
prices.append(price)
location = []
for c in page_soup.find_all('span', class_='_19rx43s2'):
loc = c.text
print(loc)
location.append(loc)
df = pd.DataFrame(records, prices, location)
print(df)
df.to_csv('trymenew.csv')
Traceback (most recent call last):
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\managers.py", line 1654, in create_block_manager_from_blocks
make_block(values=blocks[0], placement=slice(0, len(axes[0])))
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\blocks.py", line 3047, in make_block
return klass(values, ndim=ndim, placement=placement)
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\blocks.py", line 2595, in __init__
super().__init__(values, ndim=ndim, placement=placement)
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\blocks.py", line 124, in __init__
raise ValueError(
ValueError: Wrong number of items passed 1, placement implies 44
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Saba\Desktop\cars_offerup.py", line 29, in <module>
df = pd.DataFrame(records, prices, location)
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\frame.py", line 488, in __init__
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\construction.py", line 210, in init_ndarray
return create_block_manager_from_blocks(block_values, [columns, index])
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\managers.py", line 1664, in create_block_manager_from_blocks
construction_error(tot_items, blocks[0].shape[1:], axes, e)
File "C:\Users\Saba\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pandas\core\internals\managers.py", line 1694, in construction_error
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
ValueError: Shape of passed values is (44, 1), indices imply (44, 44)
i have this type of error, can you help me solve it?
The pd.DataFrame as we can see in the documentation expects as the first argument the data, but you passed the data as 3 separate arguments.
In order to fix that, we change:
df = pd.DataFrame(records, prices, location) # Wrong
to
df = pd.DataFrame([records, prices, location]) # Correct
I have wrote this simple code to test things out for a more complicated one:
import sys
from openpyxl import load_workbook
from openpyxl.chart import Reference, ScatterChart, Series
from openpyxl.chart.label import DataLabel, DataLabelList
from openpyxl.chart.marker import Marker
from openpyxl.chart.shapes import GraphicalProperties
from openpyxl.drawing.colors import ColorChoice
wb = load_workbook(sys.argv[1])
for ws in wb:
chart = ScatterChart(scatterStyle='smoothMarker')
for x in range(2, 192):
labels = Reference(ws, min_col=1, max_col=1, min_row=x, max_row=x)
xaxis = Reference(ws, min_col=2, max_col=2, min_row=x, max_row=x)
data = Reference(ws, min_col=3, max_col=3, min_row=x, max_row=x)
s = Series(xaxis, data, title=ws['A' + str(x)].value)
sp_color = ColorChoice(prstClr=(str(ws['A' + str(x)].value)))
sp_sppr = GraphicalProperties(solidFill=sp_color)
s.marker = Marker(symbol=('circle'), size=20, spPr=sp_sppr)
s.marker.spPr.ln.noFill = True
s.spPr.ln.noFill = True
slabel = list() #EDITED
for x in labels.cells: #EDITED
slabel.append(ws[x].value) #EDITED
s.labels = DataLabelList(dLbl=slabel, dLblPos='bestFit')
chart.series.append(s)
chart.dataLabels = (DataLabelList(showLegendKey=True))
ws.add_chart(chart, 'D1')
wb.save(sys.argv[1])
the file use as sys.argv[1] is like that (technically it's just the color list for prstClr of ColorChoice with x += 10 and y = x):
Color x y
beige 0 0
forestGreen 10 10
dkGoldenrod 20 20
lightPink 30 30
slateGrey 40 40
the line s.labels = DataLabelList(dLbl=slabel, dLblPos='bestFit') outputs the error:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/base.py", line 57, in _convert
value = expected_type(value)
ValueError: invalid literal for int() with base 10: 'beige'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/base.py", line 57, in _convert
value = expected_type(value)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/chart/label.py", line 100, in __init__
self.idx = idx
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/nested.py", line 36, in __set__
super(Nested, self).__set__(instance, value)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/base.py", line 69, in __set__
value = _convert(self.expected_type, value)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/base.py", line 59, in _convert
raise TypeError('expected ' + str(expected_type))
TypeError: expected <class 'int'>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "csv/test.py", line 37, in <module>
s.labels = DataLabelList(dLbl=slabel, dLblPos='bestFit')
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/chart/label.py", line 128, in __init__
self.dLbl = dLbl
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/sequence.py", line 27, in __set__
seq = [_convert(self.expected_type, value) for value in seq]
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/sequence.py", line 27, in <listcomp>
seq = [_convert(self.expected_type, value) for value in seq]
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/openpyxl/descriptors/base.py", line 59, in _convert
raise TypeError('expected ' + str(expected_type))
TypeError: expected <class 'openpyxl.chart.label.DataLabel'>
EDIT: I used a list as Charlie pointed it out without more success, this list should contains the same amount of items with the label names of each.
Either slabel.append(ws[x].value) or slabel.append(x) output the same error, what should that list contains?
Versions:
- openpyxl 2.4.8
- python 3.6
- macOS Siera 10.12.6
Regards
Sequence is a descriptor used when writing classes you're using it incorrectly. You need to provide a Python sequence (list, tuple, etc.)
I am trying to run a regression using the following dataframe dfMyRoll the head of the dataframe looks like:
SCORE SCORE_LAG
date
2007-10-29 -0.031551 NaN
2007-10-30 0.000100 -0.031551
2007-10-31 0.000100 0.000100
2007-11-01 0.000100 0.000100
2007-11-02 0.000100 0.000100
The code that I am using is :
import glob
import pandas as pd
import os.path
import scipy
from scipy.stats import linregress
def main():
dataPath = "C:/Users/Stacey/Documents/data/Roll"
roll = 4
1ID = "BBG.XNGS.AAPL.S"
2ID = "BBG.XNGS.AMAT.S"
print(1ID,1ID)
cointergration = getCointergration(dataPath,1ID,2ID,roll)
return
def getCointergration(dataPath,1ID,2ID,roll):
for myRoll in range((roll-4),roll,1):
path = dataPath+str(myRoll)+'/'
filename='PairData_'+1ID+'_'+2ID+'.csv'
for fname in glob.iglob(path+filename):
dfMyRoll = pd.read_csv(fname, header=0, usecols=[0,31],parse_dates=[0], dayfirst=True,index_col=[0], names=['date', 'SCORE'])
dfMyRoll['SCORE_LAG'] = dfMyRoll['SCORE'].shift(1)
print('cointergration',dfMyRoll.head())
X = dfMyRoll[1:,'SCORE']
Y = dfMyRoll[1:,'SCORE_LAG']
slope,intercept,_,_,stderr=linregress(dfMyRoll[1:,'SCORE'],dfMyRoll[1:,'SCORE_LAG'])
if __name__ == "__main__":
print ("CointergrationTest...19/05/17")
try:
main()
except KeyboardInterrupt:
print ("Ctrl+C pressed. Stopping...")
I get the error: TypeError: unhashable type: 'slice'. I have looked at previous posts on this subject and tried adding iloc to the X and Y time series in the following way:
X = dfMyRoll.iloc[1:,'SCORE']
Y = dfMyRoll.iloc[1:,'SCORE_LAG']
but unfortunately I can't seem to find a solution. Please see below for a stack trace:
Traceback (most recent call last):
File "<ipython-input-3-431422978139>", line 1, in <module>
runfile('C:/Users/Stacey/Documents/scripts/cointergrationTest.py', wdir='C:/Users/Stacey/Documents/scripts')
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 64, in <module>
main()
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 23, in main
cointergration = getCointergration(dataPath,1ID,2ID,roll)
File "C:/Users/Stacey/Documents/scripts/cointergrationTest.py", line 42, in getCointergration
X = dfMyRoll[1:,'SCORE']
File "C:\Anaconda\lib\site-packages\pandas\core\frame.py", line 2059, in __getitem__
return self._getitem_column(key)
File "C:\Anaconda\lib\site-packages\pandas\core\frame.py", line 2066, in _getitem_column
return self._get_item_cache(key)
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 1384, in _get_item_cache
res = cache.get(item)
TypeError: unhashable type: 'slice'
You need to use loc rather than iloc:
X = dfMyRoll.loc[1:,'SCORE']
Y = dfMyRoll.loc[1:,'SCORE_LAG']
iloc is read as "integer location", and only accepts integer position. loc is somewhat more forgiving and allows both (you can also use ix).