use a shift for new column value in pandas - python

Within an IP scope table I have the name of the location and the starting IP address of that location.
The rule is: if the following row is in the same address range then the ending IP address of the location is the next row's value - 1, otherwise the last address of his range.
Here is a sample data:
Name StartRange
loc1 172.28.10.15
loc2 172.28.10.128
loc3 172.28.12.0
loc4 172.28.12.58
Expected result is:
Name StartRange EndIP
loc1 172.28.10.15 172.28.10.127
loc2 172.28.10.128 172.28.10.255
loc3 172.28.12.0 172.28.12.57
loc4 172.28.12.58 172.28.12.255
Here is the code I was trying:
from socket import inet_aton
from struct import unpack
import pandas as pd
mask = unpack(">L", inet_aton('255.255.255.0'))[0]
def getEndIP(startIP, endIP):
hi = (startIP['StartIP'] & mask) + 255
return hi if hi < endIP['StartIP'] else endIP['StartIP'] - 1
xls = pd.read_excel("E:\\TEMP\\AllScope.xlsx")
xls['StartIP'] = xls['StartRange'].map(lambda a: unpack(">L", inet_aton(a))[0])
xls = xls.sort_values('StartIP')
xls['EndIP'] = getEndIP(xls['StartIP'], xls['StartIP'].shift(-1))
print xls[['Name', 'StartRange', 'StartIP', 'EndIP']]
But I have a key error message:
KeyError: 'StartIP'
What am I doing wrong? (I'm not too familiar yet with pandas)
Update:
Here is the trace:
runfile('E:/Documents/Projects/Python/Egyéb progik/Network/network.py', wdir='E:/Documents/Projects/Python/Egyéb progik/Network')
Traceback (most recent call last):
File "<ipython-input-67-6caaa536457c>", line 1, in <module>
runfile('E:/Documents/Projects/Python/Egyéb progik/Network/network.py', wdir='E:/Documents/Projects/Python/Egyéb progik/Network')
File "C:\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "E:/Documents/Projects/Python/Egyéb progik/Network/network.py", line 15, in <module>
File "E:/Documents/Projects/Python/Egyéb progik/Network/network.py", line 9, in getEndIP
File "C:\Anaconda2\lib\site-packages\pandas\core\series.py", line 603, in __getitem__
result = self.index.get_value(self, key)
File "C:\Anaconda2\lib\site-packages\pandas\indexes\base.py", line 2169, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas\index.pyx", line 98, in pandas.index.IndexEngine.get_value (pandas\index.c:3557)
File "pandas\index.pyx", line 106, in pandas.index.IndexEngine.get_value (pandas\index.c:3240)
File "pandas\index.pyx", line 156, in pandas.index.IndexEngine.get_loc (pandas\index.c:4363)
KeyError: 'StartIP'

Here a pandas solution: Assuming dfl is
StartRange
Name
loc1 172.28.10.15
loc2 172.28.10.128
loc3 172.28.12.0
loc4 172.28.12.58
We translate first strings in int lists for arithmetic :
dfl['StartRange']= dfl.StartRange.apply(lambda s : [int(x) for x in s.split('.')])
dfl['EndIP']=dfl.StartRange.shift(-1)
dfl.ix[-1,'EndIP']=[255,255,255,255]
def adjust(row):
start,end=row
return min( start[:3]+[255],end[:3]+[end[3]-1])
dfl['EndIP']=dfl.apply(adjust,axis=1)
dfend=dfl.applymap(lambda l : '.'.join([str(x) for x in l]))
Then dfend is
StartRange EndIP
Name
loc1 172.28.10.15 172.28.10.127
loc2 172.28.10.128 172.28.10.255
loc3 172.28.12.0 172.28.12.57
loc4 172.28.12.58 172.28.12.255

Related

How to fix "Length of value doesnt match index" in python?

I've been trying to get the list of addresses from a franchise at Brazil, but when I run the code, it starts, runs two cities and then it stops and appears "ValueError"
The code I've been trying to run is this:
import requests
import json
import pandas as pd
dMun = pd.read_json('https://servicodados.ibge.gov.br/api/v1/localidades/municipios')
dEndTotal = pd.DataFrame()
for iMun in range(len(dMun)):
sCidade = dMun.loc[iMun,'nome']
print(str(iMun) + ' - '+ dMun.loc[iMun,'nome'])
sSigla = dMun.loc[iMun,'microrregiao']['mesorregiao']['UF']['sigla']
r = requests.post('https://www.5asec.com.br/busca-lojas-endereco', data = {'endereco':'A, 1 {}/{}'.format(sCidade,sSigla)})
jEnd = json.loads(r.text)
dEnd = pd.DataFrame.from_records(jEnd['lojas'])
print(dEnd)
if len(dEnd) > 0:
for sChave in jEnd['lojas'][0]['Endereco'].keys():
dEnd[sChave] = []
for i in range(len(dEnd)):
for sChave in jEnd['lojas'][i]['Endereco'].keys():
dEnd[sChave][i] = jEnd['lojas'][i]['Endereco'][sChave]
dEndTotal = pd.concat([dEndTotal,dEnd],ignore_index=False).drop_duplicates().reset_index(drop=True)
But its resulting on this error:
0 - Alta Floresta D'Oeste
Empty DataFrame
Columns: []
Index: []
1 - Ariquemes
CEP Codigo CodigoExterno ... Telefone TemEcommerce Url
0 76870512 675 69004P ... 35366864 False ariquemes
[1 rows x 16 columns]
Traceback (most recent call last):
File "<ipython-input-1-cd9a35514f7e>", line 1, in <module>
runfile('C:/Users/vinis/OneDrive/Área de Trabalho/5aSec.py', wdir='C:/Users/vinis/OneDrive/Área de Trabalho')
File "C:\Users\vinis\Anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "C:\Users\vinis\Anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 95, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/vinis/OneDrive/Área de Trabalho/5aSec.py", line 38, in <module>
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3370, in __setitem__
self._set_item(key, value)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3445, in _set_item
value = self._sanitize_column(key, value)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\frame.py", line 3630, in _sanitize_column
value = sanitize_index(value, self.index, copy=False)
File "C:\Users\vinis\Anaconda2\lib\site-packages\pandas\core\internals\construction.py", line 519, in sanitize_index
raise ValueError('Length of values does not match length of index')
ValueError: Length of values does not match length of index
How can i fix this one?
Thanks for the help, guys
And I'm sorry if the post isn't all correct

Pandas hashtable with gives key error:0

I am trying to get the same elements of two pandas data table, with indexing the datas and merge it. I use it for a very large amount of data(millions). The frist table (df) is constatn, and the second(d2) is changing in every loop, with the new elements will be merged with the first table.
here is my code for this process:
df = pd.read_csv("inputfile.csv",header=None)
d1 = pd.DataFrame(df).set_index(0)
for i in range(0, len(df)):
try:
follower_id=twitter.get_followers_ids(user_id=df.iloc[i][0],cursor=next_cursor)
f=follower_id['ids']
json.dumps(f)
d2 = pd.DataFrame(f).set_index(0)
match_result = pd.merge(d1,d2,left_index=True,right_index=True)
fk=[df.iloc[i][0] for number in range(len(match_result))]
DF = pd.DataFrame(fk)
DF.to_csv(r'output1.csv',header=None,sep=' ',index=None)
match_result.to_csv(r'output2.csv', header=None, sep=' ')
I have experienced, that this code, runs well for a while, but after that- probably it is relatad to the second databasses size wich is change every loop- the program gives me the following error message, and stop running:
Traceback (most recent call last):
File "halozat3.py", line 39, in <module>
d2 = pd.DataFrame(f).set_index(0) #1Trump koveto kovetolistaja
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 2372, in set_index
level = frame[col].values
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1678, in __getitem__
return self._getitem_column(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/frame.py", line 1685, in _getitem_column
return self._get_item_cache(key)
File "/usr/lib/python2.7/dist-packages/pandas/core/generic.py", line 1052, in _get_item_cache
values = self._data.get(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/internals.py", line 2565, in get
loc = self.items.get_loc(item)
File "/usr/lib/python2.7/dist-packages/pandas/core/index.py", line 1181, in get_loc
return self._engine.get_loc(_values_from_object(key))
File "index.pyx", line 129, in pandas.index.IndexEngine.get_loc (pandas/index.c:3656)
File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3534)
File "hashtable.pyx", line 381, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7035)
File "hashtable.pyx", line 387, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6976)
KeyError: 0
What could be the problem?
Have you only one row in your dataframe?
You must write as many rows as you like
Look

Pandas: KeyError when attempting to print value from df.loc in Excel file

I'm attempting to parse an Excel-file using Pandas.
import pandas as pd
import xlrd
xl_file = pd.ExcelFile("file.xlsx")
df = xl_file.parse("Sheet1")
Now, if I get a value (name) from the sheet:
if len(df.loc[df["Col A"].str.contains("John"), "Col B"]) > 0:
name = df.loc[df["Col A"].str.contains("John"), "Col B"]
And then print name, the result is:
1 John Doe
Name: Col B, dtype: object
or print name.values:
[u'John Doe']
But if I try to retrieve the actual string with print name[0], I get KeyError:
File "pandas/core/series.py", line 583, in __getitem__
result = self.index.get_value(self, key)
File "pandas/indexes/base.py", line 1980, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas/index.pyx", line 103, in pandas.index.IndexEngine.get_value (pandas/index.c:3332)
File "pandas/index.pyx", line 111, in pandas.index.IndexEngine.get_value (pandas/index.c:3035)
File "pandas/index.pyx", line 159, in pandas.index.IndexEngine.get_loc (pandas/index.c:4018)
File "pandas/hashtable.pyx", line 303, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6610)
File "pandas/hashtable.pyx", line 309, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6554)
KeyError: 0
What could be the problem?
name is a series, and 0 is not in the series' index (check name.index). This explains the error message.
If you want to select the first element in the series, do:
name.iloc[0]

Apply a function from a groupby transform

My pandas looks like this
Date Ticker Open High Low Adj Close Adj_Close Volume
2016-04-18 vws.co 445.0 449.2 441.7 447.3 447.3 945300
2016-04-19 vws.co 449.0 455.8 448.3 450.9 450.9 907700
2016-04-20 vws.co 451.0 452.5 435.4 436.6 436.6 1268100
2016-04-21 vws.co 440.1 442.9 428.4 435.5 435.5 1308300
2016-04-22 vws.co 435.5 435.5 435.5 435.5 435.5 0
2016-04-25 vws.co 431.0 436.7 424.4 430.0 430.0 1311700
2016-04-18 nflx 109.9 110.7 106.02 108.4 108.4 27001500
2016-04-19 nflx 99.49 101.37 94.2 94.34 94.34 55623900
2016-04-20 nflx 94.34 96.98 93.14 96.77 96.77 25633600
2016-04-21 nflx 97.31 97.38 94.78 94.98 94.98 19859400
2016-04-22 nflx 94.85 96.69 94.21 95.9 95.9 15786000
2016-04-25 nflx 95.7 95.75 92.8 93.56 93.56 14965500
I have a program that at one of the functions with embedded functions sucessfully runs a groupby.
This line looks like this
df['MA3'] = df.groupby('Ticker').Adj_Close.transform(lambda group: pd.rolling_mean(group, window=3))
Se my initial question and the data-format here:
Select only one value in df col rows in same df for calc results from different val, and calc df only on one ticker at a time
It has now dawned on me that rather than doing the groupby in each embedded function of which I have 5, I would rather have the groupby run in the main program calling the top function, so all the embedded functions could work on the filtered groupby pandas dataframe from only doing the groupby once...
How do I apply my main function with groupby, in order to filter my pandas, so I only work on one ticker (value in col 'Ticker') at a time?
The 'Ticker' col contains 'aapl', 'msft', 'nflx' company identifyers etc, with timeseries data for a time-window.
Thanks a lot Karasinski. This is close to what I want. But I get an errror.
When I run:
def Screener(df_all, group):
# Copy df_all to df for single ticker operations
df = df_all.copy()
def diff_calc(df,ticker):
df['Difference'] = df['Adj_Close'].diff()
return df
df = diff_calc(df, ticker)
return df_all
for ticker in stocklist:
df_all[['Difference']] = df_all.groupby('Ticker').Adj_Close.apply(Screener, ticker)
I get this error:
Traceback (most recent call last):
File "<ipython-input-2-d7c1835f6b2a>", line 1, in <module>
runfile('C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py', wdir='C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox')
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 85, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 144, in <module>
df_all[['Difference']] = df_all.groupby('Ticker').Adj_Close.apply(Screener, ticker)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 663, in apply
return self._python_apply_general(f)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 667, in _python_apply_general
self.axis)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 1286, in apply
res = f(group)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 659, in f
return func(g, *args, **kwargs)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 112, in Screener
df = diff_calc(df, ticker)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 70, in diff_calc
df['Difference'] = df['Adj_Close'].diff()
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\series.py", line 514, in __getitem__
result = self.index.get_value(self, key)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\tseries\index.py", line 1221, in get_value
raise KeyError(key)
KeyError: 'Adj_Close'
And when I use functools like so
df_all = functools.partial(df_all.groupby('Ticker').Adj_Close.apply(Screener, ticker))
I get the same error as above...
Traceback (most recent call last):
File "<ipython-input-5-d7c1835f6b2a>", line 1, in <module>
runfile('C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py', wdir='C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox')
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 85, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 148, in <module>
df_all = functools.partial(df_all.groupby('Ticker').Adj_Close.apply(Screener, [ticker]))
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 663, in apply
return self._python_apply_general(f)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 667, in _python_apply_general
self.axis)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 1286, in apply
res = f(group)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 659, in f
return func(g, *args, **kwargs)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 114, in Screener
df = diff_calc(df, ticker)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 72, in diff_calc
df['Difference'] = df['Adj_Close'].diff()
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\series.py", line 514, in __getitem__
result = self.index.get_value(self, key)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-
3.3.5.amd64\lib\site-packages\pandas\tseries\index.py", line 1221, in get_value
raise KeyError(key)
KeyError: 'Adj_Close'
Edit from Karasinski's edit from 31/5.
When I run the last suggestion from Karasinski I get this error.
mmm
mmm
nflx
vws.co
Traceback (most recent call last):
File "<ipython-input-4-d7c1835f6b2a>", line 1, in <module>
runfile('C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py', wdir='C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox')
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 85, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "C:/Users/Morten/Documents/Design/Python/CrystalBall - Local - Git/Git - CrystalBall/sandbox/screener_test simple for StockOverflowNestedFct_Getstock.py", line 173, in <module>
df_all[['mean', 'max', 'median', 'min']] = df_all.groupby('Ticker').apply(group_func)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 663, in apply
return self._python_apply_general(f)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 670, in _python_apply_general
not_indexed_same=mutated)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 2785, in _wrap_applied_output
not_indexed_same=not_indexed_same)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\groupby.py", line 1142, in _concat_objects
result = result.reindex_axis(ax, axis=self.axis)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\frame.py", line 2508, in reindex_axis
fill_value=fill_value)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\generic.py", line 1841, in reindex_axis
{axis: [new_index, indexer]}, fill_value=fill_value, copy=copy)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\generic.py", line 1865, in _reindex_with_indexers
copy=copy)
File "C:\Program Files\WinPython-64bit-3.3.5.7\python-3.3.5.amd64\lib\site-packages\pandas\core\internals.py", line 3144, in reindex_indexer
raise ValueError("cannot reindex from a duplicate axis")
ValueError: cannot reindex from a duplicate axis
From an answer from your previous question we can set up with
import pandas as pd
from StringIO import StringIO
text = """Date Ticker Open High Low Adj_Close Volume
2015-04-09 vws.co 315.000000 316.100000 312.500000 311.520000 1686800
2015-04-10 vws.co 317.000000 319.700000 316.400000 312.700000 1396500
2015-04-13 vws.co 317.900000 321.500000 315.200000 315.850000 1564500
2015-04-14 vws.co 320.000000 322.400000 318.700000 314.870000 1370600
2015-04-15 vws.co 320.000000 321.500000 319.200000 316.150000 945000
2015-04-16 vws.co 319.000000 320.200000 310.400000 307.870000 2236100
2015-04-17 vws.co 309.900000 310.000000 302.500000 299.100000 2711900
2015-04-20 vws.co 303.000000 312.000000 303.000000 306.490000 1629700
2016-03-31 mmm 166.750000 167.500000 166.500000 166.630005 1762800
2016-04-01 mmm 165.630005 167.740005 164.789993 167.529999 1993700
2016-04-04 mmm 167.110001 167.490005 165.919998 166.399994 2022800
2016-04-05 mmm 165.179993 166.550003 164.649994 165.809998 1610300
2016-04-06 mmm 165.339996 167.080002 164.839996 166.809998 2092200
2016-04-07 mmm 165.880005 167.229996 165.250000 167.160004 2721900"""
df = pd.read_csv(StringIO(text), delim_whitespace=1, parse_dates=[0], index_col=0)
You can then make a function which calculates whatever statistics you'd like, such as:
def various_indicators(group):
mean = pd.rolling_mean(group, window=3)
max = pd.rolling_max(group, window=3)
median = pd.rolling_median(group, window=3)
min = pd.rolling_min(group, window=3)
return pd.DataFrame({'mean': mean,
'max': max,
'median': median,
'min': min})
To assign these new columns to your dataframe, you would then do a groupby and then apply the function by
df[['mean', 'max', 'median', 'min']] = df.groupby('Ticker').Adj_Close.apply(various_indicators)
EDIT
In regards to your further questions in the comments of the answer: To extract additional information from the dataframe, you should instead pass the entire group rather than just the single column.
def group_func(group):
ticker = group.Ticker.unique()[0]
adj_close = group.Adj_Close
return Screener(ticker, adj_close)
def Screener(ticker, adj_close):
print(ticker)
mean = pd.rolling_mean(adj_close, window=3)
max = pd.rolling_max(adj_close, window=3)
median = pd.rolling_median(adj_close, window=3)
min = pd.rolling_min(adj_close, window=3)
return pd.DataFrame({'mean': mean,
'max': max,
'median': median,
'min': min})
You can then assign these columns in a similar way as above
df[['mean', 'max', 'median', 'min']] = df.groupby('Ticker').apply(group_func)

pandas error: slice found as index?

The code below breaks on the simplest of lines, where I thought I was just adding a column called 'year' with the constant value of what is in the year variable. My packages are up to date with Anaconda 2.3.
Why is this indexing wrong?
The code:
# -*- coding: utf-8 -*-
import iopro
from pandas import *
for year in xrange(2005,2013):
for month in xrange(1,13):
if year == 2005 and month < 7:
continue
filename = 'SOMEPATH' + str(year) + '_mon'+ str(month) +'.txt'
adapter = iopro.text_adapter(filename,parser='csv',field_names=True,output='dataframe',delimiter='\t')
monthly = adapter[['var1','var2','var3']][:]
monthly['year']=year
The error message is:
Traceback (most recent call last):
File "drugs.py", line 21, in <module>
monthly['year']=year
File "/home/seidav/anaconda/lib/python2.7/site-packages/pandas/core/frame.py", line 2127, in __setitem__
self._set_item(key, value)
File "/home/seidav/anaconda/lib/python2.7/site-packages/pandas/core/frame.py", line 2205, in _set_item
NDFrame._set_item(self, key, value)
File "/home/seidav/anaconda/lib/python2.7/site-packages/pandas/core/generic.py", line 1196, in _set_item
self._data.set(key, value)
File "/home/seidav/anaconda/lib/python2.7/site-packages/pandas/core/internals.py", line 2980, in set
loc = self.items.get_loc(item)
File "/home/seidav/anaconda/lib/python2.7/site-packages/pandas/core/index.py", line 1572, in get_loc
return self._engine.get_loc(_values_from_object(key))
File "pandas/index.pyx", line 134, in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)
TypeError: expected string or Unicode object, slice found

Categories