How to display all the values of an index in a Series? - python

I don't know how to display all the index labels of a Series.
# current settings
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 10000)
pd.set_option('display.max_rows', 10000)
s = pd.Series(np.random.randn(199), index=['place_id', 'name', 'formatted_address', 'formatted_phone_number',
'num_comments', 'rating', 'price', 'website', 'lng', 'lat', 'category',
'permanently_closed', 'lunes_open', 'lunes_close_mid', 'lunes_open_mid',
'lunes_close', 'martes_open', 'martes_close_mid', 'martes_open_mid',
'martes_close', 'miércoles_open', 'miércoles_close_mid',
'miércoles_open_mid', 'miércoles_close', 'jueves_open',
'jueves_close_mid', 'jueves_open_mid', 'jueves_close', 'viernes_open',
'viernes_close_mid', 'viernes_open_mid', 'viernes_close', 'sábado_open',
'sábado_close_mid', 'sábado_open_mid', 'sábado_close', 'domingo_open',
'domingo_close_mid', 'domingo_open_mid', 'domingo_close', 'Monday_00',
'Monday_01', 'Monday_02', 'Monday_03', 'Monday_04', 'Monday_05',
'Monday_06', 'Monday_07', 'Monday_08', 'Monday_09','Monday_10', 'Monday_11', 'Monday_12', 'Monday_13', 'Monday_14',
'Monday_15', 'Monday_16', 'Monday_17', 'Monday_18', 'Monday_19',
'Monday_20', 'Monday_21', 'Monday_22', 'Monday_23', 'Monday_peak',
'Tuesday_00', 'Tuesday_01', 'Tuesday_02', 'Tuesday_03', 'Tuesday_04',
'Tuesday_05', 'Tuesday_06', 'Tuesday_07', 'Tuesday_08', 'Tuesday_09',
'Tuesday_10', 'Tuesday_11', 'Tuesday_12', 'Tuesday_13', 'Tuesday_14',
'Tuesday_15', 'Tuesday_16', 'Tuesday_17', 'Tuesday_18', 'Tuesday_19',
'Tuesday_20', 'Tuesday_21', 'Tuesday_22', 'Tuesday_23', 'Tuesday_peak',
'Wednesday_00', 'Wednesday_01', 'Wednesday_02', 'Wednesday_03',
'Wednesday_04', 'Wednesday_05', 'Wednesday_06', 'Wednesday_07',
'Wednesday_08', 'Wednesday_09','Wednesday_11', 'Wednesday_12', 'Wednesday_13', 'Wednesday_14',
'Wednesday_15', 'Wednesday_16', 'Wednesday_17', 'Wednesday_18',
'Wednesday_19', 'Wednesday_20', 'Wednesday_21', 'Wednesday_22',
'Wednesday_23', 'Wednesday_peak', 'Thursday_00', 'Thursday_01',
'Thursday_02', 'Thursday_03', 'Thursday_04', 'Thursday_05',
'Thursday_06', 'Thursday_07', 'Thursday_08', 'Thursday_09',
'Thursday_10', 'Thursday_11', 'Thursday_12', 'Thursday_13',
'Thursday_14', 'Thursday_15', 'Thursday_16', 'Thursday_17',
'Thursday_18', 'Thursday_19', 'Thursday_20', 'Thursday_21',
'Thursday_22', 'Thursday_23', 'Thursday_peak', 'Friday_00', 'Friday_01',
'Friday_02', 'Friday_03', 'Friday_04', 'Friday_05', 'Friday_06',
'Friday_07', 'Friday_08', 'Friday_09', 'Friday_10', 'Friday_11',
'Friday_12', 'Friday_13', 'Friday_14', 'Friday_15', 'Friday_16',
'Friday_17', 'Friday_18', 'Friday_19', 'Friday_20', 'Friday_21',
'Friday_22', 'Friday_23', 'Friday_peak', 'Saturday_00', 'Saturday_01',
'Saturday_02', 'Saturday_03', 'Saturday_04', 'Saturday_05',
'Saturday_06', 'Saturday_07', 'Saturday_08', 'Saturday_09',
'Saturday_10', 'Saturday_11', 'Saturday_12', 'Saturday_13',
'Saturday_14', 'Saturday_15', 'Saturday_16', 'Saturday_17',
'Saturday_18', 'Saturday_19', 'Saturday_20', 'Saturday_21',
'Saturday_22', 'Saturday_23', 'Saturday_peak', 'Sunday_00', 'Sunday_01',
'Sunday_02', 'Sunday_03', 'Sunday_04', 'Sunday_05', 'Sunday_06',
'Sunday_07', 'Sunday_08', 'Sunday_09'])
# I ask for the index
s.index
Instead of getting all the values, I only see this:
Out[94]: Index(['place_id', 'name', 'formatted_address',
'formatted_phone_number',
'num_comments', 'rating', 'price', 'website', 'lng', 'lat',
...
'Sunday_00', 'Sunday_01', 'Sunday_02', 'Sunday_03', 'Sunday_04',
'Sunday_05', 'Sunday_06', 'Sunday_07', 'Sunday_08', 'Sunday_09'],
dtype='object', length=199)
Any ideas on which settings to use?

s.index.values
You can store the result in a variable or print it (which seems to be the goal here).

You need this:
pd.options.display.max_seq_items = None
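For example, a minimal sketch using the Series s defined above (the option can equally be set via pd.set_option):
pd.set_option('display.max_seq_items', None)
print(s.index)        # now prints every label instead of truncating with '...'
print(list(s.index))  # a plain Python list is never truncated by display options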

Related

How do I determine a cut-off or threshold when working with fuzzymatcher in python

Attached as a screenshot are my output and code. How do I use the best_match_score? I need to filter by the returned precision score, a column that only appears after the merge (i.e. just return every row with 'best_match_score' below -1.06).
import fuzzymatcher
import pandas as pd
import os
# pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
REDCAP = pd.read_csv(r"C:\Users\Selamola\Desktop\PythonThings\FuzzyMatching\REDCAP Form A v1 and v2 23 Feb 211.csv")
covidSheet = pd.read_csv(r"C:\Users\Selamola\Desktop\PythonThings\FuzzyMatching\Cases missing REC ID 23 Feb 211.csv")
Data_merge = fuzzymatcher.fuzzy_left_join(covidSheet, REDCAP,
    left_on=['Participant Name', 'Particfipant Surname', 'Screening Date',
             'Screening Date', 'Hospital Number', 'Alternative Hospital Number'],
    right_on=['Patient Name', 'Patient Surname', 'Date Of Admission',
              'Date Of Sample Collection', 'Hospital Number', 'Hospital Number'])
# Merged_data = pd.merge(REDCAP, covidSheet, how='left',
#                        left_on=['Patient Name', 'Patient Surname'],
#                        right_on=['Participant Name', 'Particfipant Surname'])
# Data_merge.to_csv(r'C:\Users\Selamola\Desktop\PythonThings\FuzzyMatching\DataMacth.csv')
print(Data_merge)
This seems very straightforward unless I'm missing something. Be sure to read the documentation about slicing data in pandas.
mask = Data_merge['best_match_score'] < -1.06
filtered_data = Data_merge[mask]
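Equivalently, as a one-liner (assuming the merged frame contains the best_match_score column, as in the screenshot):
filtered_data = Data_merge[Data_merge['best_match_score'] < -1.06]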

How to merge two or more lists in a custom order in python

I have the following code:
import pandas as pd
y = pd.ExcelFile('C:\\Users\\vibhu\\Desktop\\Training docs\\excel training\\super store data transformation\\Sample - Superstore data transformation by Vaibhav.xlsx')
superstore_orders = y.parse(sheet_name='Orders Input data')
superstore_orders.dtypes
factual_table= superstore_orders[['Order ID','Customer ID','Postal Code','Product ID','Product Name','Sales','Quantity','Discount','Profit' ]]
Order_table= superstore_orders[['Order ID','Order Date','Ship Date','Ship Mode']]
Order_table1= Order_table.drop_duplicates(subset='Order ID', keep='first', inplace=False)
Customer_table= superstore_orders[['Customer ID','Customer Name','Segment']]
Customer_table1= Customer_table.drop_duplicates(subset='Customer ID', keep='first', inplace=False)
Geographical_table= superstore_orders[['Postal Code','Country','City','State','Region']]
Geographical_table1= Geographical_table.drop_duplicates(subset='Postal Code', keep='first', inplace=False)
Product_table= superstore_orders[['Product ID','Category','Sub-Category','Product Name']]
Product_table1= Product_table.drop_duplicates(subset=['Product ID','Product Name'], keep='first', inplace=False)
Final_factual_data = pd.merge(Order_table1, factual_table, how='left', on='Order ID')
Final_factual_data = pd.merge(Customer_table1, Final_factual_data, how='left', on='Customer ID')
Final_factual_data = pd.merge(Geographical_table1,Final_factual_data,how='left', on='Postal Code')
Final_factual_data = pd.merge(Product_table1,Final_factual_data,how='left', on=['Product ID','Product Name'] )
The output is in this format: Product ID, Category, Sub-Category, Product Name, Postal Code, Country, City, State, Region, Customer ID, Customer Name, Segment, Order ID, Order Date, Ship Date, Ship Mode, Sales, Quantity, Discount, Profit
I need the columns reordered like this:
Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Customer Name, Segment, Postal Code, Country, City, State, Region, Product ID, Product Name, Category, Sub-Category, Sales, Quantity, Discount, Profit
Final_factual_data1 = Final_factual_data [['Order ID','Order Date','Ship Date','Ship Mode','Customer ID','Customer Name','Segment','Country','City','State','Postal Code','Region','Product ID','Category','Sub-Category','Product Name','Sales','Quantity','Discount','Profit']]
This code helped me get the desired answer.
Note that assigning a new list to the columns attribute only relabels the existing columns in place; it does not move any data. To actually reorder, select the columns in the desired sequence (as above) or use reindex:
Final_factual_data1 = Final_factual_data.reindex(columns=['Order ID', 'Order Date', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'])
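A minimal sketch of the difference (toy frame, hypothetical values):
import pandas as pd
df = pd.DataFrame({'a': [1], 'b': [2]})
df.columns = ['b', 'a']   # relabels only: the value 1 is now under 'b', nothing moved
df = df[['a', 'b']]       # column selection is what actually reorders the data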

Nested dictionary groups from excel

I'm new to Python and openpyxl. I started learning them to make my everyday tasks easier and faster at my workplace.
Task:
There is an Excel file with a lot of rows; it looks like this:
excel file
I want to create a daily report based on this excel file. In my example Today is 2019/05/08.
Expected result:
Only show the info where the date matches today's date.
Expected structure:
required outcome
My solution
In my solution I create a list of the rows that contain only today's values. After that I read only those rows and create dictionaries. But the result is nothing. I'm also in trouble over how to work with multiple keys, because there are multiple issue numbers in the list.
from datetime import datetime
import openpyxl

# Open the excel file
excel_path = "\\REE.xlsx"
wb = openpyxl.load_workbook(excel_path, data_only=True)
ws_1 = wb.worksheets[1]

# Today's date; needs some normalizing due to excel date handling
today = datetime.today()
today = today.replace(hour=0, minute=0, second=0, microsecond=0)

# Create a list of the rows where today's values are present
issue_line_list = []
for cell in ws_1["B"]:
    if cell.value == today:
        issue_line_list.append(cell.row)

# Create a txt file for the output
file = open("daily_report.txt", "w")

# The containers I want to use (note: despite the goal, these are lists, not dicts)
report = []
issue_numbers_list = []
issue = []

# Collect the issue numbers
for line in issue_line_list:
    issue_number_value = ws_1.cell(row=line, column=3).value
    issue_numbers_list.append(issue_number_value)

# Collect the other information
for line in issue_line_list:
    issue_number_value = ws_1.cell(row=line, column=3).value
    by_value = ws_1.cell(row=line, column=2).value
    group_value = ws_1.cell(row=line, column=4).value
    events_value = ws_1.cell(row=line, column=5).value
    deadline_value = ws_1.cell(row=line, column=6).value
    try:
        deadline_value = deadline_value.strftime('%Y.%m.%d')
    except AttributeError:
        deadline_value = ""
    issue.append(issue_number_value)
    issue.append(by_value)
    issue.append(group_value)
    issue.append(events_value)
    issue.append(deadline_value)

# Append the two lists
report.append(issue_numbers_list)
report.append(issue)

# Save to the txt file (write needs a string)
file.write(str(report))
file.close()
Questions
- How to solve the multiple same key issue?
- How to create nested groups?
- What should I add to or delete from my code in order to get the expected result?
Remark
Openpyxl is not the only option. If you have a better/easier/faster way, I'm open to every idea.
Thank you in advance for your support!
Can you try the following:
import pandas as pd
cols = ['date', 'by', 'issue_number', 'group', 'events', 'deadline']
req_cols = ['events', 'deadline']
data = [
    ['2019-05-07', 'john', '113140', '#issue_closed', 'something different', ''],
    ['2019-05-08', 'david', '113140', '#task', 'something different', ''],
    ['2019-05-08', 'victor', '114761', '#task_result', 'something different', ''],
    ['2019-05-08', 'john', '114761', '#task', 'something different', '2019-05-10'],
    ['2019-05-08', 'david', '114761', '#task', 'something different', '2019-05-08'],
    ['2019-05-08', 'victor', '113140', '#task_result', 'something different', ''],
    ['2019-05-07', 'john', '113140', '#issue_created', 'something different', '2019-05-09'],
    ['2019-05-07', 'david', '113140', '#location', 'something different', ''],
    ['2019-05-07', 'victor', '113140', '#issue_closed', 'something different', 'done'],
    ['2019-05-07', 'john', '113140', '#task_result', 'something different', ''],
    ['2019-05-07', 'david', '113140', '#task', 'something different', '2019-05-10'],
]
df = pd.DataFrame(data, columns=cols)
df1 = df.groupby(['issue_number', 'group']).describe()[req_cols].droplevel(0, axis=1)['top']
df1.columns = req_cols
print(df1)
Output:
events deadline
issue_number group
113140 #issue_closed something different done
#issue_created something different 2019-05-09
#location something different
#task something different 2019-05-10
#task_result something different
114761 #task something different 2019-05-08
#task_result something different
To open an excel file, you can do the following:
df = pd.read_excel(excel_path, sheet_name=my_sheet)
req_cols = ['EVENTS', 'DEADLINE']
df1 = df.groupby(['ISSUE NUMBER', 'GROUP']).describe()[req_cols].droplevel(0, axis=1)['top']
df1.columns = req_cols
print(df1)
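If you only want today's rows, one way is to filter before grouping (a sketch, assuming the DATE column was parsed as datetimes):
df = df[df['DATE'] == pd.Timestamp('today').normalize()]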
The task is almost solved, but I faced a new issue.
The code:
excel_path = "\\REE.xlsx"
my_sheet = 'Events'
cols = ['DATE', 'BY', 'ISSUE NUMBER', 'GROUP', 'EVENTS', 'DEADLINE']
req_cols = ['EVENTS', 'DEADLINE']
df = pd.read_excel(excel_path, sheet_name = my_sheet, columns=cols)
today = datetime.today().strftime('%Y-%m-%d')
today_filter = (df[(df['DATE'] == today)])
df = pd.DataFrame(today_filter, columns=cols)
df1 = df.groupby(['ISSUE NUMBER', 'GROUP']).describe()[req_cols].droplevel(0, axis=1)['top']
df1.columns = req_cols
print(df1)
In the 'BY' column there are repeated values, e.g. '#task', but in this case the script prints them only once.
Required result:
114761
#task Jane another words 2019-05-10
#task result John something
#task John something else 2019-05-08
...
...
...
...
My code result:
114761
#task Jane another words 2019-05-10
#task result John something
...
...
...
John #task something else 2019-05-08 does not get printed. Why?
The same happens elsewhere too: if there are several rows with the same values in the 'BY' column, the script prints only the first and skips the rest.
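For what it's worth, a likely cause: describe() on text columns keeps only the single most frequent value per group (its 'top' statistic), so one row per ('ISSUE NUMBER', 'GROUP') pair is expected. A sketch of one way to keep every row instead (untested against your file):
df1 = df.groupby(['ISSUE NUMBER', 'GROUP'])[req_cols].agg(list)
print(df1)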

How to iterate through pandas columns and replace cells with information from the next row down

I am trying to loop through a pandas dataframe column and, if the next row down is not "Property Address", add the information from that next row to the row above it. For example, if a column goes from top to bottom ["Property Address", "Alternate Address", "Property Address"], I would like to take the information from "Alternate Address" and add it to the "Property Address" row above it. I have already double-checked that there are no trailing or leading spaces and that everything is lower case, so all comparisons should work. However, I still get this error:
if i == "Property Address" and df.loc[i+1, :] != "Property Address":
TypeError: must be str, not int
Does anyone have ideas on what I can do so that this will work? I am new to Python, and I am really lost. Please let me know if there is any more information that I should provide to make answering this question easier. Thanks
Here is my code so far:
import pandas as pd
import time

df = pd.read_excel('BRH.xls')  # Reads the Excel file and creates a dataframe

# Column headers
df = df[['street', 'state', 'zip', 'Address Type', 'mStreet', 'mState', 'mZip']]

propertyAddress = "Property Address"

# Iterates thru the column and replaces the current row with info from the next row down
for i in df['Address Type']:
    if i == "Property Address" and df.loc[i+1, :] != "Property Address":
        df['mStreet'] == df.loc[i + 1, 'street']
        df['mState'] == df.loc[i + 1, 'state']
        df['mZip'] = df.loc[i + 1, 'zip']

df.to_excel('BRHOut.xls')
print('operation complete in:', time.process_time(), 'ms')
You can use pd.Series.shift to construct an appropriate mask.
Here's some untested pseudo-code:
m1 = df['AddressType'].shift() == 'Property Address'
m2 = df['AddressType'] != 'Property Address'
mask = m1 & m2

for col in ['Street', 'State', 'Zip']:
    df.loc[mask, 'm'+col] = df.loc[mask, col.lower()].shift(-1)
Your TypeError is happening because i is a string. When you call df.loc[i+1, :], you are attempting to do something like "Property Address" + 1. Once you resolve that, you will still have some indexing issues in the body of your for loop.
#jpp gave a very succinct answer, but I believe that it pulls information from the intended destination and writes it to the intended source. In other words, the roles of "Property Address" and "Alternate Address" are reversed. I believe this will deliver the correct result:
Setup
import pandas as pd

df = pd.DataFrame(
    data={
        'street': [
            '123 Main Street',
            '1600 Pennsylvania Ave',
            '567 Fake Ave',
            '1 University Ave'
        ],
        'state': ['CA', 'DC', 'DC', 'CA'],
        'zip': ['95126', '20500', '20500', '94301'],
        'Address Type': [
            'Property Address',
            'Alternate Address',
            'Property Address',
            'Alternate Address'
        ],
        'mStreet': [None, None, None, None],
        'mState': [None, None, None, None],
        'mZip': [None, None, None, None],
    },
    columns=[
        'street',
        'state',
        'zip',
        'Address Type',
        'mStreet',
        'mState',
        'mZip'
    ])
# Create a new dataframe with all address attributes shifted UP one row
next_address_attributes = df[['Address Type', 'street', 'state', 'zip']].shift(-1)
# Create a series to indicate whether information should be drawn from next row
# All the decision-making is right here
get_attributes_from_next_address = ((df['Address Type'] == 'Property Address')
                                    & (next_address_attributes['Address Type'] != 'Property Address'))
Using For Loop
for i, getting_attributes_is_necessary in get_attributes_from_next_address.items():  # .iteritems() in older pandas
    if getting_attributes_is_necessary:
        df.at[i, 'mStreet'] = next_address_attributes.at[i, 'street']
        df.at[i, 'mState'] = next_address_attributes.at[i, 'state']
        df.at[i, 'mZip'] = next_address_attributes.at[i, 'zip']
Loopless
df.loc[get_attributes_from_next_address, 'mStreet'] = next_address_attributes.loc[get_attributes_from_next_address, 'street']
df.loc[get_attributes_from_next_address, 'mState'] = next_address_attributes.loc[get_attributes_from_next_address, 'state']
df.loc[get_attributes_from_next_address, 'mZip'] = next_address_attributes.loc[get_attributes_from_next_address, 'zip']
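As a quick sanity check with the toy frame from the setup, after either version runs:
print(df[['Address Type', 'mStreet', 'mState', 'mZip']])
The rows labelled 'Property Address' should now carry the street/state/zip of the 'Alternate Address' row immediately below them.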

How do I select only certain rows based on label in pandas?

Here is my function:
def get_historical_closes(ticker, start_date, end_date):
    my_dir = '/home/manish/Desktop/Equity/subset'
    os.chdir(my_dir)
    dfs = []
    for files in glob.glob('*.txt'):
        dfs.append(pd.read_csv(files, names=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'], parse_dates=[1]))
    p = pd.concat(dfs)
    d = p.reset_index(['Date', 'Ticker', 'Close'])
    pivoted = d.pivot_table(index=['Date'], columns=['Ticker'])
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted

closes = get_historical_closes(['LT' or 'HDFC' or 'ACC'], '1999-01-01', '2014-12-31')
My problem is that I just want data for a few tickers, namely LT, HDFC and ACC, for all dates, but when I execute the function I get data for all of them (approx. 1500).
How can I slice the dataframe, so that I get only selected rows and not the entire dataframe?
Raw input data is a collection of text files, like so:
20MICRONS,20150401,36.5,38.95,35.8,37.35,64023,0
3IINFOTECH,20150401,5.9,6.3,5.8,6.2,1602365,0
3MINDIA,20150401,7905,7905,7850,7879.6,310,0
8KMILES,20150401,710.05,721,706,712.9,20196,0
A2ZINFRA,20150401,15.5,16.55,15.2,16,218219,0
AARTIDRUGS,20150401,648.95,665.5,639.65,648.25,42927,0
AARTIIND,20150401,348,349.4,340.3,341.85,122071,0
AARVEEDEN,20150401,42,42.9,41.55,42.3,627,0
ABAN,20150401,422,434.3,419,429.1,625857,0
ABB,20150401,1266.05,1284,1266,1277.45,70294,0
ABBOTINDIA,20150401,3979.25,4009.95,3955.3,3981.25,2677,0
ABCIL,20150401,217.8,222.95,217,221.65,11583,0
ABGSHIP,20150401,225,225,215.3,220.2,237737,0
ABIRLANUVO,20150401,1677,1677,1639.25,1666.7,106336,0
ACC,20150401,1563.7,1591.3,1553.2,1585.9,176063,0
ACCELYA,20150401,932,953.8,923,950.5,4297,0
ACE,20150401,40.1,41.7,40.05,41.15,356130,0
ACROPETAL,20150401,2.75,3,2.7,2.85,33380,0
ADANIENT,20150401,608.8,615.8,603,612.4,868006,0
ADANIPORTS,20150401,308.45,312.05,306.1,310.95,1026200,0
ADANIPOWER,20150401,46.7,48,46.7,47.75,3015649,0
ADFFOODS,20150401,60.5,60.5,58.65,59.75,23532,0
ADHUNIK,20150401,20.95,21.75,20.8,21.2,149431,0
ADORWELD,20150401,224.9,224.9,215.65,219.2,2743,0
ADSL,20150401,19,20,18.7,19.65,35053,0
ADVANIHOTR,20150401,43.1,43.1,43,43,100,0
ADVANTA,20150401,419.9,430.05,418,428,16206,0
AEGISCHEM,20150401,609,668,600,658.4,264828,0
AFL,20150401,65.25,70,65.25,68.65,9507,0
AGARIND,20150401,95,100,87.25,97.45,14387,0
AGCNET,20150401,91.95,93.75,91.4,93,2453,0
AGRITECH,20150401,5.5,6.1,5.5,5.75,540,0
AGRODUTCH,20150401,2.7,2.7,2.6,2.7,451,0
AHLEAST,20150401,196,202.4,185,192.25,357,0
AHLUCONT,20150401,249.5,258.3,246,251.3,44541,0
AHLWEST,20150401,123.9,129.85,123.9,128.35,688,0
AHMEDFORGE,20150401,229.5,237.35,228,231.45,332680,0
AIAENG,20150401,1268,1268,1204.95,1214.1,48950,0
AIL,20150401,735,747.9,725.1,734.8,31780,0
AJANTPHARM,20150401,1235,1252,1207.05,1223.3,126442,0
AJMERA,20150401,118.7,121.9,117.2,118.45,23005,0
AKSHOPTFBR,20150401,14.3,14.8,14.15,14.7,214028,0
AKZOINDIA,20150401,1403.95,1412,1392,1400.7,17115,0
ALBK,20150401,99.1,101.65,99.1,101.4,2129046,0
ALCHEM,20150401,27.9,32.5,27.15,31.6,32338,0
ALEMBICLTD,20150401,34.6,36.7,34.3,36.45,692688,0
ALICON,20150401,280,288,279.05,281.05,5937,0
ALKALI,20150401,31.6,34.2,31.6,33.95,4663,0
ALKYLAMINE,20150401,314,334,313.1,328.8,1515,0
ALLCARGO,20150401,317,323.5,315,319.15,31056,0
ALLSEC,20150401,21.65,22.5,21.6,21.6,435,0
ALMONDZ,20150401,10.6,10.95,10.5,10.75,23600,0
ALOKTEXT,20150401,7.5,8.2,7.4,7.95,8145264,0
ALPA,20150401,11.85,11.85,10.75,11.8,3600,0
ALPHAGEO,20150401,384.3,425.05,383.95,419.75,13308,0
ALPSINDUS,20150401,1.85,1.85,1.85,1.85,1050,0
ALSTOMT&D,20150401,585.85,595,576.65,588.4,49234,0
AMARAJABAT,20150401,836.5,847.75,831,843.9,121150,0
AMBIKCO,20150401,790,809,780.25,802.6,4879,0
AMBUJACEM,20150401,254.95,261.4,253.4,260.25,1346375,0
AMDIND,20150401,20.5,22.75,20.5,22.3,693,0
AMRUTANJAN,20150401,480,527.05,478.35,518.3,216407,0
AMTEKAUTO,20150401,144.5,148.45,144.2,147.45,552874,0
AMTEKINDIA,20150401,55.6,58.3,55.1,57.6,700465,0
AMTL,20150401,13.75,14.45,13.6,14.45,2111,0
ANANTRAJ,20150401,39.9,40.3,39.35,40.05,376564,0
ANDHRABANK,20150401,78.35,80.8,78.2,80.55,993038,0
ANDHRACEMT,20150401,8.85,9.3,8.75,9.1,15848,0
ANDHRSUGAR,20150401,92.05,98.95,91.55,96.15,11551,0
ANGIND,20150401,36.5,36.9,35.6,36.5,34758,0
ANIKINDS,20150401,22.95,24.05,22.95,24.05,1936,0
ANKITMETAL,20150401,2.85,3.25,2.85,3.15,29101,0
ANSALAPI,20150401,23.45,24,23.45,23.8,76723,0
ANSALHSG,20150401,29.9,29.9,28.75,29.65,7748,0
ANTGRAPHIC,20150401,0.1,0.15,0.1,0.15,23500,0
APARINDS,20150401,368.3,375.6,368.3,373.45,2719,0
APCOTEXIND,20150401,505,505,481.1,495.85,3906,0
APLAPOLLO,20150401,411.5,434,411.5,428.65,88113,0
APLLTD,20150401,458.9,464,450,454.7,72075,0
APOLLOHOSP,20150401,1351,1393.85,1351,1390,132827,0
APOLLOTYRE,20150401,169.65,175.9,169,175.2,3515274,0
APOLSINHOT,20150401,195,197,194.3,195.2,71,0
APTECHT,20150401,57.6,61,57,59.7,206475,0
ARCHIDPLY,20150401,32.95,35.8,32.5,35.35,103036,0
ARCHIES,20150401,19.05,19.4,18.8,19.25,46840,0
ARCOTECH,20150401,342.5,350,339.1,345.2,44142,0
ARIES,20150401,106.75,113.9,105,112.7,96825,0
ARIHANT,20150401,43.5,50,43.5,49.3,1647,0
AROGRANITE,20150401,61.5,62,59.55,60.15,2293,0
ARROWTEX,20150401,25.7,27.8,25.1,26.55,17431,0
ARSHIYA,20150401,39.55,41.5,39,40,69880,0
ARSSINFRA,20150401,34.65,36.5,34.6,36.3,71442,0
ARVIND,20150401,260.85,268.2,259,267.2,1169433,0
ARVINDREM,20150401,15.9,17.6,15.5,17.6,5407412,0
ASAHIINDIA,20150401,145,145,141,142.45,16240,0
ASAHISONG,20150401,113,116.7,112.15,115.85,5475,0
ASAL,20150401,45.8,45.8,38,43.95,7429,0
ASHAPURMIN,20150401,74,75.4,74,74.05,36406,0
ASHIANA,20150401,248,259,246.3,249.5,21284,0
ASHIMASYN,20150401,8.4,8.85,8.05,8.25,3253,0
ASHOKA,20150401,175.1,185.4,175.1,183.75,1319134,0
ASHOKLEY,20150401,72.7,74.75,72.7,74.05,17233199,0
ASIANHOTNR,20150401,104.45,107.8,101.1,105.15,780,0
ASIANPAINT,20150401,810,825.9,803.5,821.7,898480,0
ASIANTILES,20150401,116.25,124.4,116.25,123.05,31440,0
ASSAMCO,20150401,4.05,4.3,4.05,4.3,476091,0
ASTEC,20150401,148.5,154.5,146,149.2,322308,0
ASTRAL,20150401,447.3,451.3,435.15,448.6,64889,0
ASTRAMICRO,20150401,146.5,151.9,145.2,150.05,735681,0
ASTRAZEN,20150401,908,940.95,908,920.35,3291,0
ATFL,20150401,635,648,625.2,629.25,6202,0
ATLANTA,20150401,67.2,71,67.2,68.6,238683,0
ATLASCYCLE,20150401,203.9,210.4,203,208.05,25208,0
ATNINTER,20150401,0.2,0.2,0.2,0.2,1704,0
ATUL,20150401,1116,1160,1113,1153.05,32969,0
ATULAUTO,20150401,556.55,576.9,555.9,566.25,59117,0
AURIONPRO,20150401,192.3,224.95,191.8,217.55,115464,0
AUROPHARMA,20150401,1215,1252,1215,1247.4,1140111,0
AUSOMENT,20150401,22.6,22.6,21.7,21.7,2952,0
AUSTRAL,20150401,0.5,0.55,0.5,0.5,50407,0
AUTOAXLES,20150401,834.15,834.15,803,810.2,4054,0
AUTOIND,20150401,60,65,59.15,63.6,212036,0
AUTOLITIND,20150401,36,39,35.2,37.65,14334,0
AVTNPL,20150401,27,28,26.7,27.9,44803,0
AXISBANK,20150401,557.7,572,555.25,569.65,3753262,0
AXISCADES,20150401,335.4,345,331.4,339.65,524538,0
AXISGOLD,20150401,2473.95,2493,2461.1,2483.15,138,0
BAFNAPHARM,20150401,29.95,31.45,29.95,30.95,21136,0
BAGFILMS,20150401,3.05,3.1,2.9,3,31278,0
BAJAJ-AUTO,20150401,2027.05,2035,2002.95,2019.8,208545,0
BAJAJCORP,20150401,459,482,454,466.95,121972,0
BAJAJELEC,20150401,230,234.8,229,232.4,95432,0
BAJAJFINSV,20150401,1412,1447.5,1396,1427.55,44811,0
BAJAJHIND,20150401,14.5,14.8,14.2,14.6,671746,0
BAJAJHLDNG,20150401,1302.3,1329.85,1285.05,1299.9,24626,0
BAJFINANCE,20150401,4158,4158,4062.2,4140.05,12923,0
BALAJITELE,20150401,65.75,67.9,65.3,67.5,47063,0
BALAMINES,20150401,81.5,83.5,81.5,83.45,6674,0
BALKRISIND,20150401,649,661,640,655,16919,0
BALLARPUR,20150401,13.75,13.95,13.5,13.9,271962,0
BALMLAWRIE,20150401,568.05,580.9,562.2,576.75,17423,0
BALPHARMA,20150401,68.9,74.2,67.1,68.85,84178,0
BALRAMCHIN,20150401,50.95,50.95,49.3,50,84400,0
BANARBEADS,20150401,33,39.5,33,39.25,1077,0
BANARISUG,20150401,834.7,855,820,849.85,618,0
BANCOINDIA,20150401,105,107.5,103.25,106.8,11765,0
BANG,20150401,6.2,6.35,6.1,6.35,9639,0
BANKBARODA,20150401,162.75,170.4,162.05,168.9,2949846,0
BANKBEES,20150401,1813.45,1863,1807,1859.78,19071,0
BANKINDIA,20150401,194.6,209.8,194.05,205.75,3396490,0
BANSWRAS,20150401,65,65,60.1,63.9,6238,0
BARTRONICS,20150401,11.45,11.85,11.35,11.6,109658,0
BASF,20150401,1115,1142,1115,1124.65,14009,0
BASML,20150401,184,192,183.65,191.6,642,0
BATAINDIA,20150401,1095,1104.9,1085,1094.7,137166,0
BAYERCROP,20150401,3333,3408.3,3286.05,3304.55,8839,0
BBL,20150401,627.95,641.4,622.2,629.8,5261,0
BBTC,20150401,441,458,431.3,449.15,141334,0
BEDMUTHA,20150401,16.85,18,16.25,17.95,16412,0
BEL,20150401,3355,3595,3350,3494.2,582755,0
BEML,20150401,1100,1163.8,1086,1139.2,631231,0
BEPL,20150401,22.1,22.45,21.15,22.3,5459,0
BERGEPAINT,20150401,209.3,216.9,208.35,215.15,675963,0
BFINVEST,20150401,168.8,176.8,159.5,172.7,113352,0
BFUTILITIE,20150401,707.4,741,702.05,736.05,1048274,0
BGLOBAL,20150401,2.9,3.05,2.9,3.05,16500,0
BGRENERGY,20150401,117.35,124,117.35,122.3,207979,0
BHAGYNAGAR,20150401,17.9,17.9,16.95,17.5,1136,0
BHARATFORG,20150401,1265.05,1333.1,1265.05,1322.6,704419,0
BHARATGEAR,20150401,73.5,77.7,72.7,75.9,13730,0
BHARATRAS,20150401,810,840,800,821.4,981,0
BHARTIARTL,20150401,393.3,404.85,393.05,402.3,5494883,0
BHEL,20150401,235.8,236,229.6,230.7,3346075,0
BHUSANSTL,20150401,65.15,67.9,63.65,64,1108540,0
BIL,20150401,401.3,422,401.3,419.35,2335,0
BILENERGY,20150401,0.8,0.95,0.8,0.95,8520,0
BINANIIND,20150401,90.55,93.95,90.2,93.3,27564,0
BINDALAGRO,20150401,23.4,23.4,22.25,22.8,111558,0
BIOCON,20150401,472.5,478.85,462.7,466.05,1942983,0
BIRLACORPN,20150401,415,420,402.8,414.7,11345,0
BIRLACOT,20150401,0.05,0.1,0.05,0.1,439292,0
BIRLAERIC,20150401,52.3,54.45,52.15,53.7,9454,0
BIRLAMONEY,20150401,24.35,28.85,23.9,28.65,78710,0
BLBLIMITED,20150401,3.7,3.7,3.65,3.65,550,0
BLISSGVS,20150401,128,132.55,124.3,126.15,261958,0
BLKASHYAP,20150401,13.7,15.15,13.7,14.15,118455,0
BLUEDART,20150401,7297.35,7315,7200,7285.55,2036,0
BLUESTARCO,20150401,308.75,315,302,311.35,19046,0
BLUESTINFO,20150401,199,199.9,196.05,199.45,1268,0
BODALCHEM,20150401,34.5,34.8,33.05,34.65,65623,0
BOMDYEING,20150401,64,66.3,63.7,65.95,1168851,0
BOSCHLTD,20150401,25488,25708,25201,25570.7,16121,0
BPCL,20150401,810.95,818,796.5,804.2,1065969,0
BPL,20150401,30.55,32.5,30.55,31.75,116804,0
BRFL,20150401,146,147.9,142.45,144.3,7257,0
BRIGADE,20150401,143.8,145.15,140.25,144.05,36484,0
BRITANNIA,20150401,2155.5,2215.3,2141.35,2177.55,245908,0
BROADCAST,20150401,3.35,3.5,3.3,3.3,4298,0
BROOKS,20150401,38.4,39.5,38.4,39.3,19724,0
BSELINFRA,20150401,1.9,2.15,1.85,2.05,97575,0
BSL,20150401,29.55,31.9,27.75,31,9708,0
BSLGOLDETF,20150401,2535,2535,2501.5,2501.5,122,0
BSLIMITED,20150401,27.5,27.5,25.45,27.15,728818,0
BURNPUR,20150401,9.85,9.85,9.1,9.15,144864,0
BUTTERFLY,20150401,190.95,194,186.1,192.35,25447,0
BVCL,20150401,17.25,17.7,16.5,17.7,9993,0
CADILAHC,20150401,1755,1796.8,1737.05,1790.15,302149,0
CAIRN,20150401,213.85,215.6,211.5,213.35,841463,0
CAMLINFINE,20150401,89.5,91.4,87.5,91.1,32027,0
CANBK,20150401,366.5,383.8,365.15,381,1512605,0
CANDC,20150401,20.6,24.6,20.6,23.25,9100,0
CANFINHOME,20150401,611.1,649.95,611.1,644.7,72233,0
CANTABIL,20150401,47.6,50.5,47.6,50.25,5474,0
CAPF,20150401,398.85,427,398,421.75,224074,0
CAPLIPOINT,20150401,1020,1127.8,1020,1122.65,108731,0
CARBORUNIV,20150401,191.05,197,188.35,190,42681,0
CAREERP,20150401,151.9,156.6,149,153.25,26075,0
CARERATING,20150401,1487,1632.75,1464,1579.2,65340,0
CASTROLIND,20150401,476,476.25,465.1,467.3,185850,0
CCCL,20150401,4.2,4.7,4.2,4.65,47963,0
CCHHL,20150401,10.8,11,10.4,10.8,69325,0
CCL,20150401,178.35,185.9,176,184.3,244917,0
CEATLTD,20150401,805.25,830.8,785.75,826.7,501415,0
CEBBCO,20150401,18.3,20.25,18.1,19.85,40541,0
CELEBRITY,20150401,11.5,12.5,11.5,12.1,5169,0
CELESTIAL,20150401,59.9,61.8,59.5,60.05,128386,0
CENTENKA,20150401,152,159.9,148.2,157.1,16739,0
CENTEXT,20150401,1.5,1.5,1.2,1.25,19308,0
CENTRALBK,20150401,106,107.2,104.3,106.3,992782,0
CENTUM,20150401,756.85,805,756.8,801.9,26848,0
CENTURYPLY,20150401,234,245,234,243.45,367540,0
CENTURYTEX,20150401,633.6,682.4,631,675.35,3619413,0
CERA,20150401,2524.75,2524.75,2470,2495.3,6053,0
CEREBRAINT,20150401,15.6,16.2,14.65,14.8,348478,0
CESC,20150401,604.95,613.4,595.4,609.75,294334,0
CGCL,20150401,173,173,173,173,9,0
CHAMBLFERT,20150401,70.2,73.4,70.2,72.65,2475030,0
CHEMFALKAL,20150401,72.8,77,72,76.3,1334,0
CHENNPETRO,20150401,69,70.35,68.3,68.95,160576,0
CHESLINTEX,20150401,10.1,10.1,8.75,9.4,1668,0
CHOLAFIN,20150401,599.85,604,582.15,598.2,23125,0
CHROMATIC,20150401,3.4,4.05,3,3.3,63493,0
CIGNITITEC,20150401,433,444.95,432,440,32923,0
CIMMCO,20150401,92,94.05,91,94.05,19931,0
CINELINE,20150401,14.5,14.95,14.5,14.9,4654,0
CINEVISTA,20150401,3.3,3.3,3.3,3.3,10,0
CIPLA,20150401,714,716.5,703.85,709.6,1693796,0
CLASSIC,20150401,1.5,1.55,1.45,1.45,7770,0
CLNINDIA,20150401,824.7,837.9,819,828.8,6754,0
CLUTCHAUTO,20150401,13.75,13.75,13.6,13.6,1414,0
CMAHENDRA,20150401,9.35,9.5,8.9,9.15,1005172,0
CMC,20150401,1925.85,1925.85,1891,1907.25,153068,0
CNOVAPETRO,20150401,20,22.75,17.1,22.75,1656,0
COALINDIA,20150401,362.9,364.25,358,363,1428949,0
COLPAL,20150401,2003.4,2009.9,1990.05,2002.5,92909,0
COMPUSOFT,20150401,9.4,10.05,9,9.7,15083,0
CONCOR,20150401,1582.35,1627.3,1561,1582.85,182280,0
CONSOFINVT,20150401,36.55,40,36.5,40,439,0
CORDSCABLE,20150401,25.55,28,24.1,25.8,15651,0
COREEDUTEC,20150401,8,8.85,7.6,8.4,890455,0
COROMANDEL,20150401,268.5,271.35,266.15,268.35,42173,0
CORPBANK,20150401,52.5,55,52.05,54.1,1141752,0
COSMOFILMS,20150401,76.9,80,76.2,79.25,21020,0
COUNCODOS,20150401,1.2,1.2,1.2,1.2,2850,0
COX&KINGS,20150401,323,324.85,316.5,317.8,76998,0
CPSEETF,20150401,24.2,24.37,24.08,24.34,180315,0
CREATIVEYE,20150401,3.4,3.6,2.8,3.45,8545,0
CRISIL,20150401,2049,2052.45,2000,2030.7,3928,0
CROMPGREAV,20150401,164.85,167.4,163.2,166.1,2739478,0
CTE,20150401,18.55,18.55,16.85,17.05,8260,0
CUB,20150401,97.35,98.75,96.4,98.3,182702,0
CUMMINSIND,20150401,879,900.95,874.75,889.9,358652,0
CURATECH,20150401,10.8,11,9.75,10,755,0
CYBERTECH,20150401,28.5,33.45,28.1,33.4,103549,0
CYIENT,20150401,509.9,515,495.1,514.1,30415,0
DAAWAT,20150401,105,112.25,99.5,108.4,26689,0
DABUR,20150401,266.5,268.5,264.65,266.55,642177,0
DALMIABHA,20150401,428.15,439.9,422.5,432.65,9751,0
DALMIASUG,20150401,17.5,17.5,16.45,17.15,12660,0
DATAMATICS,20150401,66.5,75,66,72.15,119054,0
DBCORP,20150401,378,378,362.6,369.45,8799,0
DBREALTY,20150401,67,67.15,65.8,66.3,212297,0
DBSTOCKBRO,20150401,47.6,47.65,47.45,47.55,24170,0
DCBBANK,20150401,110.95,114.95,110.15,114.45,935858,0
DCM,20150401,84.5,88.75,84.1,87,34747,0
DCMSHRIRAM,20150401,107.95,114.3,107.95,112.8,29474,0
DCW,20150401,16.75,17.2,16.65,17.15,270502,0
DECCANCE,20150401,310.05,323.9,310.05,321.55,446,0
DECOLIGHT,20150401,1.45,1.45,1.4,1.4,1100,0
DEEPAKFERT,20150401,140,144,138.25,139.95,162156,0
DEEPAKNTR,20150401,68,70.65,66.4,69.95,8349,0
DEEPIND,20150401,46.6,54.4,46.3,51.9,52130,0
DELTACORP,20150401,79.95,82.75,79.75,82.35,889247,0
DELTAMAGNT,20150401,36.6,37.45,36.6,37.45,60,0
DEN,20150401,121.45,127,121.2,122.4,59512,0
DENABANK,20150401,50.8,51.5,50.1,51.35,376680,0
DENORA,20150401,136.7,136.7,131.05,133.6,743,0
DHAMPURSUG,20150401,36.8,36.95,34.85,36.35,38083,0
DHANBANK,20150401,30.8,32.1,30.5,31.75,195779,0
DHANUKA,20150401,690,690,652,660.15,24958,0
DHARSUGAR,20150401,14.15,14.7,13.8,14.45,1748,0
DHFL,20150401,468.9,474.9,461.6,467.85,448551,0
DHUNINV,20150401,97.15,103,94.5,99.85,15275,0
DIAPOWER,20150401,44.9,45.95,43.3,45.55,126085,0
DICIND,20150401,343,347,341,341.95,7745,0
DIGJAM,20150401,8,8.15,7.75,8.05,96467,0
DISHMAN,20150401,168,172.65,164.7,171.8,778414,0
DISHTV,20150401,82.2,84.85,81.35,84.15,5845850,0
DIVISLAB,20150401,1770.1,1809,1770.1,1802.35,68003,0
DLF,20150401,157,160.9,156.2,159.7,3098216,0
DLINKINDIA,20150401,165.05,168,162.2,164.75,22444,0
DOLPHINOFF,20150401,120.8,134.4,119.5,130.2,190716,0
DONEAR,20150401,15,15.95,14.5,15.35,679,0
DPL,20150401,46.6,49,44,45.45,25444,0
DPSCLTD,20150401,17.15,17.15,16.55,16.85,916,0
DQE,20150401,24.3,24.8,22.75,23.1,57807,0
DRDATSONS,20150401,5.8,6.1,5.7,6,2191357,0
DREDGECORP,20150401,374.9,403,372.65,393.4,106853,0
DRREDDY,20150401,3541,3566.8,3501.7,3533.65,282785,0
DSKULKARNI,20150401,77.6,77.6,74,77.1,3012,0
DSSL,20150401,9.5,9.5,9.5,9.5,50,0
DTIL,20150401,206.95,231.75,205.95,219.05,1437,0
DUNCANSLTD,20150401,15.55,16.3,15.3,15.85,740,0
DWARKESH,20150401,21,21,19.85,20.7,9410,0
DYNAMATECH,20150401,3868,4233,3857.1,3920.55,59412,0
DYNATECH,20150401,2.85,3,2.85,3,3002,0
EASTSILK,20150401,1.55,1.85,1.55,1.75,9437,0
EASUNREYRL,20150401,40.05,43,40.05,42.55,21925,0
ECEIND,20150401,136,148,127,133.85,43034,0
ECLERX,20150401,1603.8,1697,1595,1600.65,123468,0
EDELWEISS,20150401,63.65,67.5,63,66.6,451255,0
EDL,20150401,23.9,25,23.9,24.4,7799,0
EDUCOMP,20150401,12.45,13.55,12.35,13.55,499009,0
EICHERMOT,20150401,15929,16196.95,15830.05,16019.5,45879,0
EIDPARRY,20150401,174.05,175.8,168.65,171.2,56813,0
EIHAHOTELS,20150401,228,232.8,225,228,85,0
EIHOTEL,20150401,107.25,110,107.25,109.5,57306,0
EIMCOELECO,20150401,399,409.5,399,409.5,184,0
EKC,20150401,9.35,11.15,9.35,11.05,350782,0
ELAND,20150401,14.3,16.45,14.3,16.25,191406,0
ELDERPHARM,20150401,90.5,91.5,89.45,91.5,23450,0
ELECON,20150401,66.5,76.2,66.25,74.45,6045416,0
ELECTCAST,20150401,19.8,20.55,18.9,19.4,1956889,0
ELECTHERM,20150401,25.9,25.9,22.2,24,14611,0
ELGIEQUIP,20150401,147.5,150.4,146.4,150,9475,0
....
ZENITH, 20150401,...
I used EdChum's code from his comment and added some clarification. I think the main problem is that d is the output dataframe built from all the *.txt files, so it cannot be created inside the for loop if you need a single combined output.
import pandas as pd
import glob

def get_historical_closes(ticker, start_date, end_date):
    dfs = []
    # create an empty df for the output
    d = pd.DataFrame()
    # glob can use a path with *.txt - see http://stackoverflow.com/a/3215392/2901002
    for files in glob.glob('/home/manish/Desktop/Equity/subset/*.txt'):
        # added index_col for a multiindex df
        dfs.append(pd.read_csv(files,
                               index_col=['Date', 'Ticker', 'Close'],
                               names=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'],
                               parse_dates=[1]))
    p = pd.concat(dfs)
    # d is the output from all .txt files, so it is built after the loop, not inside it
    d = p.reset_index(['Date', 'Ticker', 'Close'])
    d = d[(d['Ticker'].isin(ticker)) & (d['Date'] > start_date) & (d['Date'] < end_date)]
    pivoted = d.pivot_table(index=['Date'], columns=['Ticker'])
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted

# isin needs a list of tickers, so 'or' is replaced by commas
# arguments changed for testing: 'HDFC' to 'AGCNET' and end_date '2014-12-31' to '2015-12-31'
closes = get_historical_closes(['LT', 'AGCNET', 'ACC'], '1999-01-01', '2015-12-31')
print(closes)
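If you later need only a subset of tickers, plain column selection on the pivoted result works (assuming those tickers are present in the files):
print(closes[['ACC', 'LT']].head())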
