TensorFlow DNNRegressor keeps giving me the same predicted values - python
I am using TensorFlow's DNNRegressor, and when I reload the dataset to get predicted values from the neural network I trained, the predicted values are all the same after a certain number of rows (a quick check that counts how many distinct values come back is sketched right after the code below). I have tried changing the learning rate and the number of hidden layers and neurons, but nothing seems to really work.
Here is my code:
import pandas as pd
from IPython.display import display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import tensorflow as tf
import pickle # Used to save the model
import re
import csv
import logging
import os
from sklearn.model_selection import train_test_split
regex = re.compile(r"\[|\]|<", re.IGNORECASE)
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
# Removes annoying warning messages in tensorflow and python
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='error', category=FutureWarning)
import sys
if not sys.warnoptions:
    warnings.simplefilter("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.logging.set_verbosity(tf.logging.ERROR)
tf.get_logger().setLevel(3)
tf.get_logger().setLevel('INFO')
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').disabled = True
all_data = pd.read_csv('ML_DATASET.csv')
all_data = all_data.fillna(0)
# Create training and test set
all_data4 = all_data.iloc[:,0:]
# regex=False so '+', '(', ')' and '$' are treated as literal characters rather than regex patterns
all_data4.columns = all_data4.columns.str.replace('+', 'plus', regex=False)
all_data4.columns = all_data4.columns.str.replace(')', ' ', regex=False)
all_data4.columns = all_data4.columns.str.replace('!', ' ', regex=False)
all_data4.columns = all_data4.columns.str.replace('(', ' ', regex=False)
all_data4.columns = all_data4.columns.str.replace(',', ' ', regex=False)
all_data4.columns = all_data4.columns.str.replace(' ', '_', regex=False)
all_data4.columns = all_data4.columns.str.replace('__', '_', regex=False)
all_data4.columns = all_data4.columns.str.replace('%', 'percentage', regex=False)
all_data4.columns = all_data4.columns.str.replace('$', '', regex=False)
all_data4.columns = all_data4.columns.str.replace('<', 'lessthan', regex=False)
all_data4 = all_data4.dropna(subset = ['3_year_appreciation'])
train_dataset = all_data4.sample(frac=0.8,random_state=42)
test_dataset = all_data4.drop(train_dataset.index)
train_stats = train_dataset.describe()
train_stats.pop('3_year_appreciation')
train_stats = train_stats.transpose()
train_labels = train_dataset.pop('3_year_appreciation')
test_labels = test_dataset.pop('3_year_appreciation')
# Need to change feature columns to be of numeric type
feature_columns = ['Unweighted_Sample_Count_of_the_population', 'Avg_household_size_of_occupied_housing_units', 'Total_population_in_occupied_housing_units', 'Median_Estimated_Home_Value_owner_occupied_units_', 'Total_Population', 'Median_Gross_rent_as_percentage_of_household_inc', 'White_Population', 'Black/African_American_Population', 'Native_American_Population', 'Asian_Population', 'Pacific_Islander_Population', 'Some_other_race_Population', 'Mixed_Race_Population', 'Median_Age', 'Median_Household_Income', 'Total_Population_over_25', 'B15003_022E', 'B15003_023E', 'B15003_024E', 'B15003_025E', 'Median_Gross_Rent', 'Homeowner_households', 'Renter_households', 'Housing_units_with_mortgage', 'B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E', 'B19001_007E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E', 'B19001_017E', 'Total_housing_Units', 'B25024_006E', 'B25024_007E', 'B25024_008E', 'B25024_009E', 'Total_Units', 'Units_with_9plus_Rooms', 'Families_making_more_than_5x_poverty_level_income', 'People_who_moved_in_the_past_year_within_same_county', 'Moved_within_same_state_but_not_same_county', 'Moved_from_different_state_same_country', 'Moved_from_different_country', 'Median_age_of_people_who_moved_from_different_state', 'Moved_within_same_county_bachelors_degree', 'Moved_from_different_state_At_or_above_150_percent_of_the_poverty_level', 'Number_of_people_who_work_at_home', 'Number_of_people_who_walk_to_work', 'White_women_25-29', 'Born_in_germany_population', 'Number_of_people_who_take_Non_taxi_public_transport_to_work', 'Number_of_people_who_work_in_county_government', 'Number_of_people_whose_Commuting_time_under_10_mins', 'Number_of_people_whose_commute_is_45-60_mins', 'Number_of_people_whose_commute_is_60-90_mins', 'Number_of_people_whose_commute_is_90plus_mins', 'Number_of_Sales_and_office_workers', 'Number_of_people_in_management_business_science_and_arts', 'Number_of_service_workers', 'Number_of_educational_and_health_service_workers', 'Number_of_arts_entertainment_and_food_service_workers', 'Number_of_finance_and_real_estate_workers', 'Number_of_tech_workers', 'Private_for-profit_wage_and_salary_workers', 'Self-employed_in_own_incorporated_business_workers', 'Local_government_workers', 'Federal_government_workers', 'Self-employed_in_own_not_incorporated_business_workers', 'People_in_households_receiving_SNAP_and_extra_social_security_income', 'Civilians_aged_25-64_with_more_than_a_bachelors_degree', 'Men_over_16_in_Education_legal_community_service_arts_and_media_occupations', 'Men_over_16_in_Food_preparation_and_serving_related_occupations', 'B08006_002E', 'B08006_003E', 'B08006_004E', 'B08006_005E', 'B08006_006E', 'B08006_007E', 'B08006_009E', 'B08006_010E', 'B08006_011E', 'B08006_012E', 'B08006_013E', 'B08006_014E', 'B08006_016E', 'B08006_019E', 'B08006_020E', 'B08006_021E', 'B08006_022E', 'B08006_023E', 'B08006_024E', 'B08006_025E', 'B08006_026E', 'B08006_027E', 'B08006_028E', 'B08006_029E', 'B08006_030E', 'B08006_031E', 'B08006_032E', 'B08006_033E', 'B08006_034E', 'B08006_036E', 'B08006_037E', 'B08006_038E', 'B08006_039E', 'B08006_040E', 'B08006_041E', 'B08006_042E', 'B08006_043E', 'B08006_044E', 'B08006_045E', 'B08006_046E', 'B08006_047E', 'B08006_048E', 'B08006_049E', 'B08006_050E', 'B08006_051E', 'B08007_002E', 'B08007_004E', 'B08007_005E', 'B08007_007E', 'B08007_008E', 'B08007_009E', 'B08007_010E', 'B08007_012E', 'B08007_013E', 'B08007_014E', 'B08007_015E', 'B08008_002E', 'B08008_003E', 'B08008_004E', 'B08008_005E', 'B08008_006E', 
'B08008_007E', 'B08008_008E', 'B08008_009E', 'B08008_010E', 'B08008_011E', 'B08008_012E', 'B08008_013E', 'B08008_014E', 'B08013_001E', 'B08013_002E', 'B08013_003E', 'B08014_002E', 'B08014_003E', 'B08014_004E', 'B08014_005E', 'B08014_006E', 'B08014_007E', 'B08014_009E', 'B08014_010E', 'B08014_011E', 'B08014_012E', 'B08014_013E', 'B08014_014E', 'B08014_016E', 'B08014_017E', 'B08014_018E', 'B08014_019E', 'B08014_020E', 'B08014_021E', 'B08015_001E', 'B08015_002E', 'B08015_003E', 'B08105A_004E', 'B08105B_003E', 'B08111_002E', 'B08111_003E', 'B08111_004E', 'B08111_005E', 'B08113_002E', 'B08113_003E', 'B08113_004E', 'B08113_005E', 'B08113_006E', 'B08113_007E', 'B08113_008E', 'B13002_002E', 'B13002_003E', 'B13002_004E', 'B13002_005E', 'B13002_006E', 'B13002_007E', 'B13002_008E', 'B13002_009E', 'B13002_010E', 'B13002_011E', 'B13002_012E', 'B13002_013E', 'B13002_014E', 'B13002_015E', 'B13002_016E', 'B13002_017E', 'B13002_018E', 'B13002_019E', 'B13002A_002E', 'B13002A_003E', 'B13002A_004E', 'B13002A_005E', 'B13002A_006E', 'B13002A_007E', 'B13002B_002E', 'B13002B_003E', 'B13002B_004E', 'B13002B_005E', 'B13002B_006E', 'B13002B_007E', 'B13002C_002E', 'B13002C_003E', 'B13002C_004E', 'B13002C_005E', 'B13002C_006E', 'B13002C_007E', 'B13002D_002E', 'B13002D_003E', 'B13002D_004E', 'B13002D_005E', 'B13002D_006E', 'B13002D_007E', 'B13002E_002E', 'B13002E_003E', 'B13002E_004E', 'B13002E_005E', 'B13002E_006E', 'B13002E_007E', 'B13002F_002E', 'B13002F_003E', 'B13002F_004E', 'B13002F_005E', 'B13002F_006E', 'B13002F_007E', 'B13002G_002E', 'B13002G_003E', 'B13002G_004E', 'B13002G_005E', 'B13002G_006E', 'B13002G_007E', 'B13002H_002E', 'B13002H_003E', 'B13002H_004E', 'B13002H_005E', 'B13002H_006E', 'B13002H_007E', 'B13002I_002E', 'B13002I_003E', 'B13002I_004E', 'B13002I_005E', 'B13002I_006E', 'B13002I_007E', 'B13004_002E', 'B13004_003E', 'B13004_004E', 'B13004_005E', 'B13004_006E', 'B13004_007E', 'B13004_008E', 'B13004_009E', 'B13004_010E', 'B13004_011E', 'B13008_002E', 'B13008_003E', 'B13008_004E', 'B13008_005E', 'B13008_006E', 'B13008_007E', 'B13008_008E', 'B13008_009E', 'B13008_010E', 'B13008_011E', 'B13008_012E', 'B13008_013E', 'B13008_014E', 'B13008_015E', 'B13010_002E', 'B13010_003E', 'B13010_004E', 'B13010_005E', 'B13010_006E', 'B13010_007E', 'B13010_008E', 'B13010_009E', 'B13010_010E', 'B13010_011E', 'B13010_012E', 'B13010_013E', 'B13010_014E', 'B13010_015E', 'B13010_016E', 'B13010_017E', 'B13010_018E', 'B13010_019E', 'B13012_002E', 'B13012_003E', 'B13012_004E', 'B13012_005E', 'B13012_006E', 'B13012_007E', 'B13012_008E', 'B13012_009E', 'B13012_010E', 'B13012_011E', 'B13012_012E', 'B13012_013E', 'B13012_014E', 'B13012_015E', 'B13014_002E', 'B13014_003E', 'B13014_004E', 'B13014_005E', 'B13014_006E', 'B13014_007E', 'B13014_008E', 'B13014_009E', 'B13014_010E', 'B13014_011E', 'B13014_012E', 'B13014_013E', 'B13014_014E', 'B13014_015E', 'B13014_016E', 'B13014_017E', 'B13014_018E', 'B13014_019E', 'B13014_020E', 'B13014_021E', 'B13014_022E', 'B13014_023E', 'B13014_024E', 'B13014_025E', 'B13014_026E', 'B13014_027E', 'B13015_002E', 'B13015_003E', 'B13015_004E', 'B13015_005E', 'B13015_006E', 'B13015_007E', 'B13015_008E', 'B13015_009E', 'B13015_010E', 'B13015_011E', 'B13015_012E', 'B13015_013E', 'B13015_014E', 'B13015_015E', 'B13016_002E', 'B13016_003E', 'B13016_004E', 'B13016_005E', 'B13016_006E', 'B13016_007E', 'B13016_008E', 'B13016_009E', 'B13016_010E', 'B13016_011E', 'B13016_012E', 'B13016_013E', 'B13016_014E', 'B13016_015E', 'B13016_016E', 'B13016_017E', 'B14001_002E', 'B14001_003E', 'B14001_004E', 'B14001_005E', 
'B14001_006E', 'B14001_007E', 'B14001_008E', 'B14001_009E', 'B14001_010E', 'B14002_003E', 'B14002_004E', 'B14002_005E', 'B14002_006E', 'B14002_007E', 'B14002_008E', 'B14002_009E', 'B14002_010E', 'B14002_011E', 'B14002_012E', 'B14002_013E', 'B14002_014E', 'B14002_015E', 'B14002_016E', 'B14002_017E', 'B14002_018E', 'B14002_019E', 'B14002_020E', 'B14002_021E', 'B14002_022E', 'B14002_023E', 'B14002_024E', 'B14002_025E', 'B14002_027E', 'B14002_028E', 'B14002_029E', 'B14002_030E', 'B14002_031E', 'B14002_032E', 'B14002_033E', 'B14002_034E', 'B14002_035E', 'B14002_036E', 'B14002_037E', 'B14002_038E', 'B14002_039E', 'B14002_040E', 'B14002_041E', 'B14002_042E', 'B14002_043E', 'B14002_044E', 'B14002_045E', 'B14002_046E', 'B14002_047E', 'B14002_048E', 'B14002_049E', 'B14003_003E', 'B14003_004E', 'B14003_005E', 'B14003_006E', 'B14003_007E', 'B14003_008E', 'B14003_009E', 'B14003_010E', 'B14003_011E', 'B14003_012E', 'B14003_013E', 'B14003_014E', 'B14003_015E', 'B14003_016E', 'B14003_017E', 'B14003_018E', 'B14003_019E', 'B14003_020E', 'B14003_021E', 'B14003_022E', 'B14003_023E', 'B14003_024E', 'B14003_025E', 'B14003_026E', 'B14003_027E', 'B14003_028E', 'B14003_029E', 'B14003_031E', 'B14003_032E', 'B14003_033E', 'B14003_034E', 'B14003_035E', 'B14003_036E', 'B14003_037E', 'B14003_038E', 'B14003_039E', 'B14003_040E', 'B14003_041E', 'B14003_042E', 'B14003_043E', 'B14003_044E', 'B14003_045E', 'B14003_046E', 'B14003_047E', 'B14003_048E', 'B14003_049E', 'B14003_050E', 'B14003_051E', 'B14003_052E', 'B14003_053E', 'B14003_054E', 'B14003_055E', 'B14003_056E', 'B14003_057E', 'B14004_003E', 'B14004_004E', 'B14004_005E', 'B14004_006E', 'B14004_007E', 'B14004_008E', 'B14004_009E', 'B14004_010E', 'B14004_011E', 'B14004_012E', 'B14004_013E', 'B14004_014E', 'B14004_015E', 'B14004_016E', 'B14004_017E', 'B14004_019E', 'B14004_020E', 'B14004_021E', 'B14004_022E', 'B14004_023E', 'B14004_024E', 'B14004_025E', 'B14004_026E', 'B14004_027E', 'B14004_028E', 'B14004_029E', 'B14004_030E', 'B14004_031E', 'B14004_032E', 'B14004_033E', 'B14005_003E', 'B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_007E', 'B14005_008E', 'B14005_009E', 'B14005_010E', 'B14005_011E', 'B14005_012E', 'B14005_013E', 'B14005_014E', 'B14005_015E', 'B14005_017E', 'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_021E', 'B14005_022E', 'B14005_023E', 'B14005_024E', 'B14005_025E', 'B14005_026E', 'B14005_027E', 'B14005_028E', 'B14005_029E', 'B14006_002E', 'B14006_003E', 'B14006_004E', 'B14006_005E', 'B14006_006E', 'B14006_007E', 'B14006_008E', 'B14006_009E', 'B14006_010E', 'B14006_011E', 'B14006_012E', 'B14006_013E', 'B14006_014E', 'B14006_015E', 'B14006_016E', 'B14006_017E', 'B14006_018E', 'B14006_019E', 'B14006_020E', 'B14006_021E', 'B14007_003E', 'B14007_004E', 'B14007_005E', 'B14007_006E', 'B14007_007E', 'B14007_008E', 'B14007_009E', 'B14007_010E', 'B14007_011E', 'B14007_012E', 'B14007_013E', 'B14007_014E', 'B14007_015E', 'B14007_016E', 'B14007_017E', 'B14007_018E', 'B14007A_003E', 'B14007A_004E', 'B14007A_005E', 'B14007A_006E', 'B14007A_007E', 'B14007A_008E', 'B14007A_009E', 'B14007A_010E', 'B14007A_011E', 'B14007A_012E', 'B14007A_013E', 'B14007A_014E', 'B14007A_015E', 'B14007A_016E', 'B14007A_017E', 'B14007A_018E', 'B14007A_019E', 'B14007B_002E', 'B14007B_003E', 'B14007B_004E', 'B14007B_005E', 'B14007B_006E', 'B14007B_007E', 'B14007B_008E', 'B14007B_009E', 'B14007B_010E', 'B14007B_011E', 'B14007B_012E', 'B14007B_013E', 'B14007B_014E', 'B14007B_015E', 'B14007B_016E', 'B14007B_017E', 'B14007B_018E', 'B14007B_019E', 'B14007C_002E', 'B14007C_003E', 
'B14007C_004E', 'B14007C_005E', 'B14007C_006E', 'B14007C_007E', 'B14007C_008E', 'B14007C_009E', 'B14007C_010E', 'B14007C_011E', 'B14007C_012E', 'B14007C_013E', 'B14007C_014E', 'B14007C_015E', 'B14007C_016E', 'B14007C_017E', 'B14007C_018E', 'B14007C_019E', 'B14007D_002E', 'B14007D_003E', 'B14007D_004E', 'B14007D_005E', 'B14007D_006E', 'B14007D_007E', 'B14007D_008E', 'B14007D_009E', 'B14007D_010E', 'B14007D_011E', 'B14007D_012E', 'B14007D_013E', 'B14007D_014E', 'B14007D_015E', 'B14007D_016E', 'B14007D_017E', 'B14007D_018E', 'B14007D_019E', 'B19054_002E', 'B19054_003E', 'B19055_002E', 'B19055_003E', 'B19056_002E', 'B19056_003E', 'B19057_002E', 'B19057_003E', 'B19058_002E', 'B19058_003E', 'B19059_002E', 'B19059_003E', 'B19060_002E', 'B19060_003E', 'B08016_002E', 'B08016_003E', 'B08016_004E', 'B08016_005E', 'B08016_006E', 'B08016_007E', 'B08016_008E', 'B08016_009E', 'B08016_010E', 'B08016_011E', 'B08016_012E', 'B08016_013E', 'B08016_014E', 'B08016_015E', 'B08016_016E', 'B08016_017E', 'B08016_018E', 'B08016_019E', 'B08016_020E', 'B08016_021E', 'B08016_022E', 'B08016_023E', 'B08017_002E', 'B08017_003E', 'B08017_004E', 'B08017_005E', 'B08017_006E', 'B08017_007E', 'B08017_008E', 'B08017_009E', 'B08017_010E', 'B08017_011E', 'B08017_012E', 'B08017_013E', 'B08017_015E', 'B08017_016E', 'B08017_017E', 'B08017_018E', 'B08017_019E', 'B08017_020E', 'B08017_021E', 'B08017_022E', 'B08017_023E', 'B08018_002E', 'B08018_003E', 'B08018_004E', 'B08018_005E', 'B08018_006E', 'B08018_007E', 'B08018_008E', 'B08101_049E', 'B08105A_007E', 'B08105B_007E', 'B08105C_007E', 'B08105D_007E', 'B08105E_007E', 'B08105F_007E', 'B08105G_007E', 'B08105H_007E', 'B08105I_007E', 'B08111_031E', 'B08113_049E', 'B08119_055E', 'B08121_007E', 'B08122_025E', 'B08122_026E', 'B08122_027E', 'B08122_028E', 'B24080_003E', 'B24080_004E', 'B24080_005E', 'B24080_006E', 'B24080_007E', 'B24080_008E', 'B24080_009E', 'B24080_010E', 'B24080_011E', 'B24080_012E', 'B24080_013E', 'B24080_014E', 'B24080_015E', 'B24080_016E', 'B24080_017E', 'B24080_018E', 'B24080_019E', 'B24080_020E', 'B24080_021E', 'B24081_001E', 'B24081_002E', 'B24081_003E', 'B24081_004E', 'B24081_005E', 'B24081_006E', 'B24081_007E', 'B24081_008E', 'B24081_009E', 'B24082_001E', 'B24082_002E', 'B24082_003E', 'B24082_004E', 'B24082_005E', 'B24082_006E', 'B24082_007E', 'B24082_008E', 'B24082_009E', 'B24082_010E', 'B24082_011E', 'B24082_012E', 'B24082_013E', 'B24082_014E', 'B24082_015E', 'B24082_016E', 'B24082_017E', 'B24082_018E', 'B24090_001E', 'B24090_002E', 'B24090_003E', 'B24090_004E', 'B24090_005E', 'B24090_006E', 'B24090_007E', 'B24090_008E', 'B24090_009E', 'B24090_010E', 'B24090_011E', 'B24090_012E', 'B24090_013E', 'B24090_014E', 'B24090_015E', 'B24090_016E', 'B24090_017E', 'B24090_018E', 'B24090_019E', 'B24090_020E', 'B24090_021E', 'B24091_001E', 'B24091_002E', 'B24091_003E', 'B24091_004E', 'B24091_005E', 'B24091_006E', 'B24091_007E', 'B24091_008E', 'B24091_009E', 'B24092_001E', 'B24092_002E', 'B24092_003E', 'B24092_004E', 'B24092_005E', 'B24092_006E', 'B24092_007E', 'B24092_008E', 'B24092_009E', 'B24092_010E', 'B24092_011E', 'B24092_012E', 'B24092_013E', 'B24092_014E', 'B24092_015E', 'B24092_016E', 'B24092_017E', 'B24092_018E', 'C24040_001E', 'C24040_002E', 'C24040_003E', 'C24040_004E', 'C24040_005E', 'C24040_006E', 'C24040_007E', 'C24040_008E', 'C24040_009E', 'C24040_010E', 'C24040_011E', 'C24040_012E', 'C24040_013E', 'C24040_014E', 'C24040_015E', 'C24040_016E', 'C24040_017E', 'C24040_018E', 'C24040_019E', 'C24040_020E', 'C24040_021E', 'C24040_022E', 'C24040_023E', 'C24040_024E', 
'C24040_025E', 'C24040_026E', 'C24040_027E', 'C24040_028E', 'C24040_029E', 'C24040_030E', 'C24040_031E', 'C24040_032E', 'C24040_033E', 'C24040_034E', 'C24040_035E', 'C24040_036E', 'C24040_037E', 'C24040_038E', 'C24040_039E', 'C24040_040E', 'C24040_041E', 'C24040_042E', 'C24040_043E', 'C24040_044E', 'C24040_045E', 'C24040_046E', 'C24040_047E', 'C24040_048E', 'C24040_049E', 'C24040_050E', 'C24040_051E', 'C24040_052E', 'C24040_053E', 'C24040_054E', 'C24040_055E', 'C24050_001E', 'C24050_002E', 'C24050_003E', 'C24050_004E', 'C24050_005E', 'C24050_006E', 'C24050_007E', 'C24050_008E', 'C24050_009E', 'C24050_010E', 'C24050_011E', 'C24050_012E', 'C24050_013E', 'C24050_014E', 'C24050_015E', 'C24050_016E', 'C24050_017E', 'C24050_018E', 'C24050_019E', 'C24050_020E', 'C24050_021E', 'C24050_022E', 'C24050_023E', 'C24050_024E', 'C24050_025E', 'C24050_026E', 'C24050_027E', 'C24050_028E', 'C24050_029E', 'C24050_030E', 'C24050_031E', 'C24050_032E', 'C24050_033E', 'C24050_034E', 'C24050_035E', 'C24050_036E', 'C24050_037E', 'C24050_038E', 'C24050_039E', 'C24050_040E', 'C24050_041E', 'C24050_042E', 'C24050_043E', 'C24050_044E', 'C24050_045E', 'C24050_046E', 'C24050_047E', 'C24050_048E', 'C24050_049E', 'C24050_050E', 'C24050_051E', 'C24050_052E', 'C24050_053E', 'C24050_054E', 'C24050_055E', 'C24050_056E', 'C24050_057E', 'C24050_058E', 'C24050_059E', 'C24050_060E', 'C24050_061E', 'C24050_062E', 'C24050_063E', 'C24050_064E', 'C24050_065E', 'C24050_066E', 'C24050_067E', 'C24050_068E', 'C24050_069E', 'C24050_070E', 'C24050_071E', 'C24050_072E', 'C24050_073E', 'C24050_074E', 'C24050_075E', 'C24050_076E', 'C24050_077E', 'C24050_078E', 'C24050_079E', 'C24050_080E', 'C24050_081E', 'C24050_082E', 'C24050_083E', 'C24050_084E', 'C24060_001E', 'C24060_002E', 'C24060_003E', 'C24060_004E', 'C24060_005E', 'C24060_006E', 'C24060_007E', 'C24060_008E', 'C24060_009E', 'C24060_010E', 'C24060_011E', 'C24060_012E', 'C24060_013E', 'C24060_014E', 'C24060_015E', 'C24060_016E', 'C24060_017E', 'C24060_018E', 'C24060_019E', 'C24060_020E', 'C24060_021E', 'C24060_022E', 'C24060_023E', 'C24060_024E', 'C24060_025E', 'C24060_026E', 'C24060_027E', 'C24060_028E', 'C24060_029E', 'C24060_030E', 'C24060_031E', 'C24060_032E', 'C24060_033E', 'C24060_034E', 'C24060_035E', 'C24060_036E', 'C24070_001E', 'C24070_002E', 'C24070_003E', 'C24070_004E', 'C24070_005E', 'C24070_006E', 'C24070_007E', 'C24070_008E', 'C24070_009E', 'C24070_010E', 'C24070_011E', 'C24070_012E', 'C24070_013E', 'C24070_014E', 'C24070_015E', 'C24070_016E', 'C24070_017E', 'C24070_018E', 'C24070_019E', 'C24070_020E', 'C24070_021E', 'C24070_022E', 'C24070_023E', 'C24070_024E', 'C24070_025E', 'C24070_026E', 'C24070_027E', 'C24070_029E', 'C24070_030E', 'C24070_031E', 'C24070_032E', 'C24070_033E', 'C24070_034E', 'C24070_035E', 'C24070_036E', 'C24070_037E', 'C24070_038E', 'C24070_039E', 'C24070_040E', 'C24070_041E', 'C24070_043E', 'C24070_044E', 'C24070_045E', 'C24070_046E', 'C24070_047E', 'C24070_048E', 'C24070_049E', 'C24070_050E', 'C24070_051E', 'C24070_052E', 'C24070_053E', 'C24070_054E', 'C24070_055E', 'C24070_057E', 'C24070_058E', 'C24070_059E', 'C24070_060E', 'C24070_061E', 'C24070_062E', 'C24070_063E', 'C24070_064E', 'C24070_065E', 'C24070_066E', 'C24070_067E', 'C24070_068E', 'C24070_069E', 'C24070_070E', 'C24070_071E', 'C24070_072E', 'C24070_073E', 'C24070_074E', 'C24070_075E', 'C24070_076E', 'C24070_077E', 'C24070_078E', 'C24070_079E', 'C24070_080E', 'C24070_081E', 'C24070_082E', 'C24070_083E', 'C24070_084E', 'B27001_004E', 'B27001_005E', 'B27001_006E', 'B27001_007E', 'B27001_008E', 
'B27001_009E', 'B27001_010E', 'B27001_011E', 'B27001_012E', 'B27001_013E', 'B27001_014E', 'B27001_015E', 'B27001_016E', 'B27001_017E', 'B27001_018E', 'B27001_019E', 'B27001_020E', 'B27001_021E', 'B27001_022E', 'B27001_023E', 'B27001_024E', 'B27001_025E', 'B27001_026E', 'B27001_027E', 'B27001_028E', 'B27001_029E', 'B27001_030E', 'B27001_031E', 'B27001_032E', 'B27001_033E', 'B27001_034E', 'B27001_035E', 'B27001_036E', 'B27001_037E', 'B27001_038E', 'B27001_039E', 'B27001_040E', 'B27001_041E', 'B27001_042E', 'B27001_043E', 'B27001_044E', 'B27001_045E', 'B27001_046E', 'B27001_047E', 'B27001_048E', 'B27001_049E', 'B27001_050E', 'B27001_051E', 'B27001_052E', 'B27001_053E', 'B27001_054E', 'B27001_055E', 'B27001_056E', 'B27001_057E', 'Bachelorsplus', 'Households_with_Income_lessthan35k', 'Households_with_Income_100kplus', 'Pct_of_housing_units_in_4plus_unit_buildings']
feat_cols = []
for x in feature_columns:
    x = x.strip()  # strip() returns a new string, so the result must be reassigned
    feat_cols.append(tf.feature_column.numeric_column(x))
# Normalize data
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
X_train = norm(train_dataset)
y_train = train_labels
X_test = norm(test_dataset)
y_test = test_labels
# Define the input function
BATCH_SIZE = 10
epochs = None
input_func=tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train,batch_size=BATCH_SIZE,num_epochs=None,shuffle=True)
eval_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      y=y_test,
                                                      batch_size=10,
                                                      num_epochs=1,
                                                      shuffle=False)
test_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      batch_size=100,
                                                      num_epochs=1,
                                                      shuffle=False)
dnn_regressor = tf.estimator.DNNRegressor(
    feature_columns=feat_cols,
    hidden_units=[1024, 512, 256],
    optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.01,
        l1_regularization_strength=0.01
    ))
# Train model
dnn_regressor.train(input_fn=input_func,steps=1000)
# Predictions
pred_input_func=tf.estimator.inputs.pandas_input_fn(x=X_test,batch_size=BATCH_SIZE,num_epochs=1,shuffle=False)
predictions=list(dnn_regressor.predict(input_fn=pred_input_func))
# Clear Cache
all_data = pd.DataFrame()
all_data4 = pd.DataFrame()
X_train = pd.DataFrame()
y_train = pd.DataFrame()
X_test = pd.DataFrame()
y_test = pd.DataFrame()
train_dataset = pd.DataFrame()
test_dataset = pd.DataFrame()
train_stats = pd.DataFrame()
train_labels = pd.DataFrame()
test_labels = pd.DataFrame()
# Normalize function
def norm(x, train_stats):
    return (x - train_stats['mean']) / train_stats['std']
# Append machine learning outputs
def append_ML_outputs(dataframe, year, dnn_regressor):
    dataframe = dataframe[dataframe['Year'].isin([year])]
    print(len(dataframe))
    cols = dataframe.columns.tolist()
    cols = cols[-2:] + cols[:-2]
    cols.insert(0, cols.pop(cols.index('LocationplusType')))
    dataframe = dataframe[cols]
    dataframe = dataframe.replace([np.inf, -np.inf], np.nan)
    dataframe = dataframe.fillna(0)
    print(len(dataframe))
    stats = dataframe.describe()
    stats = stats.transpose()
    dataframe3 = dataframe.drop(['LocationplusType', 'Tract_number', 'Year'], axis=1)
    print(len(dataframe3))
    normed_data = norm(dataframe3, stats)
    normed_data = pd.merge(dataframe[['LocationplusType', 'Tract_number', 'Year']], normed_data, left_index=True, right_index=True)
    dataframe4 = normed_data.drop(['LocationplusType', 'Year_x', 'Tract_number_x'], axis=1)
    dataframe4 = dataframe4.drop(['3_year_appreciation'], axis=1)
    print(len(dataframe4))
    pred_input_func = tf.estimator.inputs.pandas_input_fn(x=dataframe4, batch_size=BATCH_SIZE, num_epochs=1, shuffle=False)
    example_result = pd.DataFrame(dnn_regressor.predict(input_fn=pred_input_func))
    orig_data = dataframe.reset_index(drop=True)
    df_test = pd.merge(orig_data[['LocationplusType', 'Year']], example_result, left_index=True, right_index=True)
    df_test.rename(columns={0: 'Predicted Growth Rank'}, inplace=True)
    return df_test
# Re-read the dataset (all_data was emptied in the cache-clearing step above)
all_data = pd.read_csv('ML_DATASET.csv')
all_data = all_data.fillna(0)
all_data.columns = all_data.columns.str.replace('+', 'plus', regex=False)
all_data.columns = all_data.columns.str.replace(')', ' ', regex=False)
all_data.columns = all_data.columns.str.replace('!', ' ', regex=False)
all_data.columns = all_data.columns.str.replace('(', ' ', regex=False)
all_data.columns = all_data.columns.str.replace(',', ' ', regex=False)
all_data.columns = all_data.columns.str.replace(' ', '_', regex=False)
all_data.columns = all_data.columns.str.replace('__', '_', regex=False)
all_data.columns = all_data.columns.str.replace('%', 'percentage', regex=False)
all_data.columns = all_data.columns.str.replace('$', '', regex=False)
all_data.columns = all_data.columns.str.replace('<', 'lessthan', regex=False)
# len(df)
df_list = []
for year in all_data['Year'].unique():
    df_list.append(append_ML_outputs(all_data, year, dnn_regressor))
df_final = pd.concat(df_list)
# Write the predicted values to a new file
df_final.to_csv('predicted_values.csv',index=False)
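As a quick sanity check (a minimal diagnostic sketch of my own, not part of the pipeline above; it assumes the predictions list produced earlier by dnn_regressor.predict()), counting the distinct values the model returns makes the collapse easy to see:
# DNNRegressor.predict() yields one dict per example with a 'predictions' array
pred_values = np.array([p['predictions'][0] for p in predictions])
print(len(np.unique(np.round(pred_values, 6))), 'distinct values out of', len(pred_values))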
EDIT:
I am now including a GitHub repo, which you can find here. It contains the data and the source code. To clarify, the issue is with the last block of code in the notebook: after a certain number of predicted values, I get the same predicted value over and over.
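Since the problem shows up in that last block, one thing worth checking (a hedged diagnostic sketch of mine, not code from the repo) is whether the per-year normalization inside append_ML_outputs produces degenerate inputs: any column whose std is 0 for a given year normalizes to 0/0 = NaN, and those values are still in dataframe4 when it is fed to predict(), while fully duplicated input rows necessarily produce identical predictions. Placed just after dataframe4 is built, something like this would show it:
    # Diagnostic sketch (assumes it is pasted inside append_ML_outputs, right after dataframe4 is built)
    zero_std_cols = stats.index[stats['std'] == 0].tolist()
    print('columns with zero std for this year:', len(zero_std_cols))
    print('NaN cells after normalization:', int(dataframe4.isnull().values.sum()))
    print('fully duplicated input rows:', int(dataframe4.duplicated().sum()))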
UPDATED EDIT:
I realized the large ML_DATA.csv file was not in the GitHub repo I linked. The file is 3.6 GB, so I had to zip it before pushing it. All the data should be there now.
Related
How to process dataframe from a list of csv with pandas
I am writing a program to process a set of answers from a csv. The csv is constructed like so: I have written a program that loads the data: positive results,resources,priorities,team focus,benefits,help,action steps,today,tomorrow,yesterday [studied],[schaums outlines], [Continue working on proof of concept for commitment process],[Continue working on proof of concept for commitment process],[By completing the commitment process demonstration I will have something to show stakeholders and I will feel good],[I need resources advice support and financial assistance so that I can continue to develop my project.],[Schedule time to study],[11:00AM review the Art of Discipline 12:00pm lunch - contact BOA 1:00pm (break and catch up) 1:15pm continue developing the commitment process. - follow up with XXX 3:00pm break (snack) 3:15pm prepare for meeting with Dr. XXX 3:30pm meeting with Dr. XXX 4:00pm PDMP 5:00pm Run 6:00pm Dinner 6:30pm work on communication skills 7:00pm reading 8:00pm Journaling 8:30pm reading],[6:00-7:30am study 7:30am PDMP 8:00am review email 9:00am review priorities 9:30am meeting with XXXX 12:00pm lunch 5:00pm run 6:00pm dinner 6:30pm PDMP 8:00pm journaling 8:30pm reading] ,[ 5:00AM woke up 6:00am reading/ developing communication skills 7:30-12:00pm 1:00pm lunch, dropped off suit to get fitted 2:00pm weekly planning 3:00pm leaving for XXXX 4:00pm dinner 4:30pm budgeting review 5:00pm drove home 5:30-6:30pm planning 7:30pm pack up belongings 8:00pm journaling 8:30pm read] import spacy nlp = spacy.load('en_core_web_sm') punctuations = string.punctuation def cleanup_text(docs, logging=False): texts = [] counter = 1 for doc in docs: if counter % 1000 == 0 and logging: print("Processed %d out of %d documents." % (counter, len(docs))) counter += 1 doc = nlp(doc, disable=['parser', 'ner']) tokens = [tok.lemma_.lower().strip() for tok in doc if tok.lemma_ != '-PRON-'] tokens = [tok for tok in tokens if tok not in stopwords and tok not in punctuations] tokens = ' '.join(tokens) texts.append(tokens) return pd.Series(texts) positive_text = [text for text in train[train['benefits'] == 'good']['action steps']] negative_text = [text for text in train[train['benefits'] == 'bad']['action steps']] positive_clean = cleanup_text(positive_text) positive_clean = ' '.join(positive_text).split() negative_clean = cleanup_text(negative_text) negative_clean = ' '.join(negative_clean).split() # 3. 
Calculate total positive words and negative words positive_counts = Counter(positive_clean) negative_counts = Counter(negative_clean) positive_common_words = [word[0] for word in positive_counts.most_common(20)] negative_common_counts = [word[1] for word in negative_counts.most_common(20)] from sklearn.feature_extraction.text import CountVectorizer from sklearn.base import TransformerMixin from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as sklearn_stop_words from sklearn.metrics import accuracy_score from nltk.corpus import stopwords import string import re import spacy spacy.load('en_core_web_sm') from spacy.lang.en import English parser = English() STOPLIST = set(stopwords.words('english') + list(sklearn_stop_words)) SYMBOLS = " ".join(string.punctuation).split(" ") + ["-", "...", "”", "”"] class CleanTextTransformer(TransformerMixin): def transform(self, X, **transform_params): return [cleanText(text) for text in X] def fit(self, X, y=None, **fit_params): return self def get_params(self, deep=True): return {} def cleanText(text): text = text.strip().replace("\n", " ").replace("\r", " ") text = text.lower() return text def tokenizeText(sample): tokens = parser(sample) lemmas = [] for tok in tokens: lemmas.append(tok.lemma_.lower().strip() if tok.lemma_ != "-PRON-" else tok.lower_) tokens = lemmas tokens = [tok for tok in tokens if tok not in STOPLIST] tokens = [tok for tok in tokens if tok not in SYMBOLS] return tokens def printNMostInformative(vectorizer, clf, N): feature_names = vectorizer.get_feature_names_out() coefs_with_fns = sorted(zip(clf.coef_[0], feature_names)) topClass1 = coefs_with_fns[:N] topClass2 = coefs_with_fns[:-(N + 1):-1] print("Class 1 best: ") for feat in topClass1: print(feat) print("Class 2 best: ") for feat in topClass2: print(feat) vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1)) clf = LinearSVC() pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)]) # data train1 = train['benefits'].tolist() labelsTrain1 = train['action steps'].tolist() test1 = test['benefits'].tolist() labelsTest1 = test['action steps'].tolist() # train pipe.fit(train1, labelsTrain1) # test preds = pipe.predict(test1) print("accuracy:", accuracy_score(labelsTest1, preds)) print("Top 10 features used to predict: ") printNMostInformative(vectorizer, clf, 10) pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)]) transform = pipe.fit_transform(train1, labelsTrain1) vocab = vectorizer.get_feature_names_out() for i in range(len(train1)): s = "" indexIntoVocab = transform.indices[transform.indptr[i]:transform.indptr[i+1]] numOccurences = transform.data[transform.indptr[i]:transform.indptr[i+1]] for idx, num in zip(indexIntoVocab, numOccurences): s += str((vocab[idx], num)) from sklearn import metrics print(metrics.classification_report(labelsTest1, preds, target_names=df['benefits'].unique())) I would like to use spacy after loading the data to process the positive and negative sentiment from the text content. Expected: Data showing the text from the benefits and action steps columns. 
Actual: File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 214, in <module> pipe.fit(train1, labelsTrain1) File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 390, in fit Xt = self._fit(X, y, **fit_params_steps) File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 348, in _fit X, fitted_transformer = fit_transform_one_cached( File "/usr/local/lib/python3.9/site-packages/joblib/memory.py", line 352, in __call__ return self.func(*args, **kwargs) File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 891, in _fit_transform_one res = transformer.fit_transform(X, y, **fit_params) File "/usr/local/lib/python3.9/site-packages/sklearn/base.py", line 847, in fit_transform return self.fit(X, y, **fit_params).transform(X) File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 170, in transform return [cleanText(text) for text in X] File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 170, in <listcomp> return [cleanText(text) for text in X] File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 177, in cleanText text = text.strip().replace("\n", " ").replace("\r", " ") AttributeError: 'float' object has no attribute 'strip'
Scikit Learn fit(): Setting an array element with a sequence fit
I am trying to call scikit learn fit functions on dataframes where the elements of each column are numpy arrays. However, I get the error "setting an array element with a sequence," presumably because I am trying to call fit on a dataframe of arrays rather than scalar values. How do I work around this? I'd really appreciate some help. Here is my code. You can find the data I'm using here: https://competitions.codalab.org/competitions/21163 training_data = pd.read_csv('/train.tsv', sep='\t') testing_data = pd.read_csv('/dev.tsv', sep='\t') tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',do_lower_case=True,max_length=1024) model = BertModel.from_pretrained('bert-base-uncased') model = model.to(device) # These are used to map the data to their appropriate column on each pass pomt_train_x = pd.DataFrame(columns=["claim", "reason", "category", "speaker", "checker", "tags", "claim entities", "article title"]) feature_dict = {1: "claim", 4: "reason", 5: "category", 6: "speaker", 7: "checker", 8: "tags", 9: "claim entities", 10: "article title"} # Sort the data appropriately. for i, data in enumerate(training_data[training_data.columns].to_numpy()): if 'pomt' in data[0]: appended_data = {} for j, sentence in enumerate(data): if j in feature_dict: inputs = tokenizer(str(sentence), return_tensors="pt", max_length=512, pad_to_max_length=True).to(device) outputs = model(**inputs) appended_data[feature_dict[j]] = outputs.last_hidden_state[:,0][0].cpu().detach().numpy() pomt_train_x = pomt_train_x.append(appended_data, ignore_index=True) print(f"{i + 1} out of {training_data.index.stop} from training") count = 0 # append testing data to training data for i, data in enumerate(testing_data[testing_data.columns].to_numpy()): if 'pomt' in data[0]: appended_data = {} for j, sentence in enumerate(data): if j in feature_dict: inputs = tokenizer(str(sentence), return_tensors="pt", max_length=512, pad_to_max_length=True).to(device) outputs = model(**inputs) appended_data[feature_dict[j]] = outputs.last_hidden_state[:,0][0].cpu().detach().numpy() pomt_train_x = pomt_train_x.append(appended_data, ignore_index=True) print(f"{i + 1} out of {testing_data.index.stop} from testing") count += 1 # Map the possible labels to an emotion positive_set = set(['half-true', 'correct attribution!', 'correct', 'determination: barely true', 'factscan score: true', 'correct attribution', 'mostly true', 'mostly-correct', 'truth!', 'partially true', 'half true', 'mostly truth!', 'determination: true', 'true messages', 'authorship confirmed!', 'verdict: true', 'mostly_true', 'determination: mostly true', 'confirmed authorship!', 'conclusion: accurate', 'accurate', 'true', 'partly true', 'fact', 'full flop', 'in-the-green', 'verified']) negative_set = set({'fake news', 'verdict: false', '3 pinnochios', 'fiction!', 'bogus warning', 'we rate this claim false', 'determination: false', 'disputed!', 'false', 'fiction', 'a lot of baloney', '2 pinnochios', 'some baloney', 'mostly_false', 'cherry picks', 'miscaptioned', 'misleading!', 'misleading recommendations', 'mostly fiction!', 'mostly false', 'a little baloney', 'fiction! & satire!', 'conclusion: false', 'rating: false', 'determination: misleading', 'promise broken', '4 pinnochios', 'misleading', 'promise kept', 'misattributed', 'fake', 'previously truth! 
now resolved!','incorrect attribution!', 'incorrect', 'spins the facts', 'determination: a stretch', 'factscan score: misleading', 'pants on fire!', 'factscan score: false', 'exaggerates', 'outdated', 'facebook scams', 'unsupported', 'opinion!', 'verdict: unsubstantiated', 'scam', 'virus!', 'no flip', 'scam!', 'unverified', 'distorts the facts', 'outdated!' 'understated', 'no evidence', 'unproven!', 'inaccurate attribution!', 'statirical reports', 'unproven', 'exaggerated', 'determination: huckster propaganda', 'grass roots movement!', 'commentary!', 'in-the-red', 'unsubstantiated messages',}) neutral_set = set({'truth! & fiction!', 'conclusion: unclear', '1', 'unobservable', 'needs context', 'truth! & disputed!', 'half flip', '0', 'in-between', '4', 'None', '2', 'none', 'investigation pending!','not the whole story', '10','in the works', 'truth! & misleading!', '3', 'mixture', 'not yet rated', 'legend', 'stalled', 'truth! & unproven!', 'truth! & outdated!', 'compromise'}) # Read in the labels for the appropriate data pomt_train_y = pd.DataFrame(columns=["label"]) sign_to_append = 0 for i, data in enumerate(training_data[training_data.columns].to_numpy()): if 'pomt' in data[0]: if data[2] in positive_set: sign_to_append = 1 elif data[2] in negative_set: sign_to_append = -1 else: sign_to_append = 0 pomt_train_y = pomt_train_y.append({'label':sign_to_append}, ignore_index=True) print(f"{i + 1} out of {training_data.index.stop} from training") # append testing data to training data for i, data in enumerate(testing_data[testing_data.columns].to_numpy()): if 'pomt' in data[0]: if data[2] in positive_set: sign_to_append = 1 elif data[2] in negative_set: sign_to_append = -1 else: sign_to_append = 0 pomt_train_y = pomt_train_y.append({'label':sign_to_append}, ignore_index=True) print(f"{i + 1} out of {testing_data.index.stop} from testing") pomt_X_train, pomt_X_test, pomt_Y_train, pomt_Y_test = train_test_split(pomt_train_x, pomt_train_y, test_size= (count / pomt_train_x.shape[0]), stratify=pomt_train_y) pomt_Y_train = pomt_Y_train.astype("int") pomt_Y_test = pomt_Y_test.astype("int") # One Vs. One Multiclass Classification clf = OneVsOneClassifier(SVC(C = 1, verbose=True)) # Fit to Training Data clf.fit(pomt_X_train, pomt_Y_train) --------------------------------------------------------------------------- TypeError Traceback (most recent call last) TypeError: only size-1 arrays can be converted to Python scalars The above exception was the direct cause of the following exception: ValueError Traceback (most recent call last) <ipython-input-22-3314e23093e3> in <module>() 1 # Fit to Training Data ----> 2 clf.fit(pomt_X_train.squeeze(), pomt_Y_train) 3 4 # Training data accuracy 5 X_train_prediction = clf.predict(pomt_X_train) 4 frames /usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in __array__(self, dtype) 1991 1992 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: -> 1993 return np.asarray(self._values, dtype=dtype) 1994 1995 def __array_wrap__( ValueError: setting an array element with a sequence.
I figured out what to do on my own end. I basically just created a column in the dataframe to reflect each element of the list, not each list itself. It's a bit unintuitive but it works.
Split a CSV in three parts and calculating the mean
I have a file containing: Time 60Ni 61Ni 62Ni 63Cu 64Ni 65Cu 66Zn 0. 9.13242244720459 0.406570166349411 1.326429009437561 5.754200458526611 0.4233334958553314 2.68562912940979 4.148788005113602e-002 8.390999794006348 9.187464714050293 0.4089393615722656 1.334462523460388 5.790649890899658 0.425884485244751 2.702604055404663 4.17313240468502e-002 16.78300094604492 9.254316329956055 0.4119723737239838 1.344084143638611 5.832504749298096 0.428943395614624 2.722275018692017 4.203101620078087e-002 25.17399978637695 9.19857120513916 0.4094997346401215 1.336091756820679 5.791898727416992 0.4264563024044037 2.703336715698242 4.185733571648598e-002 33.56499862670898 9.194388389587402 0.4092871248722076 1.335391044616699 5.794968605041504 0.4264419078826904 2.704529047012329 4.192239791154862e-002 41.95600128173828 9.162041664123535 0.4078944325447083 1.330722570419312 5.766440868377686 0.425002932548523 2.691519498825073 4.182799160480499e-002 50.34700012207031 9.190646171569824 0.4091125726699829 1.334963202476502 5.786285877227783 0.426413893699646 2.700882434844971 4.196327552199364e-002 58.73799896240234 9.211565971374512 0.4100649058818817 1.337916374206543 5.8003830909729 0.4273969829082489 2.707314252853394 4.207673668861389e-002 67.12799835205078 9.240947723388672 0.4113766849040985 1.342136979103088 5.822870254516602 0.4287911653518677 2.717630624771118 4.222121462225914e-002 75.51899719238281 9.208130836486816 0.4099342525005341 1.337505698204041 5.802256584167481 0.4273860156536102 2.708084583282471 4.214133694767952e-002 83.91000366210938 9.196262359619141 0.4093911945819855 1.335786700248718 5.799176692962647 0.4268693923950195 2.706451416015625 4.215647280216217e-002 92.30100250244141 9.213265419006348 0.4101545214653015 1.338128447532654 5.807514190673828 0.4277283549308777 2.71068549156189 4.221603646874428e-002 100.6920013427734 9.163029670715332 0.407885879278183 1.330831050872803 5.775251865386963 0.4254410266876221 2.695534229278565 4.204751178622246e-002 109.0839996337891 9.144490242004395 0.4070722758769989 1.328153848648071 5.764679908752441 0.4246650040149689 2.690402746200562 4.198652133345604e-002 117.4749984741211 9.114171028137207 0.4057718515396118 1.32369875907898 5.745044231414795 0.4233448505401611 2.681406497955322 4.190905019640923e-002 125.8659973144531 9.149589538574219 0.407274603843689 1.328810453414917 5.766050815582275 0.4248199760913849 2.691139459609985 4.200970754027367e-002 134.2570037841797 9.168668746948242 0.4081465899944305 1.331702351570129 5.777794361114502 0.4256783723831177 2.696741819381714 4.206346347928047e-002 142.6479949951172 9.11380672454834 0.4057287871837616 1.323864817619324 5.740524291992188 0.4232001006603241 2.67945122718811 4.187140986323357e-002 151.0390014648438 9.100893974304199 0.4051263332366943 1.321851253509522 5.729655265808106 0.4226666390895844 2.674278259277344 4.182597994804382e-002 159.4299926757813 9.072731971740723 0.4039073586463928 1.317763328552246 5.713830471038818 0.4213792979717255 2.666974782943726 4.169051349163055e-002 167.8209991455078 9.186164855957031 0.4089057147502899 1.334116697311401 5.786634922027588 0.4264728426933289 2.700879812240601 4.211126267910004e-002 176.2129974365234 9.13982105255127 0.4068569839000702 1.327479124069214 5.76115083694458 0.4244593381881714 2.688895463943481 4.199059307575226e-002 184.60400390625 9.146007537841797 0.4071221053600311 1.328468441963196 5.762693881988525 0.4247534275054932 2.689634084701538 4.1985172778368e-002 192.9949951171875 9.18150806427002 0.4086942672729492 
1.333438873291016 5.785679817199707 0.4262394905090332 2.700178623199463 4.207265004515648e-002 201.3860015869141 9.134004592895508 0.4066038727760315 1.326677560806274 5.753909587860107 0.424109697341919 2.685543775558472 4.191514849662781e-002 209.7769927978516 9.192599296569824 0.4091922044754028 1.335113883018494 5.792657852172852 0.4266164898872376 2.703598737716675 4.208896681666374e-002 218.1679992675781 9.166966438293457 0.4080702364444733 1.331447958946228 5.776984214782715 0.4254603683948517 2.696239709854126 4.19912114739418e-002 226.5590057373047 9.166423797607422 0.4080766439437866 1.331416010856628 5.771696090698242 0.4254250526428223 2.693812847137451 4.191195592284203e-002 234.9510040283203 9.122139930725098 0.4060815274715424 1.325031995773315 5.74381160736084 0.4234589040279388 2.680959224700928 4.174426198005676e-002 243.3419952392578 9.178729057312012 0.4085982143878937 1.333097338676453 5.783432006835938 0.4259471595287323 2.699411153793335 4.196531698107719e-002 251.7330017089844 9.196023941040039 0.4093179702758789 1.335668444633484 5.792133331298828 0.4266210496425629 2.703416347503662 4.196692258119583e-002 260.1239929199219 9.195613861083984 0.4093446731567383 1.33561098575592 5.790852546691895 0.4264806509017944 2.702755451202393 4.19374406337738e-002 268.5150146484375 9.124658584594727 0.4061901867389679 1.325218439102173 5.749895572662354 0.4233379364013672 2.683579206466675 4.166891798377037e-002 276.906005859375 9.071592330932617 0.4038631021976471 1.317633748054504 5.711780071258545 0.4209088683128357 2.666091680526733 4.146279022097588e-002 285.2969970703125 9.090703010559082 0.4047099351882935 1.320350289344788 5.724553108215332 0.4218063056468964 2.671880960464478 4.148663952946663e-002 293.68798828125 9.049410820007324 0.4028385281562805 1.314435601234436 5.699662208557129 0.4198987782001495 2.660340070724487 4.135752841830254e-002 302.0790100097656 9.158493995666504 0.4077092707157135 1.330130934715271 5.770212650299072 0.4247544705867767 2.693133354187012 4.172087088227272e-002 310.4700012207031 9.294267654418945 0.4137440025806427 1.350019454956055 5.85582971572876 0.4307662844657898 2.733232498168945 4.217509180307388e-002 318.8609924316406 9.266000747680664 0.4124558866024017 1.34581983089447 5.838682651519775 0.429353654384613 2.724989175796509 4.206011816859245e-002 327.2520141601563 9.227903366088867 0.4107420146465302 1.340180039405823 5.813295841217041 0.4277106523513794 2.713207006454468 4.191378504037857e-002 335.6430053710938 9.248990058898926 0.4117128551006317 1.343235015869141 5.836093425750732 0.4286618232727051 2.72357988357544 4.200825467705727e-002 344.0339965820313 9.200018882751465 0.4095089137554169 1.336208343505859 5.805673122406006 0.4264824092388153 2.709526300430298 4.185647144913673e-002 352.4259948730469 9.162602424621582 0.4079090356826782 1.330750703811646 5.780079364776611 0.4248281121253967 2.697546243667603 4.17003221809864e-002 360.8169860839844 9.165441513061523 0.4079831540584564 1.331099987030029 5.780121326446533 0.424967348575592 2.697607517242432 4.169800505042076e-002 369.2070007324219 9.242767333984375 0.4114582240581513 1.342459917068481 5.828019142150879 0.4283893704414368 2.719994068145752 4.194791615009308e-002 377.5989990234375 9.211434364318848 0.4100139439105988 1.337894320487976 5.801908493041992 0.4268820583820343 2.708046913146973 4.185103997588158e-002 385.989990234375 9.168110847473145 0.4081266224384308 1.33171010017395 5.772421360015869 0.4250668585300446 2.694308280944824 4.166359454393387e-002 
394.3810119628906 9.162002563476563 0.4078731238842011 1.330778479576111 5.770648956298828 0.4247135519981384 2.693532466888428 4.165602847933769e-002 402.7720031738281 9.219051361083984 0.4104039072990418 1.339054584503174 5.805272579193115 0.4273586571216583 2.709418296813965 4.186749085783958e-002 411.1640014648438 9.225748062133789 0.4106448590755463 1.340008854866028 5.808595180511475 0.4276045560836792 2.711185216903687 4.189140349626541e-002 425.0020141601563 9.11283016204834 0.4056265950202942 1.323553919792175 5.742629528045654 0.4226277768611908 2.680011749267578 4.150775447487831e-002 433.3930053710938 9.15496826171875 0.4075464010238648 1.329663395881653 5.76693058013916 0.4244976043701172 2.691663980484009 4.165017232298851e-002 441.7839965820313 9.179342269897461 0.4086317718029022 1.333258748054504 5.783347606658936 0.4256252646446228 2.699387073516846 4.177364706993103e-002 450.1759948730469 9.202337265014648 0.4096647799015045 1.336641907691956 5.799064636230469 0.4267286956310272 2.706497669219971 4.189135506749153e-002 458.5669860839844 9.126877784729004 0.4062632024288178 1.325594425201416 5.7450852394104 0.4234336316585541 2.681554317474365 4.164514690637589e-002 466.9580078125 9.130221366882324 0.4063588082790375 1.326080322265625 5.750959873199463 0.4235436022281647 2.6843581199646 4.169851914048195e-002 475.3489990234375 9.142138481140137 0.4069503247737885 1.32788360118866 5.753814697265625 0.4240946471691132 2.685687065124512 4.17218841612339e-002 483.739990234375 9.144487380981445 0.4070816040039063 1.328163623809815 5.764283180236816 0.4243338704109192 2.69016432762146 4.180238768458366e-002 492.1310119628906 9.213832855224609 0.4101627767086029 1.338177442550659 5.806262969970703 0.4273685812950134 2.709989309310913 4.204079136252403e-002 500.5220031738281 9.151962280273438 0.4073929488658905 1.329235196113586 5.765473365783691 0.4247141480445862 2.691080808639526 4.187702387571335e-002 508.9129943847656 9.133262634277344 0.4065472185611725 1.326548576354981 5.755089282989502 0.4239353835582733 2.685916900634766 4.184074699878693e-002 517.3040161132813 9.194231033325195 0.4092318415641785 1.335361480712891 5.791540622711182 0.4266365468502045 2.703181505203247 4.204431921243668e-002 525.6950073242188 9.174141883850098 0.4084053635597229 1.332433700561523 5.780707836151123 0.4258663356304169 2.697983264923096 4.203671962022781e-002 534.0869750976563 9.127938270568848 0.4063973724842072 1.325674772262573 5.753820896148682 0.4238673448562622 2.685414791107178 4.189241677522659e-002 542.4769897460938 9.228574752807617 0.4108735322952271 1.340509295463562 5.816771030426025 0.4283493161201477 2.714869976043701 4.227539896965027e-002 550.8679809570313 9.247261047363281 0.4116438031196594 1.34306275844574 5.829936504364014 0.4292499721050263 2.720824480056763 4.234698414802551e-002 559.2589721679688 9.259587287902832 0.4121484756469727 1.344773530960083 5.840207099914551 0.4296930134296417 2.725474834442139 4.239725694060326e-002 567.6500244140625 9.236879348754883 0.4112152457237244 1.341552734375 5.824738502502441 0.4288162887096405 2.718418121337891 4.232741147279739e-002 576.041015625 9.265199661254883 0.4123806655406952 1.345624566078186 5.837865352630615 0.4300332069396973 2.724727630615234 4.243086278438568e-002 584.4310302734375 9.193467140197754 0.4092609882354736 1.335316061973572 5.791056632995606 0.4267773926258087 2.702801465988159 4.214197397232056e-002 592.822021484375 9.178906440734863 0.408621221780777 1.333141565322876 5.783803462982178 
... (raw data dump trimmed: the file continues with rows of eight tab-separated values, Time followed by the 60Ni, 61Ni, 62Ni, 63Cu, 64Ni, 65Cu and 66Zn signals, with Time running from about 601 to 1261 in steps of roughly 8.4, for example:
601.2139892578125 9.179999351501465 0.4086976051330566 1.333412766456604 5.781562805175781 0.4262183606624603 2.698424100875855 4.212524741888046e-002
609.60498046875 9.158502578735352 0.4077076315879822 1.330240249633789 5.771774768829346 0.4252981841564179 2.693920612335205 4.206201061606407e-002
...)
I want to split the file into three parts of 50 rows each:

data = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=3, nrows=50, index_col=False, dtype=float)
data2 = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=53, nrows=50, index_col=False, dtype=float)
data3 = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=103, nrows=50, index_col=False, dtype=float)

Then I'm removing outliers with:

cols = list(data.drop(columns='Time').columns)
datao = pd.DataFrame({'Time': data['Time']})
datao[cols] = data[cols].where(np.abs(stats.zscore(data[cols])) < 2)

cols = list(data2.drop(columns='Time').columns)
data2o = pd.DataFrame({'Time': data2['Time']})
data2o[cols] = data2[cols].where(np.abs(stats.zscore(data2[cols])) < 2)
data2o[cols] = data2o[cols].mean()

cols = list(data3.drop(columns='Time').columns)
data3o = pd.DataFrame({'Time': data3['Time']})
data3o[cols] = data3[cols].where(np.abs(stats.zscore(data3[cols])) < 2)
data3o[cols] = data3o[cols].mean()

Does this make sense so far? Now I would like to create a mean of datao, data2o and data3o separately, resulting in three values each for 60Ni, 61Ni, 62Ni, 63Cu, 64Ni, 65Cu and 66Zn. After that, I want to take the mean of these three values again. How should I do this? I tried it this way:

mean_filtered_transposed = pd.DataFrame(data=np.mean(data)).T
mean_filtered_transposed['Time'] = pd.to_datetime(mean_filtered_transposed["Time"], unit='s')
mean_filtered_transposed2 = pd.DataFrame(data=np.mean(data2)).T
mean_filtered_transposed2['Time'] = pd.to_datetime(mean_filtered_transposed["Time"], unit='s')
mean_filtered_transposed3 = pd.DataFrame(data=np.mean(data3)).T
mean_filtered_transposed3['Time'] = pd.to_datetime(mean_filtered_transposed3["Time"], unit='s')
mean_all = pd.concat(mean_filtered_transposed, mean_filtered_transposed2, mean_filtered_transposed3)

However, this results in: "TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame""
Based on the documentation, objs (the first argument of pd.concat) must be a sequence or mapping of Series or DataFrame objects, so the frames have to be wrapped in a list. For example:

s1 = pd.Series(['a', 'b'])
s2 = pd.Series(['c', 'd'])
pd.concat([s1, s2])

works and returns a single concatenated Series, but

pd.concat(s1, s2)

generates the same TypeError you are seeing (with "Series" instead of "DataFrame" in the message). In your case the fix is:

mean_all = pd.concat([mean_filtered_transposed, mean_filtered_transposed2, mean_filtered_transposed3])
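Regarding the averaging itself, here is a minimal sketch of one way to do it, assuming the outlier-filtered frames datao, data2o and data3o from the question (without the extra .mean() assignment lines) and ignoring the Time column:

import pandas as pd

# one mean per isotope column for each 50-row block (NaNs from the outlier filter are skipped)
block_means = pd.DataFrame({
    'block1': datao.drop(columns='Time').mean(),
    'block2': data2o.drop(columns='Time').mean(),
    'block3': data3o.drop(columns='Time').mean(),
})

# overall mean of the three block means, one value per isotope
overall_mean = block_means.mean(axis=1)
print(block_means)
print(overall_mean)

block_means then has one row per isotope (60Ni ... 66Zn) and one column per block, and overall_mean averages across the three blocks.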
How to use input() with pandas to get all the value_counts linked to this input
My dataframe's columns look like this:

Index(['#Organism/Name', 'TaxID', 'BioProject Accession', 'BioProject ID', 'Group', 'SubGroup', 'Size (Mb)', 'GC%', 'Replicons', 'WGS', 'Scaffolds', 'Genes', 'Proteins', 'Release Date', 'Modify Date', 'Status', 'Center', 'BioSample Accession', 'Assembly Accession', 'Reference', 'FTP Path', 'Pubmed ID', 'Strain'], dtype='object')

I ask the user to enter the name of the species with this script:

print("bacterie species?")
species = input()

I want to find the rows whose "Organism/Name" equals the species entered by the user (the input), then compute value_counts() on the Status column, and finally retrieve the 'FTP Path'. Here is the code I came up with, but it does not work:

if (data.loc[(data["Organism/Name"]==species)
    print(Data['Status'].value_counts())
else:
    print("This species not found")

if (data.loc[(data["Organism/Name"]==species)
    print(Data['Status'].value_counts())
else:
    print(Data.get["FTP Path"]
If I understand your question correctly, this is what you're trying to achieve:

import wget
import pandas as pd

URL = 'https://ftp.ncbi.nlm.nih.gov/genomes/GENOME_REPORTS/prokaryotes.txt'
data = pd.read_csv(wget.download(URL), sep='\t', header=0)

species = input("Enter the bacteria species: ")

if data["#Organism/Name"].str.contains(species, case=False).any():
    print(data.loc[data["#Organism/Name"].str.contains(species, case=False)]['Status'].value_counts())
    FTP_list = data.loc[data["#Organism/Name"].str.contains(species, case=False)]["FTP Path"].values
else:
    print("This species not found")

To write all the FTP Path URLs into a txt file, you can do this:

with open('/path/urls.txt', mode='wt') as file:
    file.write('\n'.join(FTP_list))
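If you need an exact match on the organism name (as in the data["Organism/Name"]==species attempt from the question) rather than the substring matching above, a small variant of the same idea, assuming the data and species variables defined in the previous snippet:

# boolean mask for rows whose organism name equals the user input exactly
mask = data["#Organism/Name"] == species
if mask.any():
    print(data.loc[mask, 'Status'].value_counts())
    FTP_list = data.loc[mask, "FTP Path"].values
else:
    print("This species not found")

str.contains(case=False) is more forgiving of capitalisation and partial names, while the == comparison only matches rows with the exact spelling.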
How do I select only certain rows based on label in pandas?
Here is my function:

def get_historical_closes(ticker, start_date, end_date):
    my_dir = '/home/manish/Desktop/Equity/subset'
    os.chdir(my_dir)
    dfs = []
    for files in glob.glob('*.txt'):
        dfs.append(pd.read_csv(files, names=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'], parse_dates=[1]))
    p = pd.concat(dfs)
    d = p.reset_index(['Date', 'Ticker', 'Close'])
    pivoted = d.pivot_table(index=['Date'], columns=['Ticker'])
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted

closes = get_historical_closes(['LT' or 'HDFC' or 'ACC'], '1999-01-01', '2014-12-31')

My problem is that I only want data for a few tickers, namely LT, HDFC and ACC, for all the dates, but when I execute the function I get data for all of them (approx. 1500 tickers). How can I slice the dataframe so that I get only the selected rows and not the entire dataframe? The raw input data is a collection of text files like this:

20MICRONS,20150401,36.5,38.95,35.8,37.35,64023,0
3IINFOTECH,20150401,5.9,6.3,5.8,6.2,1602365,0
3MINDIA,20150401,7905,7905,7850,7879.6,310,0
8KMILES,20150401,710.05,721,706,712.9,20196,0
A2ZINFRA,20150401,15.5,16.55,15.2,16,218219,0
AARTIDRUGS,20150401,648.95,665.5,639.65,648.25,42927,0
ACC,20150401,1563.7,1591.3,1553.2,1585.9,176063,0
... (several hundred more rows in the same Ticker,Date,Open,High,Low,Close,Volume,Null format follow, ending with ZENITH,20150401,...)
I use EdChum's code from his comment and add some clarification. I think the main problem is that d is the output dataframe, so d cannot be looped over in a for cycle if you need one output from all the *.txt files.

import pandas as pd
import glob

def get_historical_closes(ticker, start_date, end_date):
    dfs = []
    # create empty df for the output
    d = pd.DataFrame()
    # glob can use a path with *.txt - see http://stackoverflow.com/a/3215392/2901002
    for files in glob.glob('/home/manish/Desktop/Equity/subset/*.txt'):
        # added index_col for a multiindex df
        dfs.append(pd.read_csv(files,
                               index_col=['Date', 'Ticker', 'Close'],
                               names=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'],
                               parse_dates=[1]))
    # d is the output from all .txt files, so it cannot be looped in a for cycle
    p = pd.concat(dfs)
    d = p.reset_index(['Date', 'Ticker', 'Close'])
    # isin needs a list of values, so 'or' is replaced by ',' in the ticker argument
    d = d[(d['Ticker'].isin(ticker)) & (d['Date'] > start_date) & (d['Date'] < end_date)]
    pivoted = d.pivot_table(index=['Date'], columns=['Ticker'])
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted

# arguments are changed for testing: 'HDFC' to 'AGCNET' and end_date '2014-12-31' to '2015-12-31'
closes = get_historical_closes(['LT', 'AGCNET', 'ACC'], '1999-01-01', '2015-12-31')
print(closes)
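The part that restricts the output to the requested tickers is the isin filter applied before pivoting; as a tiny self-contained illustration with made-up rows (not the real ticker files):

import pandas as pd

d = pd.DataFrame({
    'Ticker': ['ACC', 'LT', 'ABB', 'AGCNET'],
    'Date': pd.to_datetime(['2015-04-01', '2015-04-01', '2015-04-01', '2015-04-01']),
    'Close': [1585.9, 1750.0, 1277.45, 93.0],
})
wanted = ['LT', 'AGCNET', 'ACC']
# keeps only the rows whose Ticker is in the wanted list
print(d[d['Ticker'].isin(wanted)])

isin takes a list of values, which is why the 'LT' or 'HDFC' or 'ACC' expression from the question (which evaluates to just 'LT') is replaced by a proper list of tickers.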