TensorFlow DNNRegressor keeps giving me the same predicted values - python

I am using TensorFlow's DNNRegressor, and when I reload the dataset to get predicted values from the trained neural network, after a certain number of rows the predicted values are all the same. I have tried changing the learning rate and the number of hidden layers and neurons, but nothing seems to really help.
Here is my code:
import pandas as pd
from IPython.display import display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import tensorflow as tf
import pickle # Used to save the model
import re
import csv
import logging
import os
from sklearn.model_selection import train_test_split
regex = re.compile(r"\[|\]|<", re.IGNORECASE)
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
# Removes annoying warning messages in tensorflow and python
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='error', category=FutureWarning)
import sys
if not sys.warnoptions:
    warnings.simplefilter("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.logging.set_verbosity(tf.logging.ERROR)
tf.get_logger().setLevel(3)
tf.get_logger().setLevel('INFO')
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger('tensorflow').disabled = True
all_data = pd.read_csv('ML_DATASET.csv')
all_data = all_data.fillna(0)
# Create training and test set
all_data4 = all_data.iloc[:,0:]
all_data4.columns = all_data4.columns.str.replace('+', 'plus')
all_data4.columns = all_data4.columns.str.replace(')', ' ')
all_data4.columns = all_data4.columns.str.replace('!', ' ')
all_data4.columns = all_data4.columns.str.replace('(', ' ')
all_data4.columns = all_data4.columns.str.replace(',', ' ')
all_data4.columns = all_data4.columns.str.replace(' ', '_')
all_data4.columns = all_data4.columns.str.replace('__', '_')
all_data4.columns = all_data4.columns.str.replace('%', 'percentage')
all_data4.columns = all_data4.columns.str.replace('$', '')
all_data4.columns = all_data4.columns.str.replace('<', 'lessthan')
all_data4 = all_data4.dropna(subset = ['3_year_appreciation'])
train_dataset = all_data4.sample(frac=0.8,random_state=42)
test_dataset = all_data4.drop(train_dataset.index)
train_stats = train_dataset.describe()
train_stats.pop('3_year_appreciation')
train_stats = train_stats.transpose()
train_labels = train_dataset.pop('3_year_appreciation')
test_labels = test_dataset.pop('3_year_appreciation')
# Need to change feature columns to be of numeric type
feature_columns = ['Unweighted_Sample_Count_of_the_population', 'Avg_household_size_of_occupied_housing_units', 'Total_population_in_occupied_housing_units', 'Median_Estimated_Home_Value_owner_occupied_units_', 'Total_Population', 'Median_Gross_rent_as_percentage_of_household_inc', 'White_Population', 'Black/African_American_Population', 'Native_American_Population', 'Asian_Population', 'Pacific_Islander_Population', 'Some_other_race_Population', 'Mixed_Race_Population', 'Median_Age', 'Median_Household_Income', 'Total_Population_over_25', 'B15003_022E', 'B15003_023E', 'B15003_024E', 'B15003_025E', 'Median_Gross_Rent', 'Homeowner_households', 'Renter_households', 'Housing_units_with_mortgage', 'B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E', 'B19001_007E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E', 'B19001_017E', 'Total_housing_Units', 'B25024_006E', 'B25024_007E', 'B25024_008E', 'B25024_009E', 'Total_Units', 'Units_with_9plus_Rooms', 'Families_making_more_than_5x_poverty_level_income', 'People_who_moved_in_the_past_year_within_same_county', 'Moved_within_same_state_but_not_same_county', 'Moved_from_different_state_same_country', 'Moved_from_different_country', 'Median_age_of_people_who_moved_from_different_state', 'Moved_within_same_county_bachelors_degree', 'Moved_from_different_state_At_or_above_150_percent_of_the_poverty_level', 'Number_of_people_who_work_at_home', 'Number_of_people_who_walk_to_work', 'White_women_25-29', 'Born_in_germany_population', 'Number_of_people_who_take_Non_taxi_public_transport_to_work', 'Number_of_people_who_work_in_county_government', 'Number_of_people_whose_Commuting_time_under_10_mins', 'Number_of_people_whose_commute_is_45-60_mins', 'Number_of_people_whose_commute_is_60-90_mins', 'Number_of_people_whose_commute_is_90plus_mins', 'Number_of_Sales_and_office_workers', 'Number_of_people_in_management_business_science_and_arts', 'Number_of_service_workers', 'Number_of_educational_and_health_service_workers', 'Number_of_arts_entertainment_and_food_service_workers', 'Number_of_finance_and_real_estate_workers', 'Number_of_tech_workers', 'Private_for-profit_wage_and_salary_workers', 'Self-employed_in_own_incorporated_business_workers', 'Local_government_workers', 'Federal_government_workers', 'Self-employed_in_own_not_incorporated_business_workers', 'People_in_households_receiving_SNAP_and_extra_social_security_income', 'Civilians_aged_25-64_with_more_than_a_bachelors_degree', 'Men_over_16_in_Education_legal_community_service_arts_and_media_occupations', 'Men_over_16_in_Food_preparation_and_serving_related_occupations', 'B08006_002E', 'B08006_003E', 'B08006_004E', 'B08006_005E', 'B08006_006E', 'B08006_007E', 'B08006_009E', 'B08006_010E', 'B08006_011E', 'B08006_012E', 'B08006_013E', 'B08006_014E', 'B08006_016E', 'B08006_019E', 'B08006_020E', 'B08006_021E', 'B08006_022E', 'B08006_023E', 'B08006_024E', 'B08006_025E', 'B08006_026E', 'B08006_027E', 'B08006_028E', 'B08006_029E', 'B08006_030E', 'B08006_031E', 'B08006_032E', 'B08006_033E', 'B08006_034E', 'B08006_036E', 'B08006_037E', 'B08006_038E', 'B08006_039E', 'B08006_040E', 'B08006_041E', 'B08006_042E', 'B08006_043E', 'B08006_044E', 'B08006_045E', 'B08006_046E', 'B08006_047E', 'B08006_048E', 'B08006_049E', 'B08006_050E', 'B08006_051E', 'B08007_002E', 'B08007_004E', 'B08007_005E', 'B08007_007E', 'B08007_008E', 'B08007_009E', 'B08007_010E', 'B08007_012E', 'B08007_013E', 'B08007_014E', 'B08007_015E', 'B08008_002E', 'B08008_003E', 'B08008_004E', 'B08008_005E', 'B08008_006E', 
'B08008_007E', 'B08008_008E', 'B08008_009E', 'B08008_010E', 'B08008_011E', 'B08008_012E', 'B08008_013E', 'B08008_014E', 'B08013_001E', 'B08013_002E', 'B08013_003E', 'B08014_002E', 'B08014_003E', 'B08014_004E', 'B08014_005E', 'B08014_006E', 'B08014_007E', 'B08014_009E', 'B08014_010E', 'B08014_011E', 'B08014_012E', 'B08014_013E', 'B08014_014E', 'B08014_016E', 'B08014_017E', 'B08014_018E', 'B08014_019E', 'B08014_020E', 'B08014_021E', 'B08015_001E', 'B08015_002E', 'B08015_003E', 'B08105A_004E', 'B08105B_003E', 'B08111_002E', 'B08111_003E', 'B08111_004E', 'B08111_005E', 'B08113_002E', 'B08113_003E', 'B08113_004E', 'B08113_005E', 'B08113_006E', 'B08113_007E', 'B08113_008E', 'B13002_002E', 'B13002_003E', 'B13002_004E', 'B13002_005E', 'B13002_006E', 'B13002_007E', 'B13002_008E', 'B13002_009E', 'B13002_010E', 'B13002_011E', 'B13002_012E', 'B13002_013E', 'B13002_014E', 'B13002_015E', 'B13002_016E', 'B13002_017E', 'B13002_018E', 'B13002_019E', 'B13002A_002E', 'B13002A_003E', 'B13002A_004E', 'B13002A_005E', 'B13002A_006E', 'B13002A_007E', 'B13002B_002E', 'B13002B_003E', 'B13002B_004E', 'B13002B_005E', 'B13002B_006E', 'B13002B_007E', 'B13002C_002E', 'B13002C_003E', 'B13002C_004E', 'B13002C_005E', 'B13002C_006E', 'B13002C_007E', 'B13002D_002E', 'B13002D_003E', 'B13002D_004E', 'B13002D_005E', 'B13002D_006E', 'B13002D_007E', 'B13002E_002E', 'B13002E_003E', 'B13002E_004E', 'B13002E_005E', 'B13002E_006E', 'B13002E_007E', 'B13002F_002E', 'B13002F_003E', 'B13002F_004E', 'B13002F_005E', 'B13002F_006E', 'B13002F_007E', 'B13002G_002E', 'B13002G_003E', 'B13002G_004E', 'B13002G_005E', 'B13002G_006E', 'B13002G_007E', 'B13002H_002E', 'B13002H_003E', 'B13002H_004E', 'B13002H_005E', 'B13002H_006E', 'B13002H_007E', 'B13002I_002E', 'B13002I_003E', 'B13002I_004E', 'B13002I_005E', 'B13002I_006E', 'B13002I_007E', 'B13004_002E', 'B13004_003E', 'B13004_004E', 'B13004_005E', 'B13004_006E', 'B13004_007E', 'B13004_008E', 'B13004_009E', 'B13004_010E', 'B13004_011E', 'B13008_002E', 'B13008_003E', 'B13008_004E', 'B13008_005E', 'B13008_006E', 'B13008_007E', 'B13008_008E', 'B13008_009E', 'B13008_010E', 'B13008_011E', 'B13008_012E', 'B13008_013E', 'B13008_014E', 'B13008_015E', 'B13010_002E', 'B13010_003E', 'B13010_004E', 'B13010_005E', 'B13010_006E', 'B13010_007E', 'B13010_008E', 'B13010_009E', 'B13010_010E', 'B13010_011E', 'B13010_012E', 'B13010_013E', 'B13010_014E', 'B13010_015E', 'B13010_016E', 'B13010_017E', 'B13010_018E', 'B13010_019E', 'B13012_002E', 'B13012_003E', 'B13012_004E', 'B13012_005E', 'B13012_006E', 'B13012_007E', 'B13012_008E', 'B13012_009E', 'B13012_010E', 'B13012_011E', 'B13012_012E', 'B13012_013E', 'B13012_014E', 'B13012_015E', 'B13014_002E', 'B13014_003E', 'B13014_004E', 'B13014_005E', 'B13014_006E', 'B13014_007E', 'B13014_008E', 'B13014_009E', 'B13014_010E', 'B13014_011E', 'B13014_012E', 'B13014_013E', 'B13014_014E', 'B13014_015E', 'B13014_016E', 'B13014_017E', 'B13014_018E', 'B13014_019E', 'B13014_020E', 'B13014_021E', 'B13014_022E', 'B13014_023E', 'B13014_024E', 'B13014_025E', 'B13014_026E', 'B13014_027E', 'B13015_002E', 'B13015_003E', 'B13015_004E', 'B13015_005E', 'B13015_006E', 'B13015_007E', 'B13015_008E', 'B13015_009E', 'B13015_010E', 'B13015_011E', 'B13015_012E', 'B13015_013E', 'B13015_014E', 'B13015_015E', 'B13016_002E', 'B13016_003E', 'B13016_004E', 'B13016_005E', 'B13016_006E', 'B13016_007E', 'B13016_008E', 'B13016_009E', 'B13016_010E', 'B13016_011E', 'B13016_012E', 'B13016_013E', 'B13016_014E', 'B13016_015E', 'B13016_016E', 'B13016_017E', 'B14001_002E', 'B14001_003E', 'B14001_004E', 'B14001_005E', 
'B14001_006E', 'B14001_007E', 'B14001_008E', 'B14001_009E', 'B14001_010E', 'B14002_003E', 'B14002_004E', 'B14002_005E', 'B14002_006E', 'B14002_007E', 'B14002_008E', 'B14002_009E', 'B14002_010E', 'B14002_011E', 'B14002_012E', 'B14002_013E', 'B14002_014E', 'B14002_015E', 'B14002_016E', 'B14002_017E', 'B14002_018E', 'B14002_019E', 'B14002_020E', 'B14002_021E', 'B14002_022E', 'B14002_023E', 'B14002_024E', 'B14002_025E', 'B14002_027E', 'B14002_028E', 'B14002_029E', 'B14002_030E', 'B14002_031E', 'B14002_032E', 'B14002_033E', 'B14002_034E', 'B14002_035E', 'B14002_036E', 'B14002_037E', 'B14002_038E', 'B14002_039E', 'B14002_040E', 'B14002_041E', 'B14002_042E', 'B14002_043E', 'B14002_044E', 'B14002_045E', 'B14002_046E', 'B14002_047E', 'B14002_048E', 'B14002_049E', 'B14003_003E', 'B14003_004E', 'B14003_005E', 'B14003_006E', 'B14003_007E', 'B14003_008E', 'B14003_009E', 'B14003_010E', 'B14003_011E', 'B14003_012E', 'B14003_013E', 'B14003_014E', 'B14003_015E', 'B14003_016E', 'B14003_017E', 'B14003_018E', 'B14003_019E', 'B14003_020E', 'B14003_021E', 'B14003_022E', 'B14003_023E', 'B14003_024E', 'B14003_025E', 'B14003_026E', 'B14003_027E', 'B14003_028E', 'B14003_029E', 'B14003_031E', 'B14003_032E', 'B14003_033E', 'B14003_034E', 'B14003_035E', 'B14003_036E', 'B14003_037E', 'B14003_038E', 'B14003_039E', 'B14003_040E', 'B14003_041E', 'B14003_042E', 'B14003_043E', 'B14003_044E', 'B14003_045E', 'B14003_046E', 'B14003_047E', 'B14003_048E', 'B14003_049E', 'B14003_050E', 'B14003_051E', 'B14003_052E', 'B14003_053E', 'B14003_054E', 'B14003_055E', 'B14003_056E', 'B14003_057E', 'B14004_003E', 'B14004_004E', 'B14004_005E', 'B14004_006E', 'B14004_007E', 'B14004_008E', 'B14004_009E', 'B14004_010E', 'B14004_011E', 'B14004_012E', 'B14004_013E', 'B14004_014E', 'B14004_015E', 'B14004_016E', 'B14004_017E', 'B14004_019E', 'B14004_020E', 'B14004_021E', 'B14004_022E', 'B14004_023E', 'B14004_024E', 'B14004_025E', 'B14004_026E', 'B14004_027E', 'B14004_028E', 'B14004_029E', 'B14004_030E', 'B14004_031E', 'B14004_032E', 'B14004_033E', 'B14005_003E', 'B14005_004E', 'B14005_005E', 'B14005_006E', 'B14005_007E', 'B14005_008E', 'B14005_009E', 'B14005_010E', 'B14005_011E', 'B14005_012E', 'B14005_013E', 'B14005_014E', 'B14005_015E', 'B14005_017E', 'B14005_018E', 'B14005_019E', 'B14005_020E', 'B14005_021E', 'B14005_022E', 'B14005_023E', 'B14005_024E', 'B14005_025E', 'B14005_026E', 'B14005_027E', 'B14005_028E', 'B14005_029E', 'B14006_002E', 'B14006_003E', 'B14006_004E', 'B14006_005E', 'B14006_006E', 'B14006_007E', 'B14006_008E', 'B14006_009E', 'B14006_010E', 'B14006_011E', 'B14006_012E', 'B14006_013E', 'B14006_014E', 'B14006_015E', 'B14006_016E', 'B14006_017E', 'B14006_018E', 'B14006_019E', 'B14006_020E', 'B14006_021E', 'B14007_003E', 'B14007_004E', 'B14007_005E', 'B14007_006E', 'B14007_007E', 'B14007_008E', 'B14007_009E', 'B14007_010E', 'B14007_011E', 'B14007_012E', 'B14007_013E', 'B14007_014E', 'B14007_015E', 'B14007_016E', 'B14007_017E', 'B14007_018E', 'B14007A_003E', 'B14007A_004E', 'B14007A_005E', 'B14007A_006E', 'B14007A_007E', 'B14007A_008E', 'B14007A_009E', 'B14007A_010E', 'B14007A_011E', 'B14007A_012E', 'B14007A_013E', 'B14007A_014E', 'B14007A_015E', 'B14007A_016E', 'B14007A_017E', 'B14007A_018E', 'B14007A_019E', 'B14007B_002E', 'B14007B_003E', 'B14007B_004E', 'B14007B_005E', 'B14007B_006E', 'B14007B_007E', 'B14007B_008E', 'B14007B_009E', 'B14007B_010E', 'B14007B_011E', 'B14007B_012E', 'B14007B_013E', 'B14007B_014E', 'B14007B_015E', 'B14007B_016E', 'B14007B_017E', 'B14007B_018E', 'B14007B_019E', 'B14007C_002E', 'B14007C_003E', 
'B14007C_004E', 'B14007C_005E', 'B14007C_006E', 'B14007C_007E', 'B14007C_008E', 'B14007C_009E', 'B14007C_010E', 'B14007C_011E', 'B14007C_012E', 'B14007C_013E', 'B14007C_014E', 'B14007C_015E', 'B14007C_016E', 'B14007C_017E', 'B14007C_018E', 'B14007C_019E', 'B14007D_002E', 'B14007D_003E', 'B14007D_004E', 'B14007D_005E', 'B14007D_006E', 'B14007D_007E', 'B14007D_008E', 'B14007D_009E', 'B14007D_010E', 'B14007D_011E', 'B14007D_012E', 'B14007D_013E', 'B14007D_014E', 'B14007D_015E', 'B14007D_016E', 'B14007D_017E', 'B14007D_018E', 'B14007D_019E', 'B19054_002E', 'B19054_003E', 'B19055_002E', 'B19055_003E', 'B19056_002E', 'B19056_003E', 'B19057_002E', 'B19057_003E', 'B19058_002E', 'B19058_003E', 'B19059_002E', 'B19059_003E', 'B19060_002E', 'B19060_003E', 'B08016_002E', 'B08016_003E', 'B08016_004E', 'B08016_005E', 'B08016_006E', 'B08016_007E', 'B08016_008E', 'B08016_009E', 'B08016_010E', 'B08016_011E', 'B08016_012E', 'B08016_013E', 'B08016_014E', 'B08016_015E', 'B08016_016E', 'B08016_017E', 'B08016_018E', 'B08016_019E', 'B08016_020E', 'B08016_021E', 'B08016_022E', 'B08016_023E', 'B08017_002E', 'B08017_003E', 'B08017_004E', 'B08017_005E', 'B08017_006E', 'B08017_007E', 'B08017_008E', 'B08017_009E', 'B08017_010E', 'B08017_011E', 'B08017_012E', 'B08017_013E', 'B08017_015E', 'B08017_016E', 'B08017_017E', 'B08017_018E', 'B08017_019E', 'B08017_020E', 'B08017_021E', 'B08017_022E', 'B08017_023E', 'B08018_002E', 'B08018_003E', 'B08018_004E', 'B08018_005E', 'B08018_006E', 'B08018_007E', 'B08018_008E', 'B08101_049E', 'B08105A_007E', 'B08105B_007E', 'B08105C_007E', 'B08105D_007E', 'B08105E_007E', 'B08105F_007E', 'B08105G_007E', 'B08105H_007E', 'B08105I_007E', 'B08111_031E', 'B08113_049E', 'B08119_055E', 'B08121_007E', 'B08122_025E', 'B08122_026E', 'B08122_027E', 'B08122_028E', 'B24080_003E', 'B24080_004E', 'B24080_005E', 'B24080_006E', 'B24080_007E', 'B24080_008E', 'B24080_009E', 'B24080_010E', 'B24080_011E', 'B24080_012E', 'B24080_013E', 'B24080_014E', 'B24080_015E', 'B24080_016E', 'B24080_017E', 'B24080_018E', 'B24080_019E', 'B24080_020E', 'B24080_021E', 'B24081_001E', 'B24081_002E', 'B24081_003E', 'B24081_004E', 'B24081_005E', 'B24081_006E', 'B24081_007E', 'B24081_008E', 'B24081_009E', 'B24082_001E', 'B24082_002E', 'B24082_003E', 'B24082_004E', 'B24082_005E', 'B24082_006E', 'B24082_007E', 'B24082_008E', 'B24082_009E', 'B24082_010E', 'B24082_011E', 'B24082_012E', 'B24082_013E', 'B24082_014E', 'B24082_015E', 'B24082_016E', 'B24082_017E', 'B24082_018E', 'B24090_001E', 'B24090_002E', 'B24090_003E', 'B24090_004E', 'B24090_005E', 'B24090_006E', 'B24090_007E', 'B24090_008E', 'B24090_009E', 'B24090_010E', 'B24090_011E', 'B24090_012E', 'B24090_013E', 'B24090_014E', 'B24090_015E', 'B24090_016E', 'B24090_017E', 'B24090_018E', 'B24090_019E', 'B24090_020E', 'B24090_021E', 'B24091_001E', 'B24091_002E', 'B24091_003E', 'B24091_004E', 'B24091_005E', 'B24091_006E', 'B24091_007E', 'B24091_008E', 'B24091_009E', 'B24092_001E', 'B24092_002E', 'B24092_003E', 'B24092_004E', 'B24092_005E', 'B24092_006E', 'B24092_007E', 'B24092_008E', 'B24092_009E', 'B24092_010E', 'B24092_011E', 'B24092_012E', 'B24092_013E', 'B24092_014E', 'B24092_015E', 'B24092_016E', 'B24092_017E', 'B24092_018E', 'C24040_001E', 'C24040_002E', 'C24040_003E', 'C24040_004E', 'C24040_005E', 'C24040_006E', 'C24040_007E', 'C24040_008E', 'C24040_009E', 'C24040_010E', 'C24040_011E', 'C24040_012E', 'C24040_013E', 'C24040_014E', 'C24040_015E', 'C24040_016E', 'C24040_017E', 'C24040_018E', 'C24040_019E', 'C24040_020E', 'C24040_021E', 'C24040_022E', 'C24040_023E', 'C24040_024E', 
'C24040_025E', 'C24040_026E', 'C24040_027E', 'C24040_028E', 'C24040_029E', 'C24040_030E', 'C24040_031E', 'C24040_032E', 'C24040_033E', 'C24040_034E', 'C24040_035E', 'C24040_036E', 'C24040_037E', 'C24040_038E', 'C24040_039E', 'C24040_040E', 'C24040_041E', 'C24040_042E', 'C24040_043E', 'C24040_044E', 'C24040_045E', 'C24040_046E', 'C24040_047E', 'C24040_048E', 'C24040_049E', 'C24040_050E', 'C24040_051E', 'C24040_052E', 'C24040_053E', 'C24040_054E', 'C24040_055E', 'C24050_001E', 'C24050_002E', 'C24050_003E', 'C24050_004E', 'C24050_005E', 'C24050_006E', 'C24050_007E', 'C24050_008E', 'C24050_009E', 'C24050_010E', 'C24050_011E', 'C24050_012E', 'C24050_013E', 'C24050_014E', 'C24050_015E', 'C24050_016E', 'C24050_017E', 'C24050_018E', 'C24050_019E', 'C24050_020E', 'C24050_021E', 'C24050_022E', 'C24050_023E', 'C24050_024E', 'C24050_025E', 'C24050_026E', 'C24050_027E', 'C24050_028E', 'C24050_029E', 'C24050_030E', 'C24050_031E', 'C24050_032E', 'C24050_033E', 'C24050_034E', 'C24050_035E', 'C24050_036E', 'C24050_037E', 'C24050_038E', 'C24050_039E', 'C24050_040E', 'C24050_041E', 'C24050_042E', 'C24050_043E', 'C24050_044E', 'C24050_045E', 'C24050_046E', 'C24050_047E', 'C24050_048E', 'C24050_049E', 'C24050_050E', 'C24050_051E', 'C24050_052E', 'C24050_053E', 'C24050_054E', 'C24050_055E', 'C24050_056E', 'C24050_057E', 'C24050_058E', 'C24050_059E', 'C24050_060E', 'C24050_061E', 'C24050_062E', 'C24050_063E', 'C24050_064E', 'C24050_065E', 'C24050_066E', 'C24050_067E', 'C24050_068E', 'C24050_069E', 'C24050_070E', 'C24050_071E', 'C24050_072E', 'C24050_073E', 'C24050_074E', 'C24050_075E', 'C24050_076E', 'C24050_077E', 'C24050_078E', 'C24050_079E', 'C24050_080E', 'C24050_081E', 'C24050_082E', 'C24050_083E', 'C24050_084E', 'C24060_001E', 'C24060_002E', 'C24060_003E', 'C24060_004E', 'C24060_005E', 'C24060_006E', 'C24060_007E', 'C24060_008E', 'C24060_009E', 'C24060_010E', 'C24060_011E', 'C24060_012E', 'C24060_013E', 'C24060_014E', 'C24060_015E', 'C24060_016E', 'C24060_017E', 'C24060_018E', 'C24060_019E', 'C24060_020E', 'C24060_021E', 'C24060_022E', 'C24060_023E', 'C24060_024E', 'C24060_025E', 'C24060_026E', 'C24060_027E', 'C24060_028E', 'C24060_029E', 'C24060_030E', 'C24060_031E', 'C24060_032E', 'C24060_033E', 'C24060_034E', 'C24060_035E', 'C24060_036E', 'C24070_001E', 'C24070_002E', 'C24070_003E', 'C24070_004E', 'C24070_005E', 'C24070_006E', 'C24070_007E', 'C24070_008E', 'C24070_009E', 'C24070_010E', 'C24070_011E', 'C24070_012E', 'C24070_013E', 'C24070_014E', 'C24070_015E', 'C24070_016E', 'C24070_017E', 'C24070_018E', 'C24070_019E', 'C24070_020E', 'C24070_021E', 'C24070_022E', 'C24070_023E', 'C24070_024E', 'C24070_025E', 'C24070_026E', 'C24070_027E', 'C24070_029E', 'C24070_030E', 'C24070_031E', 'C24070_032E', 'C24070_033E', 'C24070_034E', 'C24070_035E', 'C24070_036E', 'C24070_037E', 'C24070_038E', 'C24070_039E', 'C24070_040E', 'C24070_041E', 'C24070_043E', 'C24070_044E', 'C24070_045E', 'C24070_046E', 'C24070_047E', 'C24070_048E', 'C24070_049E', 'C24070_050E', 'C24070_051E', 'C24070_052E', 'C24070_053E', 'C24070_054E', 'C24070_055E', 'C24070_057E', 'C24070_058E', 'C24070_059E', 'C24070_060E', 'C24070_061E', 'C24070_062E', 'C24070_063E', 'C24070_064E', 'C24070_065E', 'C24070_066E', 'C24070_067E', 'C24070_068E', 'C24070_069E', 'C24070_070E', 'C24070_071E', 'C24070_072E', 'C24070_073E', 'C24070_074E', 'C24070_075E', 'C24070_076E', 'C24070_077E', 'C24070_078E', 'C24070_079E', 'C24070_080E', 'C24070_081E', 'C24070_082E', 'C24070_083E', 'C24070_084E', 'B27001_004E', 'B27001_005E', 'B27001_006E', 'B27001_007E', 'B27001_008E', 
'B27001_009E', 'B27001_010E', 'B27001_011E', 'B27001_012E', 'B27001_013E', 'B27001_014E', 'B27001_015E', 'B27001_016E', 'B27001_017E', 'B27001_018E', 'B27001_019E', 'B27001_020E', 'B27001_021E', 'B27001_022E', 'B27001_023E', 'B27001_024E', 'B27001_025E', 'B27001_026E', 'B27001_027E', 'B27001_028E', 'B27001_029E', 'B27001_030E', 'B27001_031E', 'B27001_032E', 'B27001_033E', 'B27001_034E', 'B27001_035E', 'B27001_036E', 'B27001_037E', 'B27001_038E', 'B27001_039E', 'B27001_040E', 'B27001_041E', 'B27001_042E', 'B27001_043E', 'B27001_044E', 'B27001_045E', 'B27001_046E', 'B27001_047E', 'B27001_048E', 'B27001_049E', 'B27001_050E', 'B27001_051E', 'B27001_052E', 'B27001_053E', 'B27001_054E', 'B27001_055E', 'B27001_056E', 'B27001_057E', 'Bachelorsplus', 'Households_with_Income_lessthan35k', 'Households_with_Income_100kplus', 'Pct_of_housing_units_in_4plus_unit_buildings']
feat_cols = []
for x in feature_columns:
    x = x.strip()  # strip() returns a new string, so assign it back
    feat_cols.append(tf.feature_column.numeric_column(x))
# Normalize data
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
X_train = norm(train_dataset)
y_train = train_labels
X_test = norm(test_dataset)
y_test = test_labels
# Define the input function
BATCH_SIZE = 10
epochs = None
input_func=tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train,batch_size=BATCH_SIZE,num_epochs=None,shuffle=True)
eval_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      y=y_test,
                                                      batch_size=10,
                                                      num_epochs=1,
                                                      shuffle=False)
test_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                      batch_size=100,
                                                      num_epochs=1,
                                                      shuffle=False)
dnn_regressor = tf.estimator.DNNRegressor(
    feature_columns=feat_cols,
    hidden_units=[1024, 512, 256],
    optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.01,
        l1_regularization_strength=0.01
    ))
# Train model
dnn_regressor.train(input_fn=input_func,steps=1000)
# Predictions
pred_input_func=tf.estimator.inputs.pandas_input_fn(x=X_test,batch_size=BATCH_SIZE,num_epochs=1,shuffle=False)
predictions=list(dnn_regressor.predict(input_fn=pred_input_func))
# Clear Cache
all_data = pd.DataFrame()
all_data4 = pd.DataFrame()
X_train = pd.DataFrame()
y_train = pd.DataFrame()
X_test = pd.DataFrame()
y_test = pd.DataFrame()
train_dataset = pd.DataFrame()
test_dataset = pd.DataFrame()
train_stats = pd.DataFrame()
train_labels = pd.DataFrame()
test_labels = pd.DataFrame()
# Normalize function
def norm(x, train_stats):
    return (x - train_stats['mean']) / train_stats['std']
# Append machine-learning outputs
def append_ML_outputs(dataframe, year, dnn_regressor):
    dataframe = dataframe[dataframe['Year'].isin([year])]
    print(len(dataframe))
    cols = dataframe.columns.tolist()
    cols = cols[-2:] + cols[:-2]
    cols.insert(0, cols.pop(cols.index('LocationplusType')))
    dataframe = dataframe[cols]
    dataframe = dataframe.replace([np.inf, -np.inf], np.nan)
    dataframe = dataframe.fillna(0)
    print(len(dataframe))
    stats = dataframe.describe()
    stats = stats.transpose()
    dataframe3 = dataframe.drop(['LocationplusType', 'Tract_number', 'Year'], axis=1)
    print(len(dataframe3))
    normed_data = norm(dataframe3, stats)
    normed_data = pd.merge(dataframe[['LocationplusType', 'Tract_number', 'Year']], normed_data,
                           left_index=True, right_index=True)
    dataframe4 = normed_data.drop(['LocationplusType', 'Year_x', 'Tract_number_x'], axis=1)
    dataframe4 = dataframe4.drop(['3_year_appreciation'], axis=1)
    print(len(dataframe4))
    pred_input_func = tf.estimator.inputs.pandas_input_fn(x=dataframe4, batch_size=BATCH_SIZE,
                                                          num_epochs=1, shuffle=False)
    example_result = pd.DataFrame(dnn_regressor.predict(input_fn=pred_input_func))
    orig_data = dataframe.reset_index(drop=True)
    df_test = pd.merge(orig_data[['LocationplusType', 'Year']], example_result,
                       left_index=True, right_index=True)
    df_test.rename(columns={0: 'Predicted Growth Rank'}, inplace=True)
    return df_test
all_data.columns = all_data.columns.str.replace('+', 'plus')
all_data.columns = all_data.columns.str.replace(')', ' ')
all_data.columns = all_data.columns.str.replace('!', ' ')
all_data.columns = all_data.columns.str.replace('(', ' ')
all_data.columns = all_data.columns.str.replace(',', ' ')
all_data.columns = all_data.columns.str.replace(' ', '_')
all_data.columns = all_data.columns.str.replace('__', '_')
all_data.columns = all_data.columns.str.replace('%', 'percentage')
all_data.columns = all_data.columns.str.replace('$', '')
all_data.columns = all_data.columns.str.replace('<', 'lessthan')
# len(df)
df_list = []
for year in all_data['Year'].unique():
    df_list.append(append_ML_outputs(all_data, year, dnn_regressor))
df_final = pd.concat(df_list)
# Uncomment line below to write a new file
df_final.to_csv('predicted_values.csv',index=False)
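For reference, a quick way to see the symptom is to count how many distinct predicted values end up in the exported file (a small sanity check, assuming the column names produced above):
# Sanity check: how many distinct predictions per year made it into the CSV?
check = pd.read_csv('predicted_values.csv')
print(check.groupby('Year')['Predicted Growth Rank'].nunique())
print(check['Predicted Growth Rank'].value_counts().head(10))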
EDIT:
I am now including a GitHub repo, which you can find here. It contains the data and the source code. To clarify, the issue concerns the last block of code in the notebook: after a certain number of rows, the predicted values are all the same.
UPDATED EDIT:
I realized the large ML_DATA.csv file was not in the GitHub repo I linked. The file is 3.6 GB, so I had to zip it before pushing. All the data should be there now.
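For what it's worth, here is a small check I can sketch (assuming all_data4 and train_stats are still in memory, i.e. not reset to empty DataFrames in the cache-clearing block above): normalize a single year's slice with the training statistics instead of that year's own describe() output, and see whether the predictions still collapse.
# Hypothetical check: normalize one year's slice with the *training* statistics
# rather than the per-year describe() output used in append_ML_outputs, then predict again.
year = all_data4['Year'].unique()[0]
year_df = all_data4[all_data4['Year'] == year].replace([np.inf, -np.inf], np.nan).fillna(0)
features_only = year_df.drop(['LocationplusType', 'Tract_number', 'Year', '3_year_appreciation'],
                             axis=1, errors='ignore')
normed_check = (features_only - train_stats['mean']) / train_stats['std']
check_input_fn = tf.estimator.inputs.pandas_input_fn(x=normed_check, batch_size=BATCH_SIZE,
                                                     num_epochs=1, shuffle=False)
check_preds = [p['predictions'][0] for p in dnn_regressor.predict(input_fn=check_input_fn)]
print(len(set(np.round(check_preds, 4))), 'distinct predictions out of', len(check_preds))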

Related

How to process dataframe from a list of csv with pandas

I am writing a program to process a set of answers from a CSV. The CSV is constructed like so:
I have written a program that loads the data (the code appears after the sample below):
positive results,resources,priorities,team focus,benefits,help,action steps,today,tomorrow,yesterday
[studied],[schaums outlines], [Continue working on proof of concept for commitment process],[Continue working on proof of concept for commitment process],[By completing the commitment process demonstration I will have something to show stakeholders and I will feel good],[I need resources advice support and financial assistance so that I can continue to develop my project.],[Schedule time to study],[11:00AM review the Art of Discipline
12:00pm lunch
- contact BOA
1:00pm (break and catch up)
1:15pm continue developing the commitment process.
- follow up with XXX
3:00pm break (snack)
3:15pm prepare for meeting with Dr. XXX
3:30pm meeting with Dr. XXX
4:00pm PDMP
5:00pm Run
6:00pm Dinner
6:30pm work on communication skills
7:00pm reading
8:00pm Journaling
8:30pm reading],[6:00-7:30am study
7:30am PDMP
8:00am review email
9:00am review priorities
9:30am meeting with XXXX
12:00pm lunch
5:00pm run
6:00pm dinner
6:30pm PDMP
8:00pm journaling
8:30pm reading]
,[
5:00AM woke up
6:00am reading/ developing communication skills
7:30-12:00pm
1:00pm lunch, dropped off suit to get fitted
2:00pm weekly planning
3:00pm leaving for XXXX
4:00pm dinner
4:30pm budgeting review
5:00pm drove home
5:30-6:30pm planning
7:30pm pack up belongings
8:00pm journaling
8:30pm read]
import spacy
nlp = spacy.load('en_core_web_sm')
punctuations = string.punctuation
def cleanup_text(docs, logging=False):
    texts = []
    counter = 1
    for doc in docs:
        if counter % 1000 == 0 and logging:
            print("Processed %d out of %d documents." % (counter, len(docs)))
        counter += 1
        doc = nlp(doc, disable=['parser', 'ner'])
        tokens = [tok.lemma_.lower().strip() for tok in doc if tok.lemma_ != '-PRON-']
        tokens = [tok for tok in tokens if tok not in stopwords and tok not in punctuations]
        tokens = ' '.join(tokens)
        texts.append(tokens)
    return pd.Series(texts)
positive_text = [text for text in train[train['benefits'] == 'good']['action steps']]
negative_text = [text for text in train[train['benefits'] == 'bad']['action steps']]
positive_clean = cleanup_text(positive_text)
positive_clean = ' '.join(positive_text).split()
negative_clean = cleanup_text(negative_text)
negative_clean = ' '.join(negative_clean).split()
# 3. Calculate total positive words and negative words
from collections import Counter
positive_counts = Counter(positive_clean)
negative_counts = Counter(negative_clean)
positive_common_words = [word[0] for word in positive_counts.most_common(20)]
negative_common_counts = [word[1] for word in negative_counts.most_common(20)]
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as sklearn_stop_words
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
import string
import re
import spacy
spacy.load('en_core_web_sm')
from spacy.lang.en import English
parser = English()
STOPLIST = set(stopwords.words('english') + list(sklearn_stop_words))
SYMBOLS = " ".join(string.punctuation).split(" ") + ["-", "...", "“", "”"]
class CleanTextTransformer(TransformerMixin):
    def transform(self, X, **transform_params):
        return [cleanText(text) for text in X]
    def fit(self, X, y=None, **fit_params):
        return self
    def get_params(self, deep=True):
        return {}
def cleanText(text):
    text = text.strip().replace("\n", " ").replace("\r", " ")
    text = text.lower()
    return text
def tokenizeText(sample):
    tokens = parser(sample)
    lemmas = []
    for tok in tokens:
        lemmas.append(tok.lemma_.lower().strip() if tok.lemma_ != "-PRON-" else tok.lower_)
    tokens = lemmas
    tokens = [tok for tok in tokens if tok not in STOPLIST]
    tokens = [tok for tok in tokens if tok not in SYMBOLS]
    return tokens
def printNMostInformative(vectorizer, clf, N):
    feature_names = vectorizer.get_feature_names_out()
    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))
    topClass1 = coefs_with_fns[:N]
    topClass2 = coefs_with_fns[:-(N + 1):-1]
    print("Class 1 best: ")
    for feat in topClass1:
        print(feat)
    print("Class 2 best: ")
    for feat in topClass2:
        print(feat)
vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1))
clf = LinearSVC()
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)])
# data
train1 = train['benefits'].tolist()
labelsTrain1 = train['action steps'].tolist()
test1 = test['benefits'].tolist()
labelsTest1 = test['action steps'].tolist()
# train
pipe.fit(train1, labelsTrain1)
# test
preds = pipe.predict(test1)
print("accuracy:", accuracy_score(labelsTest1, preds))
print("Top 10 features used to predict: ")
printNMostInformative(vectorizer, clf, 10)
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)])
transform = pipe.fit_transform(train1, labelsTrain1)
vocab = vectorizer.get_feature_names_out()
for i in range(len(train1)):
    s = ""
    indexIntoVocab = transform.indices[transform.indptr[i]:transform.indptr[i+1]]
    numOccurences = transform.data[transform.indptr[i]:transform.indptr[i+1]]
    for idx, num in zip(indexIntoVocab, numOccurences):
        s += str((vocab[idx], num))
from sklearn import metrics
print(metrics.classification_report(labelsTest1, preds,
                                    target_names=df['benefits'].unique()))
I would like to use spaCy after loading the data to process the positive and negative sentiment from the text content.
Expected:
Data showing the text from the benefits and action steps columns.
Actual:
File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 214, in <module>
pipe.fit(train1, labelsTrain1)
File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 390, in fit
Xt = self._fit(X, y, **fit_params_steps)
File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 348, in _fit
X, fitted_transformer = fit_transform_one_cached(
File "/usr/local/lib/python3.9/site-packages/joblib/memory.py", line 352, in __call__
return self.func(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/sklearn/pipeline.py", line 891, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "/usr/local/lib/python3.9/site-packages/sklearn/base.py", line 847, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 170, in transform
return [cleanText(text) for text in X]
File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 170, in <listcomp>
return [cleanText(text) for text in X]
File "/Users/evangertis/development/PythonAutomation/IGTS/TwilioMessaging/accountability.py", line 177, in cleanText
text = text.strip().replace("\n", " ").replace("\r", " ")
AttributeError: 'float' object has no attribute 'strip'
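For context, the last frame suggests cleanText is receiving a non-string value, most likely a blank CSV cell that pandas read as NaN (a float). A tiny reproduction under that assumption:
import numpy as np
import pandas as pd
# A blank cell in the CSV becomes NaN, which is a float,
# so cleanText's .strip() call fails on it.
col = pd.Series(["some text", np.nan])
print([type(v) for v in col])   # [<class 'str'>, <class 'float'>]
col.iloc[1].strip()             # AttributeError: 'float' object has no attribute 'strip'
Filling the column with .fillna('') or casting it with .astype(str) before the pipeline would avoid this, assuming empty strings are acceptable.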

Scikit Learn fit(): Setting an array element with a sequence fit

I am trying to call scikit learn fit functions on dataframes where the elements of each column are numpy arrays. However, I get the error "setting an array element with a sequence," presumably because I am trying to call fit on a dataframe of arrays rather than scalar values. How do I work around this? I'd really appreciate some help.
Here is my code. You can find the data I'm using here: https://competitions.codalab.org/competitions/21163
training_data = pd.read_csv('/train.tsv', sep='\t')
testing_data = pd.read_csv('/dev.tsv', sep='\t')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',do_lower_case=True,max_length=1024)
model = BertModel.from_pretrained('bert-base-uncased')
model = model.to(device)
# These are used to map the data to their appropriate column on each pass
pomt_train_x = pd.DataFrame(columns=["claim", "reason", "category", "speaker", "checker", "tags", "claim entities", "article title"])
feature_dict = {1: "claim", 4: "reason", 5: "category", 6: "speaker", 7: "checker", 8: "tags", 9: "claim entities", 10: "article title"}
# Sort the data appropriately.
for i, data in enumerate(training_data[training_data.columns].to_numpy()):
    if 'pomt' in data[0]:
        appended_data = {}
        for j, sentence in enumerate(data):
            if j in feature_dict:
                inputs = tokenizer(str(sentence), return_tensors="pt", max_length=512, pad_to_max_length=True).to(device)
                outputs = model(**inputs)
                appended_data[feature_dict[j]] = outputs.last_hidden_state[:,0][0].cpu().detach().numpy()
        pomt_train_x = pomt_train_x.append(appended_data, ignore_index=True)
        print(f"{i + 1} out of {training_data.index.stop} from training")
count = 0
# append testing data to training data
for i, data in enumerate(testing_data[testing_data.columns].to_numpy()):
    if 'pomt' in data[0]:
        appended_data = {}
        for j, sentence in enumerate(data):
            if j in feature_dict:
                inputs = tokenizer(str(sentence), return_tensors="pt", max_length=512, pad_to_max_length=True).to(device)
                outputs = model(**inputs)
                appended_data[feature_dict[j]] = outputs.last_hidden_state[:,0][0].cpu().detach().numpy()
        pomt_train_x = pomt_train_x.append(appended_data, ignore_index=True)
        print(f"{i + 1} out of {testing_data.index.stop} from testing")
        count += 1
# Map the possible labels to an emotion
positive_set = set(['half-true', 'correct attribution!', 'correct', 'determination: barely true', 'factscan score: true',
'correct attribution', 'mostly true', 'mostly-correct', 'truth!', 'partially true', 'half true',
'mostly truth!', 'determination: true', 'true messages', 'authorship confirmed!', 'verdict: true',
'mostly_true', 'determination: mostly true', 'confirmed authorship!', 'conclusion: accurate', 'accurate',
'true', 'partly true', 'fact', 'full flop', 'in-the-green', 'verified'])
negative_set = set({'fake news', 'verdict: false', '3 pinnochios', 'fiction!', 'bogus warning', 'we rate this claim false',
'determination: false', 'disputed!', 'false', 'fiction', 'a lot of baloney', '2 pinnochios', 'some baloney',
'mostly_false', 'cherry picks', 'miscaptioned', 'misleading!', 'misleading recommendations', 'mostly fiction!',
'mostly false', 'a little baloney', 'fiction! & satire!', 'conclusion: false', 'rating: false',
'determination: misleading', 'promise broken', '4 pinnochios', 'misleading', 'promise kept',
'misattributed', 'fake', 'previously truth! now resolved!','incorrect attribution!', 'incorrect',
'spins the facts', 'determination: a stretch', 'factscan score: misleading', 'pants on fire!',
'factscan score: false', 'exaggerates', 'outdated', 'facebook scams', 'unsupported', 'opinion!',
'verdict: unsubstantiated', 'scam', 'virus!', 'no flip', 'scam!', 'unverified', 'distorts the facts', 'outdated!'
'understated', 'no evidence', 'unproven!', 'inaccurate attribution!', 'statirical reports', 'unproven', 'exaggerated',
'determination: huckster propaganda', 'grass roots movement!', 'commentary!', 'in-the-red', 'unsubstantiated messages',})
neutral_set = set({'truth! & fiction!', 'conclusion: unclear', '1', 'unobservable', 'needs context', 'truth! & disputed!', 'half flip',
'0', 'in-between', '4', 'None', '2', 'none', 'investigation pending!','not the whole story', '10','in the works',
'truth! & misleading!', '3', 'mixture', 'not yet rated', 'legend', 'stalled', 'truth! & unproven!', 'truth! & outdated!',
'compromise'})
# Read in the labels for the appropriate data
pomt_train_y = pd.DataFrame(columns=["label"])
sign_to_append = 0
for i, data in enumerate(training_data[training_data.columns].to_numpy()):
    if 'pomt' in data[0]:
        if data[2] in positive_set:
            sign_to_append = 1
        elif data[2] in negative_set:
            sign_to_append = -1
        else:
            sign_to_append = 0
        pomt_train_y = pomt_train_y.append({'label': sign_to_append}, ignore_index=True)
        print(f"{i + 1} out of {training_data.index.stop} from training")
# append testing data to training data
for i, data in enumerate(testing_data[testing_data.columns].to_numpy()):
    if 'pomt' in data[0]:
        if data[2] in positive_set:
            sign_to_append = 1
        elif data[2] in negative_set:
            sign_to_append = -1
        else:
            sign_to_append = 0
        pomt_train_y = pomt_train_y.append({'label': sign_to_append}, ignore_index=True)
        print(f"{i + 1} out of {testing_data.index.stop} from testing")
pomt_X_train, pomt_X_test, pomt_Y_train, pomt_Y_test = train_test_split(pomt_train_x, pomt_train_y, test_size= (count / pomt_train_x.shape[0]), stratify=pomt_train_y)
pomt_Y_train = pomt_Y_train.astype("int")
pomt_Y_test = pomt_Y_test.astype("int")
# One Vs. One Multiclass Classification
clf = OneVsOneClassifier(SVC(C = 1, verbose=True))
# Fit to Training Data
clf.fit(pomt_X_train, pomt_Y_train)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: only size-1 arrays can be converted to Python scalars
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-22-3314e23093e3> in <module>()
1 # Fit to Training Data
----> 2 clf.fit(pomt_X_train.squeeze(), pomt_Y_train)
3
4 # Training data accuracy
5 X_train_prediction = clf.predict(pomt_X_train)
4 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in __array__(self, dtype)
1991
1992 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
-> 1993 return np.asarray(self._values, dtype=dtype)
1994
1995 def __array_wrap__(
ValueError: setting an array element with a sequence.
I figured out what to do on my own in the end: I created one column in the dataframe for each element of the array, rather than one column per array. It's a bit unintuitive, but it works.
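As a rough sketch of that workaround (hypothetical names; assuming every cell holds a fixed-length NumPy embedding), expanding each array into its own scalar columns might look like this:
import numpy as np
import pandas as pd
# Hypothetical example: a DataFrame whose cells each hold a fixed-length embedding array.
df = pd.DataFrame({
    "claim": [np.random.rand(4) for _ in range(3)],
    "reason": [np.random.rand(4) for _ in range(3)],
})
# Expand every array-valued column into one scalar column per element,
# so scikit-learn sees a plain 2-D numeric matrix.
flat_parts = []
for col in df.columns:
    arr = np.stack(df[col].tolist())              # shape: (n_rows, embedding_dim)
    cols = [f"{col}_{i}" for i in range(arr.shape[1])]
    flat_parts.append(pd.DataFrame(arr, columns=cols, index=df.index))
flat_df = pd.concat(flat_parts, axis=1)
print(flat_df.shape)  # (3, 8): every element of every embedding is its own column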

Split a CSV in three parts and calculate the mean

I have a file containing:
Time 60Ni 61Ni 62Ni 63Cu 64Ni 65Cu 66Zn
0. 9.13242244720459 0.406570166349411 1.326429009437561 5.754200458526611 0.4233334958553314 2.68562912940979 4.148788005113602e-002
8.390999794006348 9.187464714050293 0.4089393615722656 1.334462523460388 5.790649890899658 0.425884485244751 2.702604055404663 4.17313240468502e-002
16.78300094604492 9.254316329956055 0.4119723737239838 1.344084143638611 5.832504749298096 0.428943395614624 2.722275018692017 4.203101620078087e-002
25.17399978637695 9.19857120513916 0.4094997346401215 1.336091756820679 5.791898727416992 0.4264563024044037 2.703336715698242 4.185733571648598e-002
33.56499862670898 9.194388389587402 0.4092871248722076 1.335391044616699 5.794968605041504 0.4264419078826904 2.704529047012329 4.192239791154862e-002
41.95600128173828 9.162041664123535 0.4078944325447083 1.330722570419312 5.766440868377686 0.425002932548523 2.691519498825073 4.182799160480499e-002
50.34700012207031 9.190646171569824 0.4091125726699829 1.334963202476502 5.786285877227783 0.426413893699646 2.700882434844971 4.196327552199364e-002
58.73799896240234 9.211565971374512 0.4100649058818817 1.337916374206543 5.8003830909729 0.4273969829082489 2.707314252853394 4.207673668861389e-002
67.12799835205078 9.240947723388672 0.4113766849040985 1.342136979103088 5.822870254516602 0.4287911653518677 2.717630624771118 4.222121462225914e-002
75.51899719238281 9.208130836486816 0.4099342525005341 1.337505698204041 5.802256584167481 0.4273860156536102 2.708084583282471 4.214133694767952e-002
83.91000366210938 9.196262359619141 0.4093911945819855 1.335786700248718 5.799176692962647 0.4268693923950195 2.706451416015625 4.215647280216217e-002
92.30100250244141 9.213265419006348 0.4101545214653015 1.338128447532654 5.807514190673828 0.4277283549308777 2.71068549156189 4.221603646874428e-002
100.6920013427734 9.163029670715332 0.407885879278183 1.330831050872803 5.775251865386963 0.4254410266876221 2.695534229278565 4.204751178622246e-002
109.0839996337891 9.144490242004395 0.4070722758769989 1.328153848648071 5.764679908752441 0.4246650040149689 2.690402746200562 4.198652133345604e-002
117.4749984741211 9.114171028137207 0.4057718515396118 1.32369875907898 5.745044231414795 0.4233448505401611 2.681406497955322 4.190905019640923e-002
125.8659973144531 9.149589538574219 0.407274603843689 1.328810453414917 5.766050815582275 0.4248199760913849 2.691139459609985 4.200970754027367e-002
134.2570037841797 9.168668746948242 0.4081465899944305 1.331702351570129 5.777794361114502 0.4256783723831177 2.696741819381714 4.206346347928047e-002
142.6479949951172 9.11380672454834 0.4057287871837616 1.323864817619324 5.740524291992188 0.4232001006603241 2.67945122718811 4.187140986323357e-002
151.0390014648438 9.100893974304199 0.4051263332366943 1.321851253509522 5.729655265808106 0.4226666390895844 2.674278259277344 4.182597994804382e-002
159.4299926757813 9.072731971740723 0.4039073586463928 1.317763328552246 5.713830471038818 0.4213792979717255 2.666974782943726 4.169051349163055e-002
167.8209991455078 9.186164855957031 0.4089057147502899 1.334116697311401 5.786634922027588 0.4264728426933289 2.700879812240601 4.211126267910004e-002
176.2129974365234 9.13982105255127 0.4068569839000702 1.327479124069214 5.76115083694458 0.4244593381881714 2.688895463943481 4.199059307575226e-002
184.60400390625 9.146007537841797 0.4071221053600311 1.328468441963196 5.762693881988525 0.4247534275054932 2.689634084701538 4.1985172778368e-002
192.9949951171875 9.18150806427002 0.4086942672729492 1.333438873291016 5.785679817199707 0.4262394905090332 2.700178623199463 4.207265004515648e-002
201.3860015869141 9.134004592895508 0.4066038727760315 1.326677560806274 5.753909587860107 0.424109697341919 2.685543775558472 4.191514849662781e-002
209.7769927978516 9.192599296569824 0.4091922044754028 1.335113883018494 5.792657852172852 0.4266164898872376 2.703598737716675 4.208896681666374e-002
218.1679992675781 9.166966438293457 0.4080702364444733 1.331447958946228 5.776984214782715 0.4254603683948517 2.696239709854126 4.19912114739418e-002
226.5590057373047 9.166423797607422 0.4080766439437866 1.331416010856628 5.771696090698242 0.4254250526428223 2.693812847137451 4.191195592284203e-002
234.9510040283203 9.122139930725098 0.4060815274715424 1.325031995773315 5.74381160736084 0.4234589040279388 2.680959224700928 4.174426198005676e-002
243.3419952392578 9.178729057312012 0.4085982143878937 1.333097338676453 5.783432006835938 0.4259471595287323 2.699411153793335 4.196531698107719e-002
251.7330017089844 9.196023941040039 0.4093179702758789 1.335668444633484 5.792133331298828 0.4266210496425629 2.703416347503662 4.196692258119583e-002
260.1239929199219 9.195613861083984 0.4093446731567383 1.33561098575592 5.790852546691895 0.4264806509017944 2.702755451202393 4.19374406337738e-002
268.5150146484375 9.124658584594727 0.4061901867389679 1.325218439102173 5.749895572662354 0.4233379364013672 2.683579206466675 4.166891798377037e-002
276.906005859375 9.071592330932617 0.4038631021976471 1.317633748054504 5.711780071258545 0.4209088683128357 2.666091680526733 4.146279022097588e-002
285.2969970703125 9.090703010559082 0.4047099351882935 1.320350289344788 5.724553108215332 0.4218063056468964 2.671880960464478 4.148663952946663e-002
293.68798828125 9.049410820007324 0.4028385281562805 1.314435601234436 5.699662208557129 0.4198987782001495 2.660340070724487 4.135752841830254e-002
302.0790100097656 9.158493995666504 0.4077092707157135 1.330130934715271 5.770212650299072 0.4247544705867767 2.693133354187012 4.172087088227272e-002
310.4700012207031 9.294267654418945 0.4137440025806427 1.350019454956055 5.85582971572876 0.4307662844657898 2.733232498168945 4.217509180307388e-002
318.8609924316406 9.266000747680664 0.4124558866024017 1.34581983089447 5.838682651519775 0.429353654384613 2.724989175796509 4.206011816859245e-002
327.2520141601563 9.227903366088867 0.4107420146465302 1.340180039405823 5.813295841217041 0.4277106523513794 2.713207006454468 4.191378504037857e-002
335.6430053710938 9.248990058898926 0.4117128551006317 1.343235015869141 5.836093425750732 0.4286618232727051 2.72357988357544 4.200825467705727e-002
344.0339965820313 9.200018882751465 0.4095089137554169 1.336208343505859 5.805673122406006 0.4264824092388153 2.709526300430298 4.185647144913673e-002
352.4259948730469 9.162602424621582 0.4079090356826782 1.330750703811646 5.780079364776611 0.4248281121253967 2.697546243667603 4.17003221809864e-002
360.8169860839844 9.165441513061523 0.4079831540584564 1.331099987030029 5.780121326446533 0.424967348575592 2.697607517242432 4.169800505042076e-002
369.2070007324219 9.242767333984375 0.4114582240581513 1.342459917068481 5.828019142150879 0.4283893704414368 2.719994068145752 4.194791615009308e-002
377.5989990234375 9.211434364318848 0.4100139439105988 1.337894320487976 5.801908493041992 0.4268820583820343 2.708046913146973 4.185103997588158e-002
385.989990234375 9.168110847473145 0.4081266224384308 1.33171010017395 5.772421360015869 0.4250668585300446 2.694308280944824 4.166359454393387e-002
394.3810119628906 9.162002563476563 0.4078731238842011 1.330778479576111 5.770648956298828 0.4247135519981384 2.693532466888428 4.165602847933769e-002
402.7720031738281 9.219051361083984 0.4104039072990418 1.339054584503174 5.805272579193115 0.4273586571216583 2.709418296813965 4.186749085783958e-002
411.1640014648438 9.225748062133789 0.4106448590755463 1.340008854866028 5.808595180511475 0.4276045560836792 2.711185216903687 4.189140349626541e-002
425.0020141601563 9.11283016204834 0.4056265950202942 1.323553919792175 5.742629528045654 0.4226277768611908 2.680011749267578 4.150775447487831e-002
433.3930053710938 9.15496826171875 0.4075464010238648 1.329663395881653 5.76693058013916 0.4244976043701172 2.691663980484009 4.165017232298851e-002
441.7839965820313 9.179342269897461 0.4086317718029022 1.333258748054504 5.783347606658936 0.4256252646446228 2.699387073516846 4.177364706993103e-002
450.1759948730469 9.202337265014648 0.4096647799015045 1.336641907691956 5.799064636230469 0.4267286956310272 2.706497669219971 4.189135506749153e-002
458.5669860839844 9.126877784729004 0.4062632024288178 1.325594425201416 5.7450852394104 0.4234336316585541 2.681554317474365 4.164514690637589e-002
466.9580078125 9.130221366882324 0.4063588082790375 1.326080322265625 5.750959873199463 0.4235436022281647 2.6843581199646 4.169851914048195e-002
475.3489990234375 9.142138481140137 0.4069503247737885 1.32788360118866 5.753814697265625 0.4240946471691132 2.685687065124512 4.17218841612339e-002
483.739990234375 9.144487380981445 0.4070816040039063 1.328163623809815 5.764283180236816 0.4243338704109192 2.69016432762146 4.180238768458366e-002
492.1310119628906 9.213832855224609 0.4101627767086029 1.338177442550659 5.806262969970703 0.4273685812950134 2.709989309310913 4.204079136252403e-002
500.5220031738281 9.151962280273438 0.4073929488658905 1.329235196113586 5.765473365783691 0.4247141480445862 2.691080808639526 4.187702387571335e-002
508.9129943847656 9.133262634277344 0.4065472185611725 1.326548576354981 5.755089282989502 0.4239353835582733 2.685916900634766 4.184074699878693e-002
517.3040161132813 9.194231033325195 0.4092318415641785 1.335361480712891 5.791540622711182 0.4266365468502045 2.703181505203247 4.204431921243668e-002
525.6950073242188 9.174141883850098 0.4084053635597229 1.332433700561523 5.780707836151123 0.4258663356304169 2.697983264923096 4.203671962022781e-002
534.0869750976563 9.127938270568848 0.4063973724842072 1.325674772262573 5.753820896148682 0.4238673448562622 2.685414791107178 4.189241677522659e-002
542.4769897460938 9.228574752807617 0.4108735322952271 1.340509295463562 5.816771030426025 0.4283493161201477 2.714869976043701 4.227539896965027e-002
550.8679809570313 9.247261047363281 0.4116438031196594 1.34306275844574 5.829936504364014 0.4292499721050263 2.720824480056763 4.234698414802551e-002
559.2589721679688 9.259587287902832 0.4121484756469727 1.344773530960083 5.840207099914551 0.4296930134296417 2.725474834442139 4.239725694060326e-002
567.6500244140625 9.236879348754883 0.4112152457237244 1.341552734375 5.824738502502441 0.4288162887096405 2.718418121337891 4.232741147279739e-002
576.041015625 9.265199661254883 0.4123806655406952 1.345624566078186 5.837865352630615 0.4300332069396973 2.724727630615234 4.243086278438568e-002
584.4310302734375 9.193467140197754 0.4092609882354736 1.335316061973572 5.791056632995606 0.4267773926258087 2.702801465988159 4.214197397232056e-002
592.822021484375 9.178906440734863 0.408621221780777 1.333141565322876 5.783803462982178 0.4262367188930512 2.699366569519043 4.21367958188057e-002
601.2139892578125 9.179999351501465 0.4086976051330566 1.333412766456604 5.781562805175781 0.4262183606624603 2.698424100875855 4.212524741888046e-002
609.60498046875 9.158502578735352 0.4077076315879822 1.330240249633789 5.771774768829346 0.4252981841564179 2.693920612335205 4.206201061606407e-002
617.9949951171875 9.168906211853027 0.4081432521343231 1.331776857376099 5.777164459228516 0.4257596433162689 2.696363210678101 4.212769865989685e-002
626.385986328125 9.148199081420898 0.4072228968143463 1.328739166259766 5.764687061309815 0.4248482882976532 2.690601110458374 4.204926639795303e-002
634.7769775390625 9.153997421264648 0.4075290560722351 1.329600691795349 5.76605749130249 0.4250805974006653 2.691195011138916 4.203818738460541e-002
643.1680297851563 9.142102241516113 0.4070025384426117 1.327812790870667 5.758194923400879 0.4244733154773712 2.687539577484131 4.197685047984123e-002
651.5599975585938 9.157526016235352 0.4076575040817261 1.33014190196991 5.771289825439453 0.4252424538135529 2.693483829498291 4.207025840878487e-002
659.9509887695313 9.142055511474609 0.4069408476352692 1.327834606170654 5.75890064239502 0.4245132505893707 2.687950849533081 4.196911677718163e-002
668.3410034179688 9.163941383361816 0.4079061448574066 1.331052899360657 5.773416519165039 0.425525963306427 2.694749593734741 4.208214208483696e-002
676.7329711914063 9.214210510253906 0.4101268947124481 1.338269472122192 5.804011821746826 0.4277287721633911 2.70874834060669 4.224084317684174e-002
685.1240234375 9.221725463867188 0.410546600818634 1.33942449092865 5.808478832244873 0.4280569553375244 2.710729837417603 4.224072396755219e-002
693.5139770507813 9.195225715637207 0.4093619287014008 1.335615515708923 5.792295932769775 0.4269255101680756 2.703481912612915 4.215554893016815e-002
701.905029296875 9.236662864685059 0.4111031889915466 1.341474533081055 5.820279121398926 0.4286713898181915 2.716408491134644 4.231745004653931e-002
710.2969970703125 9.219303131103516 0.4103749394416809 1.33903431892395 5.809108257293701 0.4279004633426666 2.711240530014038 4.220414161682129e-002
718.68798828125 9.196757316589356 0.4093507528305054 1.335767865180969 5.794125556945801 0.4269102811813355 2.704240798950195 4.217429086565971e-002
727.0789794921875 9.169294357299805 0.4081831276416779 1.331677913665772 5.778267860412598 0.4257012009620667 2.696781396865845 4.20493595302105e-002
735.468994140625 9.254044532775879 0.4119507372379303 1.344122529029846 5.83418083190918 0.4294586181640625 2.722884654998779 4.238997399806976e-002
743.8610229492188 9.224509239196777 0.4105926156044006 1.339867234230042 5.812450408935547 0.4280983507633209 2.712637424468994 4.227783530950546e-002
752.2520141601563 9.167038917541504 0.4080414175987244 1.331365466117859 5.778883457183838 0.4256396591663361 2.697120428085327 4.206839948892593e-002
760.6430053710938 9.156136512756348 0.407585471868515 1.329828977584839 5.771244049072266 0.4251766502857208 2.693709135055542 4.204395413398743e-002
769.0339965820313 9.206752777099609 0.4098866879940033 1.337259769439697 5.798995018005371 0.4273804128170013 2.706660270690918 4.218916967511177e-002
777.4249877929688 9.185664176940918 0.4088890254497528 1.33407187461853 5.787529468536377 0.426471084356308 2.701387643814087 4.21074777841568e-002
785.8159790039063 9.148477554321289 0.4072705209255219 1.328797459602356 5.764423847198486 0.4247606992721558 2.690322160720825 4.200183600187302e-002
794.2069702148438 9.139849662780762 0.4068310558795929 1.327486157417297 5.760977268218994 0.4244396984577179 2.688838005065918 4.198827594518662e-002
802.5980224609375 9.198716163635254 0.409488320350647 1.336077690124512 5.797767639160156 0.4270517528057098 2.705855131149292 4.215721413493156e-002
810.989013671875 9.175697326660156 0.4084174335002899 1.332631826400757 5.781099796295166 0.425992488861084 2.698201894760132 4.206936806440353e-002
819.3800048828125 9.106189727783203 0.4053537547588348 1.322664737701416 5.740387916564941 0.4229016602039337 2.679165840148926 4.18708510696888e-002
827.77099609375 9.11962890625 0.4059470593929291 1.324671149253845 5.745753765106201 0.4235488474369049 2.681836843490601 4.189123585820198e-002
836.1619873046875 9.221225738525391 0.4104022979736328 1.33923864364624 5.813970565795898 0.4279847741127014 2.713436365127564 4.224034398794174e-002
849.9970092773438 9.109155654907227 0.4055195748806 1.323018074035645 5.738785743713379 0.4229097962379456 2.678738832473755 4.17560487985611e-002
858.3880004882813 9.081585884094238 0.4043126106262207 1.319140315055847 5.720804691314697 0.4216950535774231 2.670202732086182 4.168836399912834e-002
866.7789916992188 9.1737060546875 0.4083895683288574 1.332486510276794 5.779799461364746 0.4258598983287811 2.697497129440308 4.201843962073326e-002
875.1699829101563 9.215715408325195 0.4102407991886139 1.33849024772644 5.806502342224121 0.4276199042797089 2.710031509399414 4.214433580636978e-002
883.5609741210938 9.29750919342041 0.4138506650924683 1.350215315818787 5.858696460723877 0.4313125610351563 2.734477758407593 4.240995645523071e-002
891.9520263671875 9.251111030578613 0.411830872297287 1.343641996383667 5.826048374176025 0.4292575418949127 2.719125270843506 4.226363822817802e-002
900.343017578125 9.236968994140625 0.411191999912262 1.341637492179871 5.816394329071045 0.4285323023796082 2.71470046043396 4.218020662665367e-002
908.7340087890625 9.18012809753418 0.4086549580097199 1.333361864089966 5.780932903289795 0.4260410964488983 2.698340177536011 4.198113456368446e-002
917.125 9.18910026550293 0.4090204238891602 1.334587931632996 5.791236877441406 0.426427572965622 2.702847242355347 4.205641150474548e-002
925.5159912109375 9.163248062133789 0.4078385829925537 1.330891489982605 5.775006771087647 0.4252764880657196 2.695378065109253 4.195348545908928e-002
933.906982421875 9.184928894042969 0.4089162349700928 1.334069848060608 5.789799213409424 0.42618727684021 2.702196598052979 4.199947416782379e-002
942.2979736328125 9.157343864440918 0.4076671004295349 1.330055475234985 5.770273208618164 0.4249707460403442 2.693178653717041 4.188660532236099e-002
950.6890258789063 9.162631988525391 0.4078827202320099 1.330793499946594 5.77417516708374 0.4251722097396851 2.695005416870117 4.190302640199661e-002
959.0800170898438 9.114273071289063 0.4057436585426331 1.323749780654907 5.743786811828613 0.4230408370494843 2.680756568908691 4.173881560564041e-002
967.4710083007813 9.244811058044434 0.4115355014801025 1.34266197681427 5.823981761932373 0.4288525879383087 2.718071460723877 4.214448481798172e-002
975.8619995117188 9.219685554504395 0.4104566872119904 1.339130640029907 5.808487892150879 0.4276332259178162 2.710957288742065 4.206658154726028e-002
984.2529907226563 9.184207916259766 0.4088565707206726 1.33392071723938 5.792478561401367 0.4260831475257874 2.703508853912354 4.195259138941765e-002
992.6439819335938 9.13871955871582 0.4068254828453064 1.327333569526672 5.761001586914063 0.4240987598896027 2.688708066940308 4.179005324840546e-002
1001.034973144531 9.151439666748047 0.4073895514011383 1.329284429550171 5.767615795135498 0.4246693849563599 2.691930532455444 4.182363301515579e-002
1009.424987792969 9.19940185546875 0.409492164850235 1.335996866226196 5.800271034240723 0.4267957508563995 2.70706057548523 4.198677837848663e-002
1017.815979003906 9.255974769592285 0.4120437800884247 1.344139099121094 5.840244770050049 0.4293366670608521 2.725528001785278 4.220050573348999e-002
1026.20703125 9.220073699951172 0.4104630351066589 1.339051723480225 5.81441593170166 0.4276903867721558 2.713610172271729 4.208677262067795e-002
1034.598022460938 9.158895492553711 0.4077011644840241 1.330096125602722 5.776969432830811 0.4249850511550903 2.696006536483765 4.186514392495155e-002
1042.989013671875 9.135567665100098 0.4066715240478516 1.326890826225281 5.756415843963623 0.423865556716919 2.686625719070435 4.174899682402611e-002
1051.380981445313 9.150594711303711 0.4073532521724701 1.329049825668335 5.765689849853516 0.4245824813842773 2.691075325012207 4.179978370666504e-002
1059.77197265625 9.146571159362793 0.4071609079837799 1.32847785949707 5.760791778564453 0.4242803156375885 2.688825607299805 4.17768582701683e-002
1068.162963867188 9.131063461303711 0.4064978063106537 1.326229453086853 5.752644538879395 0.4236991405487061 2.684972286224365 4.172741994261742e-002
1076.553955078125 9.098221778869629 0.4049918949604034 1.321496725082398 5.731342792510986 0.4222320318222046 2.675036668777466 4.162869602441788e-002
1084.944946289063 9.169441223144531 0.4081719219684601 1.331780910491943 5.776838779449463 0.4254011511802673 2.696260452270508 4.184866324067116e-002
1093.337036132813 9.187003135681152 0.4089777171611786 1.334323048591614 5.790809154510498 0.4261792898178101 2.702747344970703 4.196572676301003e-002
1101.72802734375 9.179986953735352 0.4086208045482636 1.333386778831482 5.783829689025879 0.4258585274219513 2.699674844741821 4.191147163510323e-002
1110.119018554688 9.200528144836426 0.4095506370067596 1.336296439170837 5.797418117523193 0.4267379641532898 2.7057945728302 4.19546514749527e-002
1118.509033203125 9.158334732055664 0.4076752066612244 1.330214262008667 5.770383834838867 0.4248470067977905 2.693165063858032 4.180992022156715e-002
1126.900024414063 9.194581985473633 0.4093466997146606 1.335410833358765 5.798298358917236 0.4264914393424988 2.706053495407105 4.194727912545204e-002
1135.291015625 9.176510810852051 0.4084961414337158 1.3328697681427 5.778421401977539 0.4256733357906342 2.697108507156372 4.18514646589756e-002
1143.682983398438 9.163573265075684 0.4079014360904694 1.330968260765076 5.773004055023193 0.4250616133213043 2.694518804550171 4.183558747172356e-002
1152.072998046875 9.159396171569824 0.4077317416667938 1.330322265625 5.771379947662354 0.4248954653739929 2.693806171417236 4.181275144219399e-002
1160.464965820313 9.165866851806641 0.4080128371715546 1.331347465515137 5.772171497344971 0.4252021610736847 2.694234848022461 4.181317612528801e-002
1168.85595703125 9.151269912719727 0.407374233007431 1.329119086265564 5.760807991027832 0.424500435590744 2.688781023025513 4.176882281899452e-002
1177.246948242188 9.141792297363281 0.4069608747959137 1.327713966369629 5.75624418258667 0.4241056740283966 2.68661379814148 4.173726961016655e-002
1185.636962890625 9.130838394165039 0.406494677066803 1.326230525970459 5.751668930053711 0.4236221015453339 2.684362649917603 4.168353974819183e-002
1194.027954101563 9.206241607666016 0.4098086059093475 1.337079763412476 5.802299022674561 0.4269396662712097 2.707928895950317 4.194400832056999e-002
1202.4189453125 9.17149543762207 0.4083086550235748 1.332085609436035 5.776546001434326 0.4253532886505127 2.696049451828003 4.180750250816345e-002
1210.81005859375 9.140050888061523 0.4068616330623627 1.327504873275757 5.760209083557129 0.4239790141582489 2.6883225440979 4.170787334442139e-002
1219.201049804688 9.165439605712891 0.4079880714416504 1.331203103065491 5.77871561050415 0.4250532984733582 2.697003841400147 4.180311039090157e-002
1227.593017578125 9.177500724792481 0.4085498750209808 1.332932233810425 5.783236026763916 0.4255987405776978 2.699163913726807 4.181493073701859e-002
1235.984008789063 9.177756309509277 0.408606618642807 1.33305811882019 5.782862663269043 0.4256067276000977 2.699074268341065 4.182154312729836e-002
1244.375 9.143049240112305 0.4070280194282532 1.327925682067871 5.766200542449951 0.4240804016590118 2.691066265106201 4.171686246991158e-002
1252.765991210938 9.110544204711914 0.4055243730545044 1.323151469230652 5.742761135101318 0.422651082277298 2.680213212966919 4.159015789628029e-002
1261.156982421875 9.153350830078125 0.4074757993221283 1.329340934753418 5.772144794464111 0.4244934320449829 2.693885564804077 4.173129424452782e-002
I want to split the file into three parts of 50 rows each:
data = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=3, nrows=50, index_col=False, dtype=float)
data2 = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=53, nrows=50, index_col=False, dtype=float)
data3 = pd.read_csv(file, sep='\t', names=['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn'], skiprows=103, nrows=50, index_col=False, dtype=float)
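As a side note (a sketch, not part of the original question), the three calls can be collapsed into one loop over the row offsets, assuming the blocks really are three contiguous 50-row sections and that file and pandas are defined as above:
names = ['Time', '60Ni', '61Ni', '62Ni', '63Cu', '64Ni', '65Cu', '66Zn']
# read the three 50-row blocks at the offsets used above (3, 53, 103)
blocks = [pd.read_csv(file, sep='\t', names=names, skiprows=skip,
                      nrows=50, index_col=False, dtype=float)
          for skip in (3, 53, 103)]
data, data2, data3 = blocks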
Then I'm removing outliers with:
cols = list(data.drop(columns='Time').columns)
datao = pd.DataFrame({'Time':data['Time']})
datao[cols] = data[cols].where(np.abs(stats.zscore(data[cols])) < 2)
cols = list(data2.drop(columns='Time').columns)
data2o = pd.DataFrame({'Time':data2['Time']})
data2o[cols] = data2[cols].where(np.abs(stats.zscore(data2[cols])) < 2)
data2o[cols] = data2o[cols].mean()
cols = list(data3.drop(columns='Time').columns)
data3o = pd.DataFrame({'Time':data3['Time']})
data3o[cols] = data3[cols].where(np.abs(stats.zscore(data3[cols])) < 2)
data3o[cols] = data3o[cols].mean()
Does this make sense so far?
And now I would like to create a mean of datao, data2o and data3o separately, resulting in three values each for 60Ni, 61Ni, 62Ni, 63Cu, 64Ni, 65Cu, 66Zn. After that, I want to take the mean of these three values again. How should I do this?
I tried to do it this way:
mean_filtered_transposed = pd.DataFrame(data=np.mean(data)).T
mean_filtered_transposed['Time'] = pd.to_datetime(mean_filtered_transposed["Time"], unit='s')
mean_filtered_transposed2 = pd.DataFrame(data=np.mean(data2)).T
mean_filtered_transposed2['Time'] = pd.to_datetime(mean_filtered_transposed2["Time"], unit='s')
mean_filtered_transposed3 = pd.DataFrame(data=np.mean(data3)).T
mean_filtered_transposed3['Time'] = pd.to_datetime(mean_filtered_transposed3["Time"], unit='s')
mean_all = pd.concat(mean_filtered_transposed, mean_filtered_transposed2, mean_filtered_transposed3)
However, this results in:
"TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame""
Based on the documentation:
objs: a sequence or mapping of Series or DataFrame objects
So:
s1 = pd.Series(['a', 'b'])
s2 = pd.Series(['c', 'd'])
pd.concat([s1, s2])
result:
0    a
1    b
0    c
1    d
dtype: object
But:
s1 = pd.Series(['a', 'b'])
s2 = pd.Series(['c', 'd'])
pd.concat(s1, s2)
generates:
TypeError: first argument must be an iterable of pandas objects, you passed an object of type "Series"
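So the fix is to wrap the frames in a list. Below is a minimal sketch of the whole averaging step, assuming the goal is one mean per filtered block (datao, data2o, data3o) and then the mean of those three means:
# one mean per block (Time dropped): a Series with the seven isotope columns
block_means = [df.drop(columns='Time').mean() for df in (datao, data2o, data3o)]
# three rows, one per block
mean_all = pd.concat(block_means, axis=1).T
# mean of the three block means - one value per isotope
grand_mean = mean_all.mean()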

How to use user input with pandas to get all the value_counts linked to this input

My dataframe's columns look like this:
Index(['#Organism/Name', 'TaxID', 'BioProject Accession', 'BioProject ID', 'Group', 'SubGroup', 'Size (Mb)', 'GC%', 'Replicons', 'WGS',
'Scaffolds', 'Genes', 'Proteins', 'Release Date', 'Modify Date',
'Status', 'Center', 'BioSample Accession', 'Assembly Accession',
'Reference', 'FTP Path', 'Pubmed ID', 'Strain'],
dtype='object')
I ask the user to enter the name of the species with this script:
print("bacterie species?")
species=input()
I want to look for the rows where "Organism/Name" equals the species entered by the user, then calculate value_counts on the Status column, and finally retrieve 'FTP Path'.
Here is the code I came up with, but it does not work:
if (data.loc[(data["Organism/Name"]==species)
print(Data['Status'].value_counts())
else:
print("This species not found")
if (data.loc[(data["Organism/Name"]==species)
print(Data['Status'].value_counts())
else:
print(Data.get["FTP Path"]
If I understand your question correctly, this is what you're trying to achieve:
import wget
import numpy as np
import pandas as pd
URL='https://ftp.ncbi.nlm.nih.gov/genomes/GENOME_REPORTS/prokaryotes.txt'
data = pd.read_csv(wget.download(URL) , sep = '\t', header = 0)
species = input("Enter the bacteria species: ")
if data["#Organism/Name"].str.contains(species, case = False).any():
print(data.loc[data["#Organism/Name"].str.contains(species, case = False)]['Status'].value_counts())
FTP_list = data.loc[data["#Organism/Name"].str.contains(species, case = False)]["FTP Path"].values
else:
print("This species not found")
To write all the FTP Path URLs into a txt file, you can do this:
with open('/path/urls.txt', mode='wt') as file:
    file.write('\n'.join(FTP_list))
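As a small refinement (a sketch, not part of the original answer), the boolean mask can be computed once and reused instead of evaluating str.contains three times:
mask = data["#Organism/Name"].str.contains(species, case=False, na=False)
if mask.any():
    print(data.loc[mask, 'Status'].value_counts())
    FTP_list = data.loc[mask, "FTP Path"].values
else:
    print("This species not found")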

How do I select only certain rows based on label in pandas?

Here is my function:
def get_historical_closes(ticker, start_date, end_date):
    my_dir = '/home/manish/Desktop/Equity/subset'
    os.chdir(my_dir)
    dfs = []
    for files in glob.glob('*.txt'):
        dfs.append(pd.read_csv(files, names = ['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'], parse_dates = [1]))
        p = pd.concat(dfs)
        d = p.reset_index(['Date', 'Ticker', 'Close'])
        pivoted = d.pivot_table(index = ['Date'], columns =['Ticker'])
        pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted
closes = get_historical_closes(['LT' or 'HDFC' or 'ACC'], '1999-01-01', '2014-12-31')
My problem is that I just want to get data for a few rows, namely data for LT, HDFC and ACC for all the dates, but when I execute the function, I am getting data for all the rows (approx. 1500 of them).
How can I slice the dataframe, so that I get only selected rows and not the entire dataframe?
The raw input data is a collection of text files, like so:
20MICRONS,20150401,36.5,38.95,35.8,37.35,64023,0
3IINFOTECH,20150401,5.9,6.3,5.8,6.2,1602365,0
3MINDIA,20150401,7905,7905,7850,7879.6,310,0
8KMILES,20150401,710.05,721,706,712.9,20196,0
A2ZINFRA,20150401,15.5,16.55,15.2,16,218219,0
AARTIDRUGS,20150401,648.95,665.5,639.65,648.25,42927,0
AARTIIND,20150401,348,349.4,340.3,341.85,122071,0
AARVEEDEN,20150401,42,42.9,41.55,42.3,627,0
ABAN,20150401,422,434.3,419,429.1,625857,0
ABB,20150401,1266.05,1284,1266,1277.45,70294,0
ABBOTINDIA,20150401,3979.25,4009.95,3955.3,3981.25,2677,0
ABCIL,20150401,217.8,222.95,217,221.65,11583,0
ABGSHIP,20150401,225,225,215.3,220.2,237737,0
ABIRLANUVO,20150401,1677,1677,1639.25,1666.7,106336,0
ACC,20150401,1563.7,1591.3,1553.2,1585.9,176063,0
ACCELYA,20150401,932,953.8,923,950.5,4297,0
ACE,20150401,40.1,41.7,40.05,41.15,356130,0
ACROPETAL,20150401,2.75,3,2.7,2.85,33380,0
ADANIENT,20150401,608.8,615.8,603,612.4,868006,0
ADANIPORTS,20150401,308.45,312.05,306.1,310.95,1026200,0
ADANIPOWER,20150401,46.7,48,46.7,47.75,3015649,0
ADFFOODS,20150401,60.5,60.5,58.65,59.75,23532,0
ADHUNIK,20150401,20.95,21.75,20.8,21.2,149431,0
ADORWELD,20150401,224.9,224.9,215.65,219.2,2743,0
ADSL,20150401,19,20,18.7,19.65,35053,0
ADVANIHOTR,20150401,43.1,43.1,43,43,100,0
ADVANTA,20150401,419.9,430.05,418,428,16206,0
AEGISCHEM,20150401,609,668,600,658.4,264828,0
AFL,20150401,65.25,70,65.25,68.65,9507,0
AGARIND,20150401,95,100,87.25,97.45,14387,0
AGCNET,20150401,91.95,93.75,91.4,93,2453,0
AGRITECH,20150401,5.5,6.1,5.5,5.75,540,0
AGRODUTCH,20150401,2.7,2.7,2.6,2.7,451,0
AHLEAST,20150401,196,202.4,185,192.25,357,0
AHLUCONT,20150401,249.5,258.3,246,251.3,44541,0
AHLWEST,20150401,123.9,129.85,123.9,128.35,688,0
AHMEDFORGE,20150401,229.5,237.35,228,231.45,332680,0
AIAENG,20150401,1268,1268,1204.95,1214.1,48950,0
AIL,20150401,735,747.9,725.1,734.8,31780,0
AJANTPHARM,20150401,1235,1252,1207.05,1223.3,126442,0
AJMERA,20150401,118.7,121.9,117.2,118.45,23005,0
AKSHOPTFBR,20150401,14.3,14.8,14.15,14.7,214028,0
AKZOINDIA,20150401,1403.95,1412,1392,1400.7,17115,0
ALBK,20150401,99.1,101.65,99.1,101.4,2129046,0
ALCHEM,20150401,27.9,32.5,27.15,31.6,32338,0
ALEMBICLTD,20150401,34.6,36.7,34.3,36.45,692688,0
ALICON,20150401,280,288,279.05,281.05,5937,0
ALKALI,20150401,31.6,34.2,31.6,33.95,4663,0
ALKYLAMINE,20150401,314,334,313.1,328.8,1515,0
ALLCARGO,20150401,317,323.5,315,319.15,31056,0
ALLSEC,20150401,21.65,22.5,21.6,21.6,435,0
ALMONDZ,20150401,10.6,10.95,10.5,10.75,23600,0
ALOKTEXT,20150401,7.5,8.2,7.4,7.95,8145264,0
ALPA,20150401,11.85,11.85,10.75,11.8,3600,0
ALPHAGEO,20150401,384.3,425.05,383.95,419.75,13308,0
ALPSINDUS,20150401,1.85,1.85,1.85,1.85,1050,0
ALSTOMT&D,20150401,585.85,595,576.65,588.4,49234,0
AMARAJABAT,20150401,836.5,847.75,831,843.9,121150,0
AMBIKCO,20150401,790,809,780.25,802.6,4879,0
AMBUJACEM,20150401,254.95,261.4,253.4,260.25,1346375,0
AMDIND,20150401,20.5,22.75,20.5,22.3,693,0
AMRUTANJAN,20150401,480,527.05,478.35,518.3,216407,0
AMTEKAUTO,20150401,144.5,148.45,144.2,147.45,552874,0
AMTEKINDIA,20150401,55.6,58.3,55.1,57.6,700465,0
AMTL,20150401,13.75,14.45,13.6,14.45,2111,0
ANANTRAJ,20150401,39.9,40.3,39.35,40.05,376564,0
ANDHRABANK,20150401,78.35,80.8,78.2,80.55,993038,0
ANDHRACEMT,20150401,8.85,9.3,8.75,9.1,15848,0
ANDHRSUGAR,20150401,92.05,98.95,91.55,96.15,11551,0
ANGIND,20150401,36.5,36.9,35.6,36.5,34758,0
ANIKINDS,20150401,22.95,24.05,22.95,24.05,1936,0
ANKITMETAL,20150401,2.85,3.25,2.85,3.15,29101,0
ANSALAPI,20150401,23.45,24,23.45,23.8,76723,0
ANSALHSG,20150401,29.9,29.9,28.75,29.65,7748,0
ANTGRAPHIC,20150401,0.1,0.15,0.1,0.15,23500,0
APARINDS,20150401,368.3,375.6,368.3,373.45,2719,0
APCOTEXIND,20150401,505,505,481.1,495.85,3906,0
APLAPOLLO,20150401,411.5,434,411.5,428.65,88113,0
APLLTD,20150401,458.9,464,450,454.7,72075,0
APOLLOHOSP,20150401,1351,1393.85,1351,1390,132827,0
APOLLOTYRE,20150401,169.65,175.9,169,175.2,3515274,0
APOLSINHOT,20150401,195,197,194.3,195.2,71,0
APTECHT,20150401,57.6,61,57,59.7,206475,0
ARCHIDPLY,20150401,32.95,35.8,32.5,35.35,103036,0
ARCHIES,20150401,19.05,19.4,18.8,19.25,46840,0
ARCOTECH,20150401,342.5,350,339.1,345.2,44142,0
ARIES,20150401,106.75,113.9,105,112.7,96825,0
ARIHANT,20150401,43.5,50,43.5,49.3,1647,0
AROGRANITE,20150401,61.5,62,59.55,60.15,2293,0
ARROWTEX,20150401,25.7,27.8,25.1,26.55,17431,0
ARSHIYA,20150401,39.55,41.5,39,40,69880,0
ARSSINFRA,20150401,34.65,36.5,34.6,36.3,71442,0
ARVIND,20150401,260.85,268.2,259,267.2,1169433,0
ARVINDREM,20150401,15.9,17.6,15.5,17.6,5407412,0
ASAHIINDIA,20150401,145,145,141,142.45,16240,0
ASAHISONG,20150401,113,116.7,112.15,115.85,5475,0
ASAL,20150401,45.8,45.8,38,43.95,7429,0
ASHAPURMIN,20150401,74,75.4,74,74.05,36406,0
ASHIANA,20150401,248,259,246.3,249.5,21284,0
ASHIMASYN,20150401,8.4,8.85,8.05,8.25,3253,0
ASHOKA,20150401,175.1,185.4,175.1,183.75,1319134,0
ASHOKLEY,20150401,72.7,74.75,72.7,74.05,17233199,0
ASIANHOTNR,20150401,104.45,107.8,101.1,105.15,780,0
ASIANPAINT,20150401,810,825.9,803.5,821.7,898480,0
ASIANTILES,20150401,116.25,124.4,116.25,123.05,31440,0
ASSAMCO,20150401,4.05,4.3,4.05,4.3,476091,0
ASTEC,20150401,148.5,154.5,146,149.2,322308,0
ASTRAL,20150401,447.3,451.3,435.15,448.6,64889,0
ASTRAMICRO,20150401,146.5,151.9,145.2,150.05,735681,0
ASTRAZEN,20150401,908,940.95,908,920.35,3291,0
ATFL,20150401,635,648,625.2,629.25,6202,0
ATLANTA,20150401,67.2,71,67.2,68.6,238683,0
ATLASCYCLE,20150401,203.9,210.4,203,208.05,25208,0
ATNINTER,20150401,0.2,0.2,0.2,0.2,1704,0
ATUL,20150401,1116,1160,1113,1153.05,32969,0
ATULAUTO,20150401,556.55,576.9,555.9,566.25,59117,0
AURIONPRO,20150401,192.3,224.95,191.8,217.55,115464,0
AUROPHARMA,20150401,1215,1252,1215,1247.4,1140111,0
AUSOMENT,20150401,22.6,22.6,21.7,21.7,2952,0
AUSTRAL,20150401,0.5,0.55,0.5,0.5,50407,0
AUTOAXLES,20150401,834.15,834.15,803,810.2,4054,0
AUTOIND,20150401,60,65,59.15,63.6,212036,0
AUTOLITIND,20150401,36,39,35.2,37.65,14334,0
AVTNPL,20150401,27,28,26.7,27.9,44803,0
AXISBANK,20150401,557.7,572,555.25,569.65,3753262,0
AXISCADES,20150401,335.4,345,331.4,339.65,524538,0
AXISGOLD,20150401,2473.95,2493,2461.1,2483.15,138,0
BAFNAPHARM,20150401,29.95,31.45,29.95,30.95,21136,0
BAGFILMS,20150401,3.05,3.1,2.9,3,31278,0
BAJAJ-AUTO,20150401,2027.05,2035,2002.95,2019.8,208545,0
BAJAJCORP,20150401,459,482,454,466.95,121972,0
BAJAJELEC,20150401,230,234.8,229,232.4,95432,0
BAJAJFINSV,20150401,1412,1447.5,1396,1427.55,44811,0
BAJAJHIND,20150401,14.5,14.8,14.2,14.6,671746,0
BAJAJHLDNG,20150401,1302.3,1329.85,1285.05,1299.9,24626,0
BAJFINANCE,20150401,4158,4158,4062.2,4140.05,12923,0
BALAJITELE,20150401,65.75,67.9,65.3,67.5,47063,0
BALAMINES,20150401,81.5,83.5,81.5,83.45,6674,0
BALKRISIND,20150401,649,661,640,655,16919,0
BALLARPUR,20150401,13.75,13.95,13.5,13.9,271962,0
BALMLAWRIE,20150401,568.05,580.9,562.2,576.75,17423,0
BALPHARMA,20150401,68.9,74.2,67.1,68.85,84178,0
BALRAMCHIN,20150401,50.95,50.95,49.3,50,84400,0
BANARBEADS,20150401,33,39.5,33,39.25,1077,0
BANARISUG,20150401,834.7,855,820,849.85,618,0
BANCOINDIA,20150401,105,107.5,103.25,106.8,11765,0
BANG,20150401,6.2,6.35,6.1,6.35,9639,0
BANKBARODA,20150401,162.75,170.4,162.05,168.9,2949846,0
BANKBEES,20150401,1813.45,1863,1807,1859.78,19071,0
BANKINDIA,20150401,194.6,209.8,194.05,205.75,3396490,0
BANSWRAS,20150401,65,65,60.1,63.9,6238,0
BARTRONICS,20150401,11.45,11.85,11.35,11.6,109658,0
BASF,20150401,1115,1142,1115,1124.65,14009,0
BASML,20150401,184,192,183.65,191.6,642,0
BATAINDIA,20150401,1095,1104.9,1085,1094.7,137166,0
BAYERCROP,20150401,3333,3408.3,3286.05,3304.55,8839,0
BBL,20150401,627.95,641.4,622.2,629.8,5261,0
BBTC,20150401,441,458,431.3,449.15,141334,0
BEDMUTHA,20150401,16.85,18,16.25,17.95,16412,0
BEL,20150401,3355,3595,3350,3494.2,582755,0
BEML,20150401,1100,1163.8,1086,1139.2,631231,0
BEPL,20150401,22.1,22.45,21.15,22.3,5459,0
BERGEPAINT,20150401,209.3,216.9,208.35,215.15,675963,0
BFINVEST,20150401,168.8,176.8,159.5,172.7,113352,0
BFUTILITIE,20150401,707.4,741,702.05,736.05,1048274,0
BGLOBAL,20150401,2.9,3.05,2.9,3.05,16500,0
BGRENERGY,20150401,117.35,124,117.35,122.3,207979,0
BHAGYNAGAR,20150401,17.9,17.9,16.95,17.5,1136,0
BHARATFORG,20150401,1265.05,1333.1,1265.05,1322.6,704419,0
BHARATGEAR,20150401,73.5,77.7,72.7,75.9,13730,0
BHARATRAS,20150401,810,840,800,821.4,981,0
BHARTIARTL,20150401,393.3,404.85,393.05,402.3,5494883,0
BHEL,20150401,235.8,236,229.6,230.7,3346075,0
BHUSANSTL,20150401,65.15,67.9,63.65,64,1108540,0
BIL,20150401,401.3,422,401.3,419.35,2335,0
BILENERGY,20150401,0.8,0.95,0.8,0.95,8520,0
BINANIIND,20150401,90.55,93.95,90.2,93.3,27564,0
BINDALAGRO,20150401,23.4,23.4,22.25,22.8,111558,0
BIOCON,20150401,472.5,478.85,462.7,466.05,1942983,0
BIRLACORPN,20150401,415,420,402.8,414.7,11345,0
BIRLACOT,20150401,0.05,0.1,0.05,0.1,439292,0
BIRLAERIC,20150401,52.3,54.45,52.15,53.7,9454,0
BIRLAMONEY,20150401,24.35,28.85,23.9,28.65,78710,0
BLBLIMITED,20150401,3.7,3.7,3.65,3.65,550,0
BLISSGVS,20150401,128,132.55,124.3,126.15,261958,0
BLKASHYAP,20150401,13.7,15.15,13.7,14.15,118455,0
BLUEDART,20150401,7297.35,7315,7200,7285.55,2036,0
BLUESTARCO,20150401,308.75,315,302,311.35,19046,0
BLUESTINFO,20150401,199,199.9,196.05,199.45,1268,0
BODALCHEM,20150401,34.5,34.8,33.05,34.65,65623,0
BOMDYEING,20150401,64,66.3,63.7,65.95,1168851,0
BOSCHLTD,20150401,25488,25708,25201,25570.7,16121,0
BPCL,20150401,810.95,818,796.5,804.2,1065969,0
BPL,20150401,30.55,32.5,30.55,31.75,116804,0
BRFL,20150401,146,147.9,142.45,144.3,7257,0
BRIGADE,20150401,143.8,145.15,140.25,144.05,36484,0
BRITANNIA,20150401,2155.5,2215.3,2141.35,2177.55,245908,0
BROADCAST,20150401,3.35,3.5,3.3,3.3,4298,0
BROOKS,20150401,38.4,39.5,38.4,39.3,19724,0
BSELINFRA,20150401,1.9,2.15,1.85,2.05,97575,0
BSL,20150401,29.55,31.9,27.75,31,9708,0
BSLGOLDETF,20150401,2535,2535,2501.5,2501.5,122,0
BSLIMITED,20150401,27.5,27.5,25.45,27.15,728818,0
BURNPUR,20150401,9.85,9.85,9.1,9.15,144864,0
BUTTERFLY,20150401,190.95,194,186.1,192.35,25447,0
BVCL,20150401,17.25,17.7,16.5,17.7,9993,0
CADILAHC,20150401,1755,1796.8,1737.05,1790.15,302149,0
CAIRN,20150401,213.85,215.6,211.5,213.35,841463,0
CAMLINFINE,20150401,89.5,91.4,87.5,91.1,32027,0
CANBK,20150401,366.5,383.8,365.15,381,1512605,0
CANDC,20150401,20.6,24.6,20.6,23.25,9100,0
CANFINHOME,20150401,611.1,649.95,611.1,644.7,72233,0
CANTABIL,20150401,47.6,50.5,47.6,50.25,5474,0
CAPF,20150401,398.85,427,398,421.75,224074,0
CAPLIPOINT,20150401,1020,1127.8,1020,1122.65,108731,0
CARBORUNIV,20150401,191.05,197,188.35,190,42681,0
CAREERP,20150401,151.9,156.6,149,153.25,26075,0
CARERATING,20150401,1487,1632.75,1464,1579.2,65340,0
CASTROLIND,20150401,476,476.25,465.1,467.3,185850,0
CCCL,20150401,4.2,4.7,4.2,4.65,47963,0
CCHHL,20150401,10.8,11,10.4,10.8,69325,0
CCL,20150401,178.35,185.9,176,184.3,244917,0
CEATLTD,20150401,805.25,830.8,785.75,826.7,501415,0
CEBBCO,20150401,18.3,20.25,18.1,19.85,40541,0
CELEBRITY,20150401,11.5,12.5,11.5,12.1,5169,0
CELESTIAL,20150401,59.9,61.8,59.5,60.05,128386,0
CENTENKA,20150401,152,159.9,148.2,157.1,16739,0
CENTEXT,20150401,1.5,1.5,1.2,1.25,19308,0
CENTRALBK,20150401,106,107.2,104.3,106.3,992782,0
CENTUM,20150401,756.85,805,756.8,801.9,26848,0
CENTURYPLY,20150401,234,245,234,243.45,367540,0
CENTURYTEX,20150401,633.6,682.4,631,675.35,3619413,0
CERA,20150401,2524.75,2524.75,2470,2495.3,6053,0
CEREBRAINT,20150401,15.6,16.2,14.65,14.8,348478,0
CESC,20150401,604.95,613.4,595.4,609.75,294334,0
CGCL,20150401,173,173,173,173,9,0
CHAMBLFERT,20150401,70.2,73.4,70.2,72.65,2475030,0
CHEMFALKAL,20150401,72.8,77,72,76.3,1334,0
CHENNPETRO,20150401,69,70.35,68.3,68.95,160576,0
CHESLINTEX,20150401,10.1,10.1,8.75,9.4,1668,0
CHOLAFIN,20150401,599.85,604,582.15,598.2,23125,0
CHROMATIC,20150401,3.4,4.05,3,3.3,63493,0
CIGNITITEC,20150401,433,444.95,432,440,32923,0
CIMMCO,20150401,92,94.05,91,94.05,19931,0
CINELINE,20150401,14.5,14.95,14.5,14.9,4654,0
CINEVISTA,20150401,3.3,3.3,3.3,3.3,10,0
CIPLA,20150401,714,716.5,703.85,709.6,1693796,0
CLASSIC,20150401,1.5,1.55,1.45,1.45,7770,0
CLNINDIA,20150401,824.7,837.9,819,828.8,6754,0
CLUTCHAUTO,20150401,13.75,13.75,13.6,13.6,1414,0
CMAHENDRA,20150401,9.35,9.5,8.9,9.15,1005172,0
CMC,20150401,1925.85,1925.85,1891,1907.25,153068,0
CNOVAPETRO,20150401,20,22.75,17.1,22.75,1656,0
COALINDIA,20150401,362.9,364.25,358,363,1428949,0
COLPAL,20150401,2003.4,2009.9,1990.05,2002.5,92909,0
COMPUSOFT,20150401,9.4,10.05,9,9.7,15083,0
CONCOR,20150401,1582.35,1627.3,1561,1582.85,182280,0
CONSOFINVT,20150401,36.55,40,36.5,40,439,0
CORDSCABLE,20150401,25.55,28,24.1,25.8,15651,0
COREEDUTEC,20150401,8,8.85,7.6,8.4,890455,0
COROMANDEL,20150401,268.5,271.35,266.15,268.35,42173,0
CORPBANK,20150401,52.5,55,52.05,54.1,1141752,0
COSMOFILMS,20150401,76.9,80,76.2,79.25,21020,0
COUNCODOS,20150401,1.2,1.2,1.2,1.2,2850,0
COX&KINGS,20150401,323,324.85,316.5,317.8,76998,0
CPSEETF,20150401,24.2,24.37,24.08,24.34,180315,0
CREATIVEYE,20150401,3.4,3.6,2.8,3.45,8545,0
CRISIL,20150401,2049,2052.45,2000,2030.7,3928,0
CROMPGREAV,20150401,164.85,167.4,163.2,166.1,2739478,0
CTE,20150401,18.55,18.55,16.85,17.05,8260,0
CUB,20150401,97.35,98.75,96.4,98.3,182702,0
CUMMINSIND,20150401,879,900.95,874.75,889.9,358652,0
CURATECH,20150401,10.8,11,9.75,10,755,0
CYBERTECH,20150401,28.5,33.45,28.1,33.4,103549,0
CYIENT,20150401,509.9,515,495.1,514.1,30415,0
DAAWAT,20150401,105,112.25,99.5,108.4,26689,0
DABUR,20150401,266.5,268.5,264.65,266.55,642177,0
DALMIABHA,20150401,428.15,439.9,422.5,432.65,9751,0
DALMIASUG,20150401,17.5,17.5,16.45,17.15,12660,0
DATAMATICS,20150401,66.5,75,66,72.15,119054,0
DBCORP,20150401,378,378,362.6,369.45,8799,0
DBREALTY,20150401,67,67.15,65.8,66.3,212297,0
DBSTOCKBRO,20150401,47.6,47.65,47.45,47.55,24170,0
DCBBANK,20150401,110.95,114.95,110.15,114.45,935858,0
DCM,20150401,84.5,88.75,84.1,87,34747,0
DCMSHRIRAM,20150401,107.95,114.3,107.95,112.8,29474,0
DCW,20150401,16.75,17.2,16.65,17.15,270502,0
DECCANCE,20150401,310.05,323.9,310.05,321.55,446,0
DECOLIGHT,20150401,1.45,1.45,1.4,1.4,1100,0
DEEPAKFERT,20150401,140,144,138.25,139.95,162156,0
DEEPAKNTR,20150401,68,70.65,66.4,69.95,8349,0
DEEPIND,20150401,46.6,54.4,46.3,51.9,52130,0
DELTACORP,20150401,79.95,82.75,79.75,82.35,889247,0
DELTAMAGNT,20150401,36.6,37.45,36.6,37.45,60,0
DEN,20150401,121.45,127,121.2,122.4,59512,0
DENABANK,20150401,50.8,51.5,50.1,51.35,376680,0
DENORA,20150401,136.7,136.7,131.05,133.6,743,0
DHAMPURSUG,20150401,36.8,36.95,34.85,36.35,38083,0
DHANBANK,20150401,30.8,32.1,30.5,31.75,195779,0
DHANUKA,20150401,690,690,652,660.15,24958,0
DHARSUGAR,20150401,14.15,14.7,13.8,14.45,1748,0
DHFL,20150401,468.9,474.9,461.6,467.85,448551,0
DHUNINV,20150401,97.15,103,94.5,99.85,15275,0
DIAPOWER,20150401,44.9,45.95,43.3,45.55,126085,0
DICIND,20150401,343,347,341,341.95,7745,0
DIGJAM,20150401,8,8.15,7.75,8.05,96467,0
DISHMAN,20150401,168,172.65,164.7,171.8,778414,0
DISHTV,20150401,82.2,84.85,81.35,84.15,5845850,0
DIVISLAB,20150401,1770.1,1809,1770.1,1802.35,68003,0
DLF,20150401,157,160.9,156.2,159.7,3098216,0
DLINKINDIA,20150401,165.05,168,162.2,164.75,22444,0
DOLPHINOFF,20150401,120.8,134.4,119.5,130.2,190716,0
DONEAR,20150401,15,15.95,14.5,15.35,679,0
DPL,20150401,46.6,49,44,45.45,25444,0
DPSCLTD,20150401,17.15,17.15,16.55,16.85,916,0
DQE,20150401,24.3,24.8,22.75,23.1,57807,0
DRDATSONS,20150401,5.8,6.1,5.7,6,2191357,0
DREDGECORP,20150401,374.9,403,372.65,393.4,106853,0
DRREDDY,20150401,3541,3566.8,3501.7,3533.65,282785,0
DSKULKARNI,20150401,77.6,77.6,74,77.1,3012,0
DSSL,20150401,9.5,9.5,9.5,9.5,50,0
DTIL,20150401,206.95,231.75,205.95,219.05,1437,0
DUNCANSLTD,20150401,15.55,16.3,15.3,15.85,740,0
DWARKESH,20150401,21,21,19.85,20.7,9410,0
DYNAMATECH,20150401,3868,4233,3857.1,3920.55,59412,0
DYNATECH,20150401,2.85,3,2.85,3,3002,0
EASTSILK,20150401,1.55,1.85,1.55,1.75,9437,0
EASUNREYRL,20150401,40.05,43,40.05,42.55,21925,0
ECEIND,20150401,136,148,127,133.85,43034,0
ECLERX,20150401,1603.8,1697,1595,1600.65,123468,0
EDELWEISS,20150401,63.65,67.5,63,66.6,451255,0
EDL,20150401,23.9,25,23.9,24.4,7799,0
EDUCOMP,20150401,12.45,13.55,12.35,13.55,499009,0
EICHERMOT,20150401,15929,16196.95,15830.05,16019.5,45879,0
EIDPARRY,20150401,174.05,175.8,168.65,171.2,56813,0
EIHAHOTELS,20150401,228,232.8,225,228,85,0
EIHOTEL,20150401,107.25,110,107.25,109.5,57306,0
EIMCOELECO,20150401,399,409.5,399,409.5,184,0
EKC,20150401,9.35,11.15,9.35,11.05,350782,0
ELAND,20150401,14.3,16.45,14.3,16.25,191406,0
ELDERPHARM,20150401,90.5,91.5,89.45,91.5,23450,0
ELECON,20150401,66.5,76.2,66.25,74.45,6045416,0
ELECTCAST,20150401,19.8,20.55,18.9,19.4,1956889,0
ELECTHERM,20150401,25.9,25.9,22.2,24,14611,0
ELGIEQUIP,20150401,147.5,150.4,146.4,150,9475,0
....
ZENITH, 20150401,...
I use EdChum's code from his comment and add some clarification. I think the main problem is that d is the output DataFrame built from all the *.txt files, so it cannot be created inside the for loop if you need one combined output.
import pandas as pd
import glob

def get_historical_closes(ticker, start_date, end_date):
    dfs = []
    # create an empty df for the output
    d = pd.DataFrame()
    # glob can use a path with *.txt - see http://stackoverflow.com/a/3215392/2901002
    for files in glob.glob('/home/manish/Desktop/Equity/subset/*.txt'):
        # added index_col for a multiindex df
        dfs.append(pd.read_csv(files, index_col=['Date', 'Ticker', 'Close'], names = ['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Null'], parse_dates = [1]))
    # d is the output from all .txt files, so it is built after the loop, not inside it
    p = pd.concat(dfs)
    d = p.reset_index(['Date', 'Ticker', 'Close'])
    d = d[(d['Ticker'].isin(ticker)) & (d['Date'] > start_date) & (d['Date'] < end_date)]
    pivoted = d.pivot_table(index = ['Date'], columns =['Ticker'])
    pivoted.columns = pivoted.columns.droplevel(0)
    return pivoted

# isin needs a list of values, so 'or' can be replaced by ','
# arguments changed for testing: 'HDFC' to 'AGCNET' and end_date '2014-12-31' to '2015-12-31'
closes = get_historical_closes(['LT', 'AGCNET', 'ACC'], '1999-01-01', '2015-12-31')
print(closes)
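As a side note on the isin comment above: in Python, 'LT' or 'HDFC' or 'ACC' evaluates to its first truthy operand, so the original call effectively passed a one-element list; isin needs each ticker as a separate list element. A quick sketch:
tickers_wrong = ['LT' or 'HDFC' or 'ACC']   # 'or' returns the first truthy operand
print(tickers_wrong)                        # ['LT'] - only one ticker survives
tickers_right = ['LT', 'HDFC', 'ACC']       # what isin actually needs
print(tickers_right)                        # ['LT', 'HDFC', 'ACC']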
