Encountering error during loop while creating dataframe - python

I am currently working with world religions data and want to build a dataframe with the columns ['name of country', 'country most adhered religion', 'number of adherence']. However, I encountered an error message. Below is my code.
'''
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
import mapclassify
import pyproj
from pyproj import Proj
from matplotlib.patches import Ellipse, Polygon
import datetime
import numpy as np
# Load the country geometries, the household-size table and the national
# religion-adherence table.
countries = geopandas.read_file('../data/world/ne_admin_0_countries.geojson')
hse_size = pd.read_csv('../data/world/houseshold_size_2018.csv', skiprows=4, header=0)
rlgn_adhere = pd.read_csv('../data/world/WRP_national.csv', header=0)
# Collect the header names at positions 3..37; they are used below as the
# per-religion columns.
religion_cat = []
rlgn_adhere_top = list(rlgn_adhere.columns.values)
for i in range(3,38):
    religion_cat.append(rlgn_adhere_top[i])
# For every country name, find its most recent year and join the full rows
# for that (year, name) pair back in.
country_rlgn_adhere = rlgn_adhere.groupby(['name'], as_index=False)
lastest_rlgn_adhere = country_rlgn_adhere['year'].max()
country_latest_adhere = lastest_rlgn_adhere.merge(rlgn_adhere, on=['year', 'name'], how='left')
col_latest_rlgn_pop = ['year', 'name'] + religion_cat
latest_rlgn_pop = country_latest_adhere[col_latest_rlgn_pop]
# NOTE(review): pop_rlgn is only initialised here, not per country, so a
# country with no value above 0 keeps the previous country's label.
pop_rlgn = ''
pop_rlgn_cat_num = pd.DataFrame(columns=['name', 'Country Most Adhered Religion', 'Number of Adherence'])
# For each country, scan the religion columns for the largest non-NaN value.
for x in latest_rlgn_pop['name']:
    maximum = 0
    a = pd.DataFrame()
    a = latest_rlgn_pop[latest_rlgn_pop['name'] == x]
    for y in religion_cat:
        b = pd.Series([])
        b = a[str(y)]
        # NOTE(review): b[0] is the lookup that raises "KeyError: 0" in the
        # traceback below. `a` is a boolean-mask selection of latest_rlgn_pop,
        # so it presumably keeps that frame's row labels and label 0 exists
        # only for the first country; b.iloc[0] would select by position
        # instead -- confirm.
        print(b[0])
        if np.invert(np.isnan(b[0])):
            b = int(b[0])
            if (b > maximum):
                maximum = b
                pop_rlgn = y
    a.insert(0,"Number of Adherence", maximum)
    a.insert(0,"Country Most Adhered Religion", pop_rlgn)
    # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is
    # the replacement on newer versions.
    pop_rlgn_cat_num = pop_rlgn_cat_num.append(a[['name', 'Number of Adherence', 'Country Most Adhered Religion']],sort=True)
# Attach the per-country result back onto the latest-year table.
latest_rlgn_pop = pd.merge(latest_rlgn_pop, pop_rlgn_cat_num, on=['name'])
# Same "latest row per group" pattern for household size, keyed on the
# reference date.
# NOTE(review): if the date column is read as strings, max() compares them
# lexicographically rather than chronologically -- confirm.
country_hse_size = hse_size.groupby(['Country or area'], as_index=False)
latest_size = country_hse_size['Reference date (dd/mm/yyyy)'].max()
avg_hse_size = hse_size[['Country or area', 'Reference date (dd/mm/yyyy)', 'Average household size (number of members)']]
country_latest_size = latest_size.merge(avg_hse_size, on=['Country or area','Reference date (dd/mm/yyyy)'], how='left')
country_latest_size = country_latest_size.dropna()
# Average the remaining rows so each country appears exactly once.
country_latest_size_unique = country_latest_size.groupby(['Country or area'], as_index=False)
country_latest_size_unique = country_latest_size_unique['Average household size (number of members)'].mean()
# Rename ADMIN so the geometry table can be merged on 'Country or area'.
countries = countries[['ADMIN', 'geometry']]
countries.columns = ['Country or area', 'geometry']
countries_household_size = countries.merge(country_latest_size_unique, on='Country or area', how='left')
'''
In my nested for loop, when line 36 'print(b[0])' is run for the second time, an error message appears in the console:
Traceback (most recent call last):
File "C:\Users\USER\Anaconda3\envs\MaCT\lib\site-packages\pandas\core\series.py", line 1068, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\USER\Anaconda3\envs\MaCT\lib\site-packages\pandas\core\indexes\base.py", line 4730, in get_value
return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
File "pandas\_libs\index.pyx", line 80, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 88, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 992, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 998, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
I have not yet found a clue about this error message. Could someone please help me with it? Thanks.
Here is the link to datasets I've been using:
Country_Household_Religions_Dataset

Related

Scan and find the keywords in the database from the csv file, then calculate the occurrence rate of other words

I need to find the occurrence rate (prevalence) of words in a comma-separated CSV file, for the words that appear next to a certain keyword on the same line.
import pandas as pd
from elasticsearch import Elasticsearch
es = Elasticsearch("http://localhost:9200")
# Load the keyword database and flatten its comma-separated "AllKeywords"
# column into one keyword per row.
searchDB = pd.read_csv('')
searchDB = searchDB["AllKeywords"].str.split(', ')
searchDB = searchDB.explode()
df = pd.read_csv('')  # keywords to look for
for i in range(len(df)):
    keywordToSearch = df.loc[i, "H"]
    # NOTE: this is the failing call discussed below. searchDB is a Series at
    # this point, so searchDB["AllKeywords"] is a label lookup on its index.
    res = es.search(index=searchDB["AllKeywords"], body={"from":0, "size":0, "query":{"match":{"sentence": df.loc[i, "H"]}}})
I am getting an error on the last line, where I am using Elasticsearch. Can you help me?
Traceback (most recent call last):
File "/Users//PycharmProjects/DataImp/venv/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3629, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 144, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index_class_helper.pxi", line 41, in pandas._libs.index.Int64Engine._check_type
KeyError: 'AllKeywords'
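It seems your error is at index=searchDB["AllKeywords"]: by that point searchDB has been reassigned to a Series (the split and exploded "AllKeywords" column), so it no longer has an "AllKeywords" key.
Clean up your variables: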
import pandas as pd
from elasticsearch import Elasticsearch
es = Elasticsearch("http://localhost:9200")
df = pd.read_csv('')
# Keep each processing step under its own name instead of rebinding one
# variable, so the original frame and its columns stay reachable.
keywords = df["AllKeywords"].str.split(', ')
exploded_keywords = keywords.explode()
for keywordToSearch in df["H"]:
    # NOTE(review): `index` is documented as the name(s) of the Elasticsearch
    # index to query; a pandas Series is passed here as in the question --
    # confirm the intended index name.
    res = es.search(index=df["AllKeywords"], body={"from":0, "size":0, "query":{"match":{"sentence": keywordToSearch}}})

MemoryError when running python script on google cloud

I am trying to use Google Cloud to run a script that makes predictions for every line of a test.csv file. I use the cloud because it looks like Google Colab would take a long time. However, when I run it, I get a memory error:
(pre_env) mikempc3#instance-1:~$ python predictSales.py
Traceback (most recent call last):
File "predictSales.py", line 7, in <module>
sales = pd.read_csv("sales_train.csv")
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/io/parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/io/parsers.py", line 463, in _read
data = parser.read(nrows)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/io/parsers.py", line 1169, in read
df = DataFrame(col_dict, columns=columns, index=index)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/frame.py", line 411, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/construction.py", line 257, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/construction.py", line 87, in arrays_to_mgr
return create_block_manager_from_arrays(arrays, arr_names, axes)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 1694, in create_block_manager_from_arrays
blocks = form_blocks(arrays, names, axes)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 1764, in form_blocks
int_blocks = _multi_blockify(items_dict["IntBlock"])
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 1846, in _multi_blockify
values, placement = _stack_arrays(list(tup_block), dtype)
File "/home/mikempc3/pre_env/lib/python3.5/site-packages/pandas/core/internals/managers.py", line 1874, in _stack_arrays
stacked = np.empty(shape, dtype=dtype)
MemoryError: Unable to allocate 67.2 MiB for an array with shape (3, 2935849) and data type int64
Here is my script:
import statsmodels.tsa.arima.model as smt
import pandas as pd
import datetime
import numpy as np
# `import statsmodels.tsa.arima.model as smt` binds only `smt`; the version
# print below needs the top-level name as well.
import statsmodels

# NOTE(review): this read is where the MemoryError in the traceback above is
# raised; passing usecols=/dtype= to read_csv may lower peak memory -- confirm
# against the real column ranges.
sales = pd.read_csv("sales_train.csv")
test = pd.read_csv("test.csv")
sales.date = sales.date.apply(lambda x: datetime.datetime.strptime(x, "%d.%m.%Y"))
# Aggregate daily sales per (month block, shop, item). The columns are
# selected with a list: indexing a groupby with a bare tuple of names is no
# longer accepted by current pandas.
sales_monthly = sales.groupby(
    ["date_block_num", "shop_id", "item_id"])[["date", "item_price",
                                               "item_cnt_day"]].agg({
        "date": ["min", "max"],
        "item_price": "mean",
        "item_cnt_day": "sum"})
# Printed once instead of once per test row.
print(statsmodels.__version__)
array = []
for i, row in test.iterrows():
    print("row['shop_id']: ", row['shop_id'], " row['item_id']: ", row['item_id'])
    # Slice this shop/item pair once and reuse it for both columns.
    history = sales_monthly.loc[pd.IndexSlice[:, [row['shop_id']], [row['item_id']]], :]
    # Per-block series: mean price times summed item count.
    ts = pd.DataFrame(history['item_price'].values *
                      history['item_cnt_day'].values).T.iloc[0]
    print(ts.values)
    # The length check alone also covers the empty case; comparing an array
    # with [] is not a well-defined emptiness test.
    if len(ts) > 2:
        # NOTE(review): the original fitted this identical model once per
        # value in range(1, 5) without using the loop variable, so a single
        # fit selects the same model -- confirm whether varying the order
        # was intended.
        fitted = smt.ARIMA(ts.values, order=(0, 1, 0), trend='t').fit()
        if fitted.aic < np.inf:
            # Clamp negative forecasts to zero.
            y_hat = max(fitted.forecast()[0], 0)
        else:
            # NaN or infinite AIC: no usable model, fall back to zero.
            y_hat = 0
    else:
        # Too little history for this shop/item pair.
        y_hat = 0
    print("predicted:", y_hat)
    d = {'id': row['ID'], 'item_cnt_month': y_hat}
    array.append(d)
    print("-------------------")
df = pd.DataFrame(array)
df.to_csv("submission.csv")
You can use the Fil memory profiler (https://pythonspeed.com/fil) to figure out which lines of code are responsible for peak memory use. It will also handle out-of-memory conditions and dump a report when you run out.
The only caveats are that (1) it requires Python 3.6 or later and (2) it only runs on Linux or macOS. Python is up to 3.9 now, so it is probably time to upgrade regardless.

Problems with CSV | Stock Price Manipulation

Hi everyone! I'm going through this course and am having issues. The line I'm having problems with is
df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
You can find this in the process_data_for_labels(ticker) function. Can anyone tell me what's going on? I copied his code exactly and am getting the same error.
import bs4 as bs
import requests
import pickle
import datetime as dt
import os
import pandas as pd
import pandas_datareader. data as web
import time
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
from collections import Counter
style.use('dark_background')
def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia.

    Takes the first cell of every non-header row of the page's
    'wikitable sortable' table, pickles the resulting list to
    sp500tickers.pickle, prints it and returns it.
    """
    page = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    parsed = bs.BeautifulSoup(page.text, 'lxml')
    constituents = parsed.find('table', {'class':'wikitable sortable'})
    # Skip the header row; rstrip removes trailing whitespace from the cell text.
    tickers = [
        tr.findAll('td')[0].text.rstrip()
        for tr in constituents.findAll('tr')[1:]
    ]
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers
#save_sp500_tickers()
def get_data_from_yahoo(reload_sp500=False):
    """Download daily price data for every S&P 500 ticker into stock_dfs/.

    Args:
        reload_sp500: When true, re-scrape the ticker list via
            save_sp500_tickers(); otherwise load it from sp500tickers.pickle.

    Tickers whose CSV already exists are skipped.
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    os.makedirs('stock_dfs', exist_ok=True)
    start = dt.datetime(2015, 1, 1)
    end = dt.datetime(2020, 7, 1)
    for ticker in tickers:
        # Normalise before the existence check so the checked path and the
        # written path agree (the CSV is saved under the dashed symbol).
        ticker = ticker.replace('.', '-')
        # The original literal lacked the f prefix, so it tested for a file
        # literally named '{ticker}.csv' and never skipped anything.
        csv_path = f'stock_dfs/{ticker}.csv'
        if os.path.exists(csv_path):
            print(f'Already have {ticker}')
            continue
        # Pause between downloads (presumably to avoid hammering the remote
        # service -- confirm).
        time.sleep(1)
        print(ticker)
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
#get_data_from_yahoo()
def compile_data():
    """Join the adjusted-close column of every ticker CSV into one table.

    Reads the ticker list from sp500tickers.pickle, loads
    stock_dfs/<ticker>.csv for each one, keeps only 'Adj Close' (renamed to
    the ticker symbol), outer-joins the columns on 'Date' and writes the
    result to sp500_joined_closes.csv.
    """
    with open("sp500tickers.pickle","rb") as f:
        tickers = pickle.load(f)
    main_df = pd.DataFrame()
    for count, ticker in enumerate(tickers):
        # The files are stored under the dashed form of the symbol.
        ticker = ticker.replace('.', '-')
        df = pd.read_csv(f'stock_dfs/{ticker}.csv')
        df.set_index('Date', inplace=True)
        df.rename(columns={'Adj Close':ticker}, inplace=True)
        # columns= replaces the positional axis argument, which pandas 2.0
        # no longer accepts.
        df.drop(columns=['Open','High','Low','Close','Volume'], inplace=True)
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')
        # Progress indicator every tenth ticker.
        if count % 10 == 0:
            print(count)
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')
#compile_data()
def visualize_data():
    """Plot a heatmap of the pairwise correlations between ticker columns.

    Reads sp500_joined_closes.csv, computes the correlation matrix of its
    columns and shows it with a red-yellow-green colour map fixed to [-1, 1].
    """
    # Use the first column as the index, as process_data_for_labels does, so
    # the date strings are not treated as a data column by corr().
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    #df['AAPL'].plot()
    #plt.show()
    df_corr = df.corr()
    print(df_corr.head())
    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)
    # Centre one tick per cell: x runs over columns (shape[1]), y over rows
    # (shape[0]). The matrix is square, so the counts are equal either way.
    ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
    # Put the first row at the top and the column labels above the plot.
    ax.invert_yaxis()
    ax.xaxis.tick_top()
    column_labels = df_corr.columns
    row_labels = df_corr.index
    ax.set_xticklabels(column_labels)
    ax.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap.set_clim(-1, 1)
    plt.tight_layout()
    plt.show()
#visualize_data()
# Machine Learning
def process_data_for_labels(ticker, hm_days=7):
    """Add forward percentage-change columns for one ticker.

    Args:
        ticker: Column name in sp500_joined_closes.csv to build labels for.
        hm_days: Number of look-ahead days; one '<ticker>_<i>d' column is
            added for each i in 1..hm_days. Defaults to 7.

    Returns:
        (tickers, df): the list of original column names and the frame with
        the added columns.

    Raises:
        KeyError: if `ticker` is not a column of the CSV.
    """
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    # Captured before the label columns are added, so it holds tickers only.
    tickers = df.columns.values.tolist()
    df.fillna(0,inplace=True)
    for i in range(1, hm_days+1):
        # Relative change between today's value and the value i rows ahead.
        df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
    # The last i rows have no future value (NaN after shift); zero them.
    df.fillna(0, inplace=True)
    return tickers, df
def buy_sell_hold(*args, requirement=0.2):
    """Classify a sequence of relative changes as buy, sell or hold.

    Args:
        *args: Relative changes, checked in the order given.
        requirement: Threshold a change must exceed in magnitude to trigger
            a signal. Keyword-only; defaults to 0.2.

    Returns:
        1 if the first change beyond the threshold is positive, -1 if it is
        negative, and 0 if no change exceeds the threshold (or no values
        were given).
    """
    for change in args:
        if change > requirement:
            return 1
        if change < -requirement:
            return -1
    return 0
def extract_featuresets(ticker):
    """Build the feature matrix and buy/sell/hold targets for one ticker.

    Args:
        ticker: Column name to build targets for.

    Returns:
        (X, y, df): X is the array of day-over-day percentage changes of all
        ticker columns, y the array of targets produced by buy_sell_hold,
        and df the cleaned frame both were taken from.
    """
    tickers, df = process_data_for_labels(ticker)
    # The look-ahead columns are named '<ticker>_<i>d'. Plain indexing
    # replaces the eval/exec calls (exec always returns None), and the 'd'
    # suffix matches the names process_data_for_labels creates.
    future_changes = [df[f'{ticker}_{i}d'] for i in range(1, 8)]
    # Unpack so buy_sell_hold receives the seven changes of one row per call.
    df[f'{ticker}_target'] = list(map(buy_sell_hold, *future_changes))
    vals = df[f'{ticker}_target'].values.tolist()
    str_vals = [str(i) for i in vals]
    print('Data spread: ', Counter(str_vals))
    # Drop rows that contain infinities (e.g. from division by a zero price).
    df.fillna(0, inplace=True)
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)
    # Features: percentage change of every original ticker column.
    df_vals = df[tickers].pct_change()
    df_vals = df_vals.replace([np.inf, -np.inf], 0)
    df_vals.fillna(0, inplace=True)
    X = df_vals.values
    # The f prefix was missing, so the literal '{ticker}_target' was looked up.
    y = df[f'{ticker}_target'].values
    return X, y, df
extract_featuresets('APPL')
Error:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 176, in <module>
extract_featuresets('APPL')
File "test.py", line 152, in extract_featuresets
tickers, df = process_data_for_labels(ticker)
File "test.py", line 132, in process_data_for_labels
df[f'{ticker}_{i}d'] = (df[ticker].shift(-i) - df[ticker]) / df[ticker]
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'APPL'
You've identified correctly where the problem begins but you need to continue to follow the breadcrumbs.
The error says "KeyError: 'APPL'", where 'APPL' is expected to be one of the stock tickers, which are the column names (keys) of your stock price dataframe 'df'. However, in this instance 'df' doesn't contain a column named 'APPL'. Check the spelling first: Apple's ticker symbol is 'AAPL', not 'APPL'. Otherwise, maybe something went wrong when 'pd.read_csv' loaded the CSV file, or the file itself is missing data?
Try opening a Python terminal and simply loading the CSV file: is it what you (or the program) would expect?
Keep digging!

pandas data frame KeyError oop

The purpose of this script is to read a csv file.
The file contains forex data.
The file has 7 columns Date, Time, Open, High, Low, Close and Volume, and around 600k rows.
After parsing the date and time, the script will perform some date-time calculations, such as extracting the month and day.
Then some technical analysis using TA-LIB library.
Here is the code:
import pandas as pd
import talib
class Data:
    """Load a price CSV and add date parts and TA-Lib indicator columns.

    The CSV is read without a header using the column names in `self.names`;
    'Date' and 'Time' are combined into a single 'Release Date' column.
    """

    def __init__(self):
        self.df = pd.DataFrame()
        self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
        # NOTE: self.df has no columns yet, so this lookup is the line that
        # raises KeyError: 'Open' in the traceback below.
        self.open = self.df['Open'].astype(float)
        self.high = self.df['High'].astype(float)
        self.low = self.df['Low'].astype(float)
        self.close = self.df['Close'].astype(float)

    def file(self, file):
        """Read `file` into self.df and return the frame."""
        self.df = pd.read_csv(file, names=self.names,
                              parse_dates={'Release Date': ['Date', 'Time']})
        return self.df

    def date(self):
        """Convert the 'Release Date' column to datetime."""
        self.df['Release Date'] = pd.to_datetime(self.df['Release Date'])

    def year(self):
        """Add a 'year' column taken from 'Release Date'."""
        self.df['year'] = pd.to_datetime(self.df['Release Date']).dt.year

    def month(self):
        """Add a 'month' column taken from 'Release Date'."""
        # Previously written to 'year', overwriting the year column.
        self.df['month'] = pd.to_datetime(self.df['Release Date']).dt.month

    def day(self):
        """Add a 'day' column taken from 'Release Date'."""
        # Previously written to 'year', overwriting the year column.
        self.df['day'] = pd.to_datetime(self.df['Release Date']).dt.day

    def dema(self):
        # DEMA - Double Exponential Moving Average
        self.df['DEMA'] = talib.DEMA(self.close, timeperiod=30)

    def ema(self):
        # EMA - Exponential Moving Average
        self.df['EMA'] = talib.EMA(self.close, timeperiod=30)

    def HT_TRENDLINE(self):
        # HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
        # The column name no longer carries a trailing space, matching the
        # other indicator columns.
        self.df['HT_TRENDLINE'] = talib.HT_TRENDLINE(self.close)

    def KAMA(self):
        # KAMA - Kaufman Adaptive Moving Average
        self.df['KAMA'] = talib.KAMA(self.close, timeperiod=30)

    def ma(self):
        # MA - Moving average
        self.df['MA'] = talib.MA(self.close, timeperiod=30, matype=0)

    def print(self):
        """Print the first rows of self.df."""
        return print(self.df.head())
x = Data()
x.file(r"D:\Projects\Project Forex\USDJPY.csv")
x.print()
Here is the error:
Traceback (most recent call last):
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Open'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Sayed/PycharmProjects/project/Technical Analysis.py", line 55, in <module>
x = Data()
File "C:/Users/Sayed/PycharmProjects/project/Technical Analysis.py", line 9, in __init__
self.open = self.df['Open'].astype(float)
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Sayed\miniconda3\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Open'
In the __init__ function you are initializing an empty DataFrame without any columns. But one line later, you are trying to convert the 'Open' column of that DataFrame to float.
# The __init__ from the question, annotated to show where it fails.
def __init__(self):
    self.df = pd.DataFrame() # No columns
    self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    self.open = self.df['Open'].astype(float) # ERROR: 'Open' column does not exist
    self.high = self.df['High'].astype(float)
    self.low = self.df['Low'].astype(float)
    self.close = self.df['Close'].astype(float)
Change your __init__ function to this and it should work!
# Creates the frame with the expected column names up front so the column
# lookups below succeed.
# NOTE(review): open/high/low/close are taken from the still-empty frame, and
# file() in the question's class rebinds self.df without refreshing them, so
# the talib methods would presumably still see empty series -- verify.
def __init__(self):
    self.names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    self.df = pd.DataFrame(columns=self.names) # Empty dataframe with columns
    self.open = self.df['Open'].astype(float) # Now 'Open' column exists
    self.high = self.df['High'].astype(float)
    self.low = self.df['Low'].astype(float)
    self.close = self.df['Close'].astype(float)

In Python Pandas Can't add new column after datetime column

I cannot concatenate new columns to an existing Pandas DataFrame if the last column of the existing DataFrame is of datetime type. Here is a minimal example:
import pandas as pd
import numpy as np
# Build a three-row frame with a single 'Time' column.
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'),
pd.Timestamp('2012-05-03')]
ed = pd.DataFrame( dates, index = range(3), columns=['Time'])
# Make the column timezone-aware: localise to UTC, then convert to US/Central.
ed['Time'] = ed['Time'].dt.tz_localize('UTC').dt.tz_convert('US/Central')
# Concatenate with an empty frame that only declares the new column names.
# No axis is given, so concat uses its default (axis=0, stacking rows).
# This is the call that raises the AttributeError shown in the traceback below.
ed = pd.concat([ed, pd.DataFrame(columns = [ 'Column1', 'Column2']
)], sort = False)
Traceback (most recent call last):
File "C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2878, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-c82182e48b5f>", line 8, in <module>
ed = pd.concat([ed, pd.DataFrame(columns = [ 'Column1', 'Column2'] )], sort = False)
File "C:\Anaconda2\lib\site-packages\pandas\core\reshape\concat.py", line 226, in concat
return op.get_result()
File "C:\Anaconda2\lib\site-packages\pandas\core\reshape\concat.py", line 423, in get_result
copy=self.copy)
File "C:\Anaconda2\lib\site-packages\pandas\core\internals.py", line 5421, in concatenate_block_managers
concatenate_join_units(join_units, concat_axis, copy=copy),
File "C:\Anaconda2\lib\site-packages\pandas\core\internals.py", line 5565, in concatenate_join_units
for ju in join_units]
File "C:\Anaconda2\lib\site-packages\pandas\core\internals.py", line 5851, in get_reindexed_values
if not self.block._can_consolidate:
AttributeError: 'NoneType' object has no attribute '_can_consolidate'
What if you change the ed['Time'] column after concatenating ed with the new DataFrame? Alternatively, concatenate along the columns by passing axis=1:
# Same setup as in the question: a tz-aware 'Time' column on three rows.
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]
ed = pd.DataFrame( dates, index = range(3), columns=['Time'])
ed['Time'] = ed['Time'].dt.tz_localize('UTC').dt.tz_convert('US/Central')
# axis=1 is the only change from the question's call: the frames are joined
# side by side (new columns) instead of being stacked as rows.
ed = pd.concat([ed, pd.DataFrame(columns = [ 'Column1', 'Column2'])], sort = False, axis=1)

Categories