How to check limit range from csv file in python script - python

I am trying to fetch some values from a database and need to check them against the lower and upper limits of some variables, which are stored in a tab-separated (\t) text file. The text file looks like this:
Variable lower_limit upper_limit
temperature 20 40
pressure 0 100
temperature2 0 30
temperature3 20 25
and the data in database looks like
usec temperature_data temperature2_data
1456411800 25 15
1456412400 45 25
1456413000 28 19
So I start by checking whether the variable is in the text file; if it is, I then need to check the limits of that variable. So far I have only managed to verify the name of the variable, but I am unable to check the limits.
my code is as follow
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime as dt
import sys
import time
import datetime
import calendar
import numpy as np
import mysql.connector
import datetime
import numpy as np
import pandas as pd
import mysql.connector
from mysql.connector import errorcode
# starting day, month and year
start_day = dt.datetime(2016, 02, 25)
# total number of dates to visualize
number = 11
num_total = 11
# enter limit range
# NOTE: these are hard-coded values which I want to replace; instead of
# hard-coding them, I want to check these limit values from the text file
upper_limit = 250
lower_limit = 0
# start day in epoch time format
start_time = 1456411800
# variable name and filepath
filepath = '/home/robbyy/files/limit.txt'
vari_name = 'temperature2'
# database name, user and password details and query to fetch respective data
usr = 'roby'
password = 'xxxx'
db_name = 'roby_data'
insert_query = ("SELECT usec , temperature2_data "
"FROM rob_table WHERE usec >= %s "
"AND usec <= %s")
def generate_data():
num = num_total
cnx = mysql.connector.connect(user=usr, password=password,
database=db_name)
cursor = cnx.cursor()
query = insert_query
for i in range(number):
current_start_ts = (start_time + (i*86400))
current_day = datetime.datetime.fromtimestamp(current_start_ts)
# print 'cd: ', current_day
current_end_ts = (start_time + (i*86400)) + 86399
cursor.execute(query, (current_start_ts * 1000000,
current_end_ts * 1000000))
rows = cursor.fetchall()
rows_arr = np.array(rows)
# print 'rows all here li: ', rows
with open(filepath, 'r') as f:
limit_file = f.read()
limits = {}
for line in limit_file.splitlines():
print 'line to see:', line
variable, lower, upper = line.split()
if not variable == 'Variable':
limits[variable] = {'lower': int(lower),
'upper': int(upper)}
print 'limits: ', limits
if vari_name in data:
pass
if len(rows_arr) == 0:
continue
# print 'no data is here'
else:
for item, index in rows_arr:
if index >= upper_limit or index <= lower_limit:
print 'data exceeds limit: ', index
else:
continue
# print 'data is within range: ', index
else:
print 'sorry: this variable name is invalid'
start = start_day
dates = [start + dt.timedelta(days=i) for i in range(num)]
return dates
def main():
dates = generate_data()
main()
If someone could help me or guide me on how to check the lower and upper limits from the text file for the required variable, instead of hard-coding the values in the script, I would be grateful.
thanks

Just parse the limits file and, for example, create a dict out of it. Something like this:
def parse_limits(file):
    """Parse a whitespace-separated limits file into a nested dict.

    Every data line is expected to hold ``<variable> <lower> <upper>``.
    The header line (first field ``Variable``) and blank lines are skipped.

    :param file: path of the limits file.
    :returns: ``{variable: {'lower': int, 'upper': int}}``.
    :raises ValueError: if a non-blank line does not have exactly three
        fields, or a limit is not an integer.
    """
    limits = {}
    with open(file, 'r') as f:
        # Iterate lazily instead of reading the whole file and splitting it.
        for line in f:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line would break the unpacking below.
                continue
            variable, lower, upper = fields
            if variable != 'Variable':
                limits[variable] = {'lower': int(lower),
                                    'upper': int(upper)}
    return limits
That would result in a nested dict as follows:
{
'pressure': {'upper': 100, 'lower': 0},
'temperature2': {'upper': 30, 'lower': 0},
'temperature': {'upper': 40, 'lower': 20},
'temperature3': {'upper': 25, 'lower': 20}
}
Edit:
As requested, your final code might look something like this:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime as dt
import sys
import time
import datetime
import calendar
import numpy as np
import mysql.connector
import datetime
import numpy as np
import pandas as pd
import mysql.connector
from mysql.connector import errorcode
# starting day, month and year
start_day = dt.datetime(2016, 02, 25)
# total number of dates to visualize
number = 11
num_total = 11
# enter limit range
# NOTE: hard-coded values kept from the question; the check in
# generate_data() below reads limits[vari_name] from the text file instead
upper_limit = 250
lower_limit = 0
# start day in epoch time format
start_time = 1456411800
# variable name and filepath
filepath = '/home/robbyy/files/limit.txt'
vari_name = 'temperature2'
# database name, user and password details and query to fetch respective data
usr = 'roby'
password = 'xxxx'
db_name = 'roby_data'
insert_query = ("SELECT usec , temperature2_data "
"FROM rob_table WHERE usec >= %s "
"AND usec <= %s")
def parse_limits(file):
    """Parse a whitespace-separated limits file into a nested dict.

    Every data line is expected to hold ``<variable> <lower> <upper>``.
    The header line (first field ``Variable``) and blank lines are skipped.

    :param file: path of the limits file.
    :returns: ``{variable: {'lower': int, 'upper': int}}``.
    :raises ValueError: if a non-blank line does not have exactly three
        fields, or a limit is not an integer.
    """
    limits = {}
    with open(file, 'r') as f:
        # Iterate lazily instead of reading the whole file and splitting it.
        for line in f:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line would break the unpacking below.
                continue
            variable, lower, upper = fields
            if variable != 'Variable':
                limits[variable] = {'lower': int(lower),
                                    'upper': int(upper)}
    return limits
limits = parse_limits(filepath)
def generate_data():
num = num_total
cnx = mysql.connector.connect(user=usr, password=password,
database=db_name)
cursor = cnx.cursor()
query = insert_query
for i in range(number):
current_start_ts = (start_time + (i*86400))
current_day = datetime.datetime.fromtimestamp(current_start_ts)
# print 'cd: ', current_day
current_end_ts = (start_time + (i*86400)) + 86399
cursor.execute(query, (current_start_ts * 1000000,
current_end_ts * 1000000))
rows = cursor.fetchall()
rows_arr = np.array(rows)
# print 'rows all here li: ', rows
print 'limits: ', limits
if vari_name in data:
if len(rows_arr) == 0:
continue
# print 'no data is here'
else:
for item, index in rows_arr:
if index >= limits[vari_name]['upper'] or
index <= limits[vari_name]['lower']:
print 'data exceeds limit: ', index
else:
continue
# print 'data is within range: ', index
else:
print 'sorry: this variable name is invalid'
start = start_day
dates = [start + dt.timedelta(days=i) for i in range(num)]
return dates
def main():
dates = generate_data()
main()

Related

Add a column to a dataframe in Python

I am trying to add a few columns to a dataframe - here is the code
import import_ipynb
import talib
import numpy
import yfinance as yf
import datetime as dt
import time
from datetime import datetime, timedelta
import sqlite3
import pandas
import numpy as np
conn = sqlite3.connect('Strategy_RSI_MACD_Data.db')
c = conn.cursor()
c.execute("select distinct Stock from Universe")
tickers = c.fetchall()
for row in tickers:
if row[0]:
ticker_list.append(row[0])
stockdetails = yf.download(
tickers = ticker_list,
period = '6mo',
interval = '1d',
group_by = 'ticker',
auto_adjust = False,
prepost = False,
threads = True,
proxy = None
)
df_ta = pandas.DataFrame(data = stockdetails['Adj Close'], dtype=numpy.float64)
stockdetails['RSI'] = df_ta.apply(lambda c: talib.RSI(c, timeperiod = 14))
The last line is throwing this error:
ValueError: Wrong number of items passed 505, placement implies 1
How can I fix this?
Your lambda function is returning 505 values whereas your assignment should have just one. Try converting the output into a list-
stockdetails['RSI'] = [df_ta.apply(lambda c: talib.RSI(c, timeperiod = 14))]
I figured it out!! - I needed to insert a loop that would loop through the values:
for row in tickers:
c.execute("select [Adj Close] from StockData where Symbol = ? ", (row))
AdjClose = c.fetchall()
df_ta = pd.DataFrame(data = AdjClose, dtype=numpy.float64)
df_ta = df_ta.apply(lambda c: talib.RSI(c, timeperiod = 14))

How to compare datetime objects in single column of excel sheet using openpyxl?

I am attempting to create a Python script to iterate over all the rows of a specific column in an Excel spreadsheet. This column contains dates, and I need to compare each of these dates in order to find and return the oldest date in the sheet. After that, I will need to modify the data in that row.
I have tried to append the dates into a numpy array as datetime objects, this was working but I cannot traverse through the array and compare the dates. I have also tried to reformat the dates in the excel sheet to datetime objects in python and then compare but I get the following error:
AttributeError: type object 'datetime.datetime' has no attribute 'datetime'
I have tried some other unsuccessful methods. These are the ones where I got closest to achieving what I want. I'm quite lost, please help!
import openpyxl
import numpy as np
import datetime
def main():
wb = openpyxl.load_workbook("C:\\Users\\User\\Desktop\\Python Telecom Project.xlsx")
sheet = wb.active
def menuSelection():
while True:
menuChoice = input("Please select one of the following options:\n1. Add User\n2.Delete User\n3.Modify User\n")
if menuChoice not in ('1', '2', '3'):
print("The input entered is invalid, please try again")
continue
else:
break
return menuChoice
def findOldestDate():
wb = openpyxl.load_workbook("C:\\Users\\User\\Desktop\\Python Telecom Project.xlsx")
sheet = wb.active
## startMult = np.empty((0,1000), dtype='datetime64[D]')
## value = datetime.date.strftime("%Y-%m-%d")
for rowNum in range(2, sheet.max_row+1):
status = sheet.cell(row=rowNum, column=5).value
d8 = sheet.cell(row=rowNum, column=6).value
d8_2 = sheet.cell(row=rowNum+1, column=6).value
d8.value = datetime.date.strftime(d8, "%Y-%m-%d")
d8_2.value = datetime.date.strftime(d8_2, "%Y-%m-%d")
d8.number_format = 'YYYY MM DD'
d8_2.number_format = 'YYYY MM DD'
if d8 < d8_2:
oldestDate = d8
elif d8 > d8_2:
oldestDate = d8_2
else:
continue
return oldestDate
## array.append(startMult, date)
##
## while counter < len(array)-1:
##
## if array[counter] < array[counter + 1]:
##
## oldestDate = array[counter]
## counter += 1
##
## elif array[counter] > array[counter + 1]:
##
## oldestDate = array[counter + 1]
## counter += 1
##
## else:
## oldestDate = array[counter]
## continue
##
## return oldestDate
def addUser():
wb = openpyxl.load_workbook("C:\\Users\\User\\Desktop\\Python Telecom Project.xlsx")
sheet = wb.active
dateTimeObj = datetime.date.today()
print("Please enter the following information:\n")
inputName = input("Name: ")
inputNTID = input("NTID: ")
inputRATSID = input("RATSID: ")
inputStatus = input("Status: ")
inputTaskNum = input("Task #: ")
for rowVal in range(2, sheet.max_row+1):
oldestDate = findOldDate()
phoneNum = sheet.cell(row=rowVal, column=1).value
name = sheet.cell(row=rowVal, column=2).value
ntID = sheet.cell(row=rowVal, column=3).value
ratsID = sheet.cell(row=rowVal, column=4).value
status = sheet.cell(row=rowVal, column=5).value
date = sheet.cell(row=rowVal, column=6).value
if date == oldestDate:
name = inputName
ntID = inputNTID
ratsID = inputRATSID
status = inputStatus
date = dateTimeObj
print("\nChanges have been implemented successfully!")
##def deleteUser():
##
##
##
##def modifyUser():
addUser()
This is the current error message:
AttributeError: type object 'datetime.datetime' has no attribute 'datetime'
Prior to this one, I was getting:
can't compare 'str' to 'datetime'
What I want is the oldest date in the column to be returned from this function.
Finding the oldest date can be achieved with a one-liner like the following:
from datetime import datetime as dt
from re import match
def oldest(sheet, column):
"""
Returns the tuple (index, timestamp) of the oldest date in the given sheet at the given column.
"""
return min([(i, dt.strptime(sheet.cell(row=i, column=column).value, '%Y %m %d').timestamp()) for i in range(2, sheet.max_row+1) if isinstance(sheet.cell(row=i, column=column).value, str) and match(r'\d{4}\s\d{2}\s\d{2}', sheet.cell(row=i, column=column).value)], key=lambda x:x[1])
The longer, slower but more readable version follows:
def oldest(sheet, column):
    """
    Returns the tuple (index, timestamp) of the oldest date in the given sheet at the given column.

    Rows 2 to ``sheet.max_row`` are scanned. Only cells whose value is a
    string of the exact form ``YYYY MM DD`` and that parses as a real date
    are considered; every other cell is skipped.

    Raises ValueError if the column holds no such date.
    """
    date_format = '%Y %m %d'
    values = list()
    for i in range(2, sheet.max_row + 1):
        # Read the cell once instead of once per use.
        value = sheet.cell(row=i, column=column).value
        # \Z anchors the end: a cell with trailing text would otherwise pass
        # the filter and make strptime abort the whole scan.
        if not (isinstance(value, str) and match(r'\d{4}\s\d{2}\s\d{2}\Z', value)):
            continue
        try:
            parsed = dt.strptime(value, date_format)
        except ValueError:
            # Right shape but not a real date (e.g. month 13): not a date cell.
            continue
        values.append((i, parsed.timestamp()))
    if not values:
        raise ValueError("no 'YYYY MM DD' dates found in column %r" % (column,))
    return min(values, key=lambda x: x[1])
If you need that, you can convert the retrieved timestamp back in the date format you had as shown in this sample session at the python REPL:
>>> row, timestamp = oldest(sheet, 1)
>>> date = dt.utcfromtimestamp(timestamp[1]).strftime('%Y %m %d')
>>> date
'2019 10 31'
>>> row
30

Python: invalid syntax: <string>, line 1, pos 16

I have developed a code in Python in which -in order to run the program- I need to take some arguments from the command line. But I am getting continuously the same error:
Traceback (most recent call last):
File "<string>", line 1, in <fragment>
invalid syntax: <string>, line 1, pos 16
I don't have the faintest idea what is wrong with my code, so I present it below in case someone can help me:
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import math
import copy
import QSTK.qstkstudy.EventProfiler as ep
import csv
import sys
import argparse
def readData(li_startDate, li_endDate, ls_symbols):
#Create datetime objects for Start and End dates (STL)
dt_start = dt.datetime(li_startDate[0], li_startDate[1], li_startDate[2])
dt_end = dt.datetime(li_endDate[0], li_endDate[1], li_endDate[2])
#Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
dt_timeofday = dt.timedelta(hours=16)
#Get a list of trading days between the start and end dates (QSTK)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
#Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
#Keys to be read from the data
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
#Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
return [d_data, dt_start, dt_end, dt_timeofday, ldt_timestamps]
def marketsim(cash,orders_file,values_file):
orders = pd.read_csv(orders_file,index_col='Date',parse_dates=True,header=None)
ls_symbols = list(set(orders['X.4'].values))
df_lastrow = len(orders) - 1
dt_start = dt.datetime(orders.get_value(0, 'X.1'),orders.get_value(0, 'X.2'),orders.get_value(0, 'X.3'))
dt_end = dt.datetime(orders.get_value(df_lastrow, 'X.1'),orders.get_value(df_lastrow, 'X.2'),orders.get_value(df_lastrow, 'X.3') + 1 )
#d_data = readData(dt_start,dt_end,ls_symbols)
#Initialize daily timestamp: closing prices, so timestamp should be hours=16 (STL)
dt_timeofday = dt.timedelta(hours=16)
#Get a list of trading days between the start and end dates (QSTK)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
#Create an object of the QSTK-dataaccess class with Yahoo as the source (QSTK)
c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
#Keys to be read from the data
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
#Read the data and map it to ls_keys via dict() (i.e. Hash Table structure)
df_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
ls_symbols.append("_CASH")
trades = pd.Dataframe(index=list(ldt_timestamps[0]),columns=list(ls_symbols))
current_cash = cash
trades["_CASH"][ldt_timestamps[0]] = current_cash
current_stocks = dict()
for symb in ls_symbols:
current_stocks[symb] = 0
trades[symb][ldt_timestamps[0]] = 0
for row in orders.iterrows():
row_data = row[1]
current_date = dt.datetime(row_data['X.1'],row_data['X.2'],row_data['X.3'],16)
symb = row_data['X.4']
stock_value = d_data['close'][symb][current_date]
stock_amount = row_data['X.6']
if row_data['X.5'] == "Buy":
current_cash = current_cash - (stock_value*stock_amount)
trades["_CASH"][current_date] = current_cash
current_stocks[symb] = current_stocks[symb] + stock_amount
trades[symb][current_date] = current_stocks[symb]
else:
current_cash = current_cash + (stock_value*stock_amount)
trades["_CASH"][current_date] = current_cash
current_stocks[symb] = current_stocks[symb] - stock_amount
trades[symb][current_date] = current_stocks[symb]
#trades.fillna(method='ffill',inplace=True)
#trades.fillna(method='bfill',inplace=False)
trades.fillna(0)
#alt_cash = current_cash
#alt_cash = trades.cumsum()
value_data = pd.Dataframe(index=list(ldt_timestamps),columns=list("V"))
value_data = value_data.fillna(0)
value_data = value_data.cumsum(axis=0)
for day in ldt_timestamps:
value = 0
for sym in ls_symbols:
if sym == "_CASH":
value = value + trades[sym][day]
else:
value = calue + trades[sym][day]*d_data['close'][sym][day]
value_data["V"][day] = value
fileout = open(values_file,"w")
for row in value_data.iterrows():
file_out.writelines(str(row[0].strftime('%Y,%m,%d')) + ", " + str(row[1]["V"].round()) + "\n" )
fileout.close()
def main(argv):
if len(sys.argv) != 3:
print "Invalid arguments for marketsim.py. It should be of the following syntax: marketsim.py orders_file.csv values_file.csv"
sys.exit(0)
#initial_cash = int (sys.argv[1])
initial_cash = 1000000
ordersFile = str(sys.argv[1])
valuesFile = str(sys.argv[2])
marketsim(initial_cash,ordersFile,valuesFile)
if __name__ == "__main__":
main(sys.argv[1:])
The input I gave to the command line was:
python marketsim.py orders.csv values.csv
I guess that the problem lies either into the imports or probably into the main function(incl. the if below the def main(argv)
I have to point out that the files orders.csv and values.csv exist and are located into the same folder.
I hope have made everything clear.
So, I am looking forward to reading your answers community-mates! :D
Thank you!

pandas does not append to df as it should of each line of iteration

I have a df that does not behave. Please help me train it!
I need each of my iterations that reaches the deepest nested 'if' statement (i.e. satisfies all my requirements) to be appended to my df 'df_comp_KPIs'.
Why does this code not work?
Any ideas
import time
import urllib.request, urllib.error, urllib.parse
import pandas as pd
import csv
import urllib.request, urllib.error, urllib.parse
from bs4 import BeautifulSoup as bs4
start = time.time() # Start script timer
# Creating the df that will save my results in the Yahoo KPI iterations
#global df_comp_KPIs
df_comp_KPIs = pd.DataFrame() #columns = ('Ticker','Mark.Cap','PriceToBook','PEG5','TrailPE12Mo','DeptToEquit.')
ofInterest = ['AAN', 'ANF', 'ANCX', 'ACE', 'ATVI', 'AET', 'AGCO', 'ATSG', 'AWH', 'ALL', 'AFAM', 'ALJ']
evenBetter = []
# add some more to powers as necessary
powers = {'M': 10 ** 6, 'B': 10 ** 9, 'T': 10 ** 12}
# Convert the string from Market Cap col to float.
def stringNoToFloat(s):
try:
power = s[-1]
return float(s[:-1]) * powers[power]
except TypeError:
return s
#df.applymap(stringNoToFloat)
def yahoostats(ticker):
try:
print ('doing',ticker)
url = 'http://finance.yahoo.com/q/ks?s='+ticker
page = urllib.request.urlopen(url)
soup = bs4(page)
page.close()
# Lookup pbr and return the next 'td' tag-content
mcap = soup.find(text='Market Cap (intraday)').findNext('td').string
# print('Market Cap:', mcap)
pbr = float(soup.find(text='Price/Book (mrq):').findNext('td').string)
# print('Ticker %s, Price to book ratio: %1.2f' %(ticker, float(pbr))) # format 2 dig.
# print()
if float(pbr) < 3:
# print ('price to book ratio:',ticker,pbr)
PEG5 = float(soup.find(text='PEG Ratio (5 yr expected)').findNext('td').string)
# print('Ticker %s, PEG Ratio (5 yr expected): %1.2f' %(ticker, PEG5)) # format 2 dig.
if 0 < float(PEG5) < 3:
#print 'PEG forward 5 years',PEG5
DE = float(soup.find(text='Total Debt/Equity (mrq):').findNext('td').string)
# print('Ticker %s, Total Debt/Equity (mrq): %1.2f' %(ticker, DE)) # format 2 dig.
#
# #if 0 < float(DE) < 2:
#
PE12 = float(soup.find(text='Trailing P/E (ttm, intraday):').findNext('td').string)
# print ('Trailing PE (12mo):', PE12)
if float(PE12) < 15:
evenBetter.append(ticker)
df_comp_KPIs = df_comp_KPIs.append({'Ticker':ticker,'Mark.Cap':mcap,'PriceToBook':pbr,'PEG5':PEG5,'TrailPE12Mo':PE12,'DeptToEquit':DE}, ignore_index = True) #
df_comp_KPIs = df_comp_KPIs.sort(['PriceToBook','PEG5'], ascending=[1,1]) # , ignore_index=True
print('____________________________')
print('')
print(ticker,'meets requirements')
print('Market Cap (intraday):', mcap)
print('price to book:',pbr)
print('PEG forward 5 years',PEG5)
print('Trailing PE (12mo):',PE12)
print('Debt to Equity:',DE)
print('____________________________')
# saving ticker KPIs to csv #
df_comp_KPIs.to_csv('df_company_KPIs.csv')
except Exception as e:
print ('failed in the main loop:',str(e))
pass
return mcap, pbr, PEG5, PE12, DE
if __name__ == '__main__':
for eachticker in ofInterest:
yahoostats(eachticker)
# time.sleep(.05)
print(evenBetter)
print()
print('Company screener finished in %.1f seconds' %(time.time()-start))
# Convert string with MarketCap to float in one go on the mcap col in df
df_comp_KPIs['Mark.Cap'].applymap(stringNoToFloat)
OK. It appears I ended up with this error because of a missing global def of my df. So the code could not see the df created outside the function.
So the beginning of my function should look like this instead of the above:
def yahoostats(ticker):
global df_comp_KPIs
try:
print ('doing',ticker)
url = 'http://finance.yahoo.com/q/ks?s='+ticker
page = urllib.request.urlopen(url)
This solves the issue and I can alter, call, or do anything else with my df.
I found the answer here:
http://eli.thegreenplace.net/2011/05/15/understanding-unboundlocalerror-in-python

Data generation incomplete: Python random

I am trying to write a script to generate data. I am using random package for this. I execute the script and everything works fine. But when I check through the results, I found out that the script fails to generate the last 100+ rows for some reason.
Can someone suggest me why this could be happening?
from __future__ import print_function
from faker import Faker;
import random;
## Vaue declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax= 3.5;
## Define Column Header
Column_Names = "Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
"Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
";","Actual_Sales_Price",tax;
## Function to generate sales related information
def sales_data():
for x in range(0,1):
quantity_sold = random.randint(5,20);
discount = random.choice(range(5,11));
sales_price = random.uniform(20,30);
return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));
## Format the month to quarter and return the value
def quarter(month):
    """Return the quarter label ("Q1".."Q4") for a month number.

    Months 1-3 map to "Q1", 4-6 to "Q2", 7-9 to "Q3"; every other value
    (10-12, but also anything out of range) falls through to "Q4".
    """
    if 1 <= month <= 3:
        return "Q1"
    if 3 < month <= 6:
        return "Q2"
    if 6 < month <= 9:
        return "Q3"
    return "Q4"
## Generate product_id
def product_name():
    """Return "PROD" followed by five distinct digits (1-9) drawn at random."""
    digits = random.sample([1, 2, 3, 4, 5, 6, 7, 8, 9], 5)
    return "PROD" + ''.join(map(str, digits))
### Main starts here ###
result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')
print (Column_Names, result_log);
### Loop and Generate Data ###
for pop in range(0,population):
pop = random.randint(55000,85000);
for prod_id in range(0,product):
product_name2 = product_name();
for year in range(1,years):
for month in range(1,months):
for day in range(1,31):
a = sales_data();
rows = str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
print(rows,file=result_log);
#print (rows);
tax = tax+1;
You need to close a file to have the buffers flushed:
result_log.close()
Better still, use the file object as a context manager and have the with statement close it for you when the block exits:
filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
# The target of a with statement is bound with "as", not "="; the file is
# flushed and closed automatically when the block exits.
with open(filename, 'w') as result_log:
    # code writing to result_log
    pass
Rather than manually writing strings with delimiters in between, you should really use the csv module:
import csv
# ..
# Header cells, in the same order as the fields of each row written below.
column_names = (
    "Population_ID", "Product_Name", "Product_ID", "Year",
    "Month", "Quarter", "Day", "Quantity_sold", "Sales_Price", "Discount",
    "Tax", "Actual_Sales_Price")
# ..
# 'wb' is the mode the csv module expects on Python 2; on Python 3 open the
# file with open(filename, 'w', newline='') instead.
with open(filename, 'wb') as result_log:
    writer = csv.writer(result_log, delimiter=';')
    writer.writerow(column_names)
    # looping
    row = [pop, product_name2, prod_id, year, month, quarter(month), day,
           a[0], a[1], a[2], tax, a[3]]
    writer.writerow(row)

Categories