We are getting the error below while migrating data from a Slack channel to a file. When we execute the script to fetch one day of data, it runs perfectly.
But when we execute it for two months of data, it writes about ten days of data into separate files and then throws an error on a particular date. It is possible that the source data on Slack is a bit different from what the script expects.
Traceback (most recent call last):
File "C:\Users\Slack SCript\script.py", line 218, in <module>
main()
File "C:\Users\Slack SCript\script.py", line 201, in main
parse(message['text'])
File "C:\Users\Slack SCript\script.py", line 114, in parse
size = float(elements[1])
ValueError: could not convert string to float:
As per the source data, some of the values are empty or 0; maybe that is what causes the error. Is there any way to skip such values and continue?
from slackclient import SlackClient
import time
import os
import sys
import datetime
from dateutil.relativedelta import relativedelta
servers = ("fd2a", "ff1a", "hh3b", "kw1a", "kw1b", "lo8a", "os5a", "os5b", "sg2a", "sg2b", 'sy1a', 'va1a', 'va1b')
types = ("", "nfs", "cluster")
currser = "d"
currtype = ""
used = {}
total = {}
available = {}
ts = 0
dir_name = "data"
def savedata(dir_path, filename, data):
f = open(dir_path + filename, "w") # opens file with name of "test.txt"
print(dir_path + filename)
f.write(data)
f.close()
def reset_data():
print("datareset")
for i in range(0, len(servers)):
for j in range(0, len(types)):
used[servers[i] + types[j]] = 0
total[servers[i] + types[j]] = 0
available[servers[i] + types[j]] = 0
def write_data(ts):
datastr = ''
global used
global total
ttotaltotalsum = 0
for j in range(0, len(types)):
datastr += types[j] + '\n'
datastr += "Name\t" + "Region\t" + "total(TB)\t" + "used(TB)\t" + "available(TB)\t" + "Used(%)\n"
for i in range(0, len(servers)):
tused = used[servers[i] + types[j]]
ttotal = total[servers[i] + types[j]]
ttotaltotalsum += ttotal
if (ttotal != 0):
datastr += (
servers[i][0:len(servers[i]) - 1] + "\t\t" +
servers[i][len(servers[i]) - 1] + "\t\t" +
"{:.1f}".format(ttotal / 1024) + " \t\t" +
"{:.1f}".format(tused / 1024) + " \t\t" +
"{:.1f}".format((ttotal - tused) / 1024) +"\t\t"+
"{:.1f}".format(tused / ttotal * 100) + " \t\t" +
" \n")
print("..")
if (ttotaltotalsum > 0):
hour= datetime.datetime.fromtimestamp(int(ts)).hour
day= datetime.datetime.fromtimestamp(int(ts)).day
month= datetime.datetime.fromtimestamp(int(ts)).month
year=datetime.datetime.fromtimestamp(int(ts)).year
if hour < 12:
savedata("data/", "Storage-Update-M-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
else:
savedata("data/", "Storage-Update-E-" +
str(day) + "-" +
str(month) + "-" +
str(year) + ".txt", datastr)
def parse(text):
global currser
global currtype
global used
global total
global available
global ts
content = text.split("\n")
for line in content:
line = line[:len(line)]
if line.__contains__("Netapp Cluster"):
for server in servers:
if line.__contains__(server):
currser = server
for type in types:
if line.__contains__(type):
currtype = type
# print(line)
if line.__contains__("Total available capacity"):
# print(line)
# print ("contains","Total available capacity------")
elements = line.split(":")
# print (elements)
size = float(elements[1])
# print(size)
total[currser + currtype] += size
# print(size,"TOTAL capacity",total)
elif line.__contains__("size provisioned"):
# print(line)
# print("contains", "Total LUN size provisioned------- ")
elements = line.split(":")
# print(elements)
size = float(elements[1])
# print(size)
used[currser + currtype] += size
# print(size, "Used", used)
# print( currser)
# print( currtype)
# print( used)
# print(total)
# print(available)
return (used, total)
def make_dir(dir_name):
if not os.path.exists(dir_name):
os.makedirs(dir_name)
def main():
slack_token = ""
channel_name = ''
time_on_last_message = time.time()
channel_id = ""
ts = 0.000
threshmins = 20
channels_call = SlackClient(slack_token).api_call("channels.list")
print(channels_call)
print(channels_call.keys())
for channel in channels_call["channels"]:
if channel["name"] == channel_name:
channel_id = channel["id"]
print(channel)
make_dir(dir_name)
print(channel_id)
reset_data()
time_since_last_update = time.time() - time_on_last_message
print("Waiting for new data....", time.time() - time_on_last_message)
if time_since_last_update > threshmins * 60:
write_data(ts)
reset_data()
sc = SlackClient(slack_token)
date_after_month = datetime.datetime.now() + relativedelta(months=-6)
date_after_month=date_after_month.timestamp()
while True:
breakflag=0
data = sc.api_call(
"channels.history",
channel=channel_id,
oldest=date_after_month,
count=1000,
)
if (data['ok'] == True):
messages = data['messages']
for message in reversed(messages):
# print(message['ts'])
if float(message['ts']) > ts:
print("difference=", float(message['ts']) - ts)
if float(message['ts']) - ts > (threshmins * 60):
print("greater diffrrece>reset................")
write_data(ts)
print(ts)
reset_data()
time_on_last_message = time.time()
ts = float(message['ts'])
parse(message['text'])
if (data["has_more"] == True):
print("has more")
date_after_month=message['ts']
else:
breakflag=1
else:
print("No data returned or error")
time.sleep(1) # in Seconds
if(breakflag==1):
break
main()
Based on the error message, elements[1] is an empty string, and Python cannot convert an empty string to a float:
>>> float("")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: could not convert string to float:
The elements[1] element is a string that can't be parsed as a float. The easiest way to diagnose this would be to attach a debugger and inspect what is actually being parsed, then change your code to handle it.
The second easiest way would be to binary-search for the record that makes the script fail, then fix your code to parse that record.
The absolutely preferred way would be this: once you have found the case your code didn't support, write a test that proves the case is now handled:
def test_parse_xyz():
    # must not raise, even when the value after the colon is not a float
    parse("Total available capacity: certainly_not_a_float")
These tests can be discovered and run automatically by e.g. pytest:
$ pytest parser.py
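If the goal is simply to skip the bad records and continue, a minimal sketch (assuming the line format handled by the parse() function above) is to wrap the conversion in try/except:

def safe_float(raw):
    # Return the value as a float, or None if it is empty or not a number.
    try:
        return float(raw)
    except ValueError:
        return None

Then, inside parse(), instead of size = float(elements[1]):

size = safe_float(elements[1])
if size is None:
    continue  # skip lines with an empty or malformed value

This lets the two-month run finish while still letting you log the offending lines for later inspection.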
I am using Python to download stock data from Yahoo. The download code is in csv_util.py.
The invoking script is mapper.py. After importing the csv_util module, I get the following error:
('Lines:', [])
Traceback (most recent call last):
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 56, in <module>
download_file_ticker(ticker,ref_ticker)
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 53, in download_file_ticker
csv_util.download_csv_file_web(ticker,ref_ticker)
TypeError: unbound method download_csv_file_web() must be called with csv_util instance as first argument (got str instance instead)
Here is the code:
mapper.py
###### Mapper.py ######
import time
import sys
# New imports
import stock_predict_main_app
import predict_stock.csv_util
from predict_stock.csv_util import csv_util
predict_date = '03112016'
ticker = 'ARVIND.NS'
ref_ticker = 'MSFT'
input_default_values = {'numdays2predict': 2,
'simulations': 10,
'historicaldatalen': 0,
'tickersymbol': 'ARVIND.NS',
# 'tickersymbol': 'MSFT',
'stockdate2predict': predict_date,
'downloadstock': 1,
'plotshow': 0,
'industrytype': 'itindustry'}
# Pass ticker symbol and date
def pass_ticker_date(predict_date, input_default_values):
with open('tickerList.txt') as f:
lines = f.read().splitlines()
print(type(input_default_values))
tickersymbol = input_default_values["tickersymbol"]
print("Lines:", lines)
for tickersymbol in lines:
print("tickersymbol:", tickersymbol)
stock_predict_main_app.test_predict(tickersymbol)
# Download the file based on the ticker symbol
def download_file_ticker(ticker,ref_ticker):
# ticker= input_default_values["tickersymbol"]
# ref_ticker = input_default_values["tickersymbol"]
csv_util.download_csv_file_web(ticker,ref_ticker)
pass_ticker_date(predict_date, input_default_values)
download_file_ticker(ticker,ref_ticker)
csv_util.py
import logging
import csv
import urllib
import datetime
import numpy as np
import pandas as pd
import sys
import os
from collections import defaultdict
###custom local modules###
from datetime_util import datetime_util
from loggingpy import loggingpy
global stock_input_file,stock_input_ref_file
global loggingpy_obj,datetime_util_obj
global actual_stock_price, output_stock_price
class csv_util:
def __init__(self,actual_stock_price,output_stock_price,download_flag):
print("Class anme: __init__",self.__class__.__name__)
self.stock_input_file=""
self.stock_input_ref_file = ""
self.actual_stock_price = actual_stock_price
self.output_stock_price = output_stock_price
self.download_flag=download_flag
self.datetime_util_obj = datetime_util()
self.loggingpy_obj = loggingpy()
#datetime_util_obj = self.datetime_util_obj
#loggingpy_obj=self.loggingpy_obj
'''
METHOD: prepare_actual_data2writecsv
'''
def prepare_actual_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prepare_actual_data2writecsv")
temp_date_ary = []
temp_date_ary = self.actual_stock_price['date']
temp_closeprice_ary = self.actual_stock_price['closeprice']
temp_dailyreturn_ary = self.actual_stock_price['dailyreturn']
# predicted_date_array = sorted(temp_predicted_values_wd.keys(),reverse=True)
# remove last element of array or appenda dummy 0 to daily returns
temp_date_ary.pop()
temp_closeprice_ary.pop()
# temp_dailyreturn_ary.append(0)
self.loggingpy_obj.log_func({'temp_date_ary': temp_date_ary, 'temp_closeprice_ary': temp_closeprice_ary,
'temp_dailyreturn_ary': temp_dailyreturn_ary})
np_column_ary = np.column_stack((temp_date_ary, temp_closeprice_ary, temp_dailyreturn_ary))
num_rows, num_columns = np_column_ary.shape
logging.info("np_column_ary:%s,Rowsdata %s,ColumnData %s", np_column_ary.size, np_column_ary[:][0],
np_column_ary[:, 0])
logging.info("NumRows:%d,Num Columns:%s", num_rows, num_columns)
counter = 0
for i in range(0, num_rows):
counter += 1
temp_temp_row_data = []
temp_row_data = np_column_ary[:][i]
temp_temp_row_data = list(temp_row_data)
temp_rowdate = temp_row_data[0]
logging.debug("[%d],Length:[%d],type:[%s],Date:%s,Rowsdata:%s", i, len(temp_row_data), type(temp_row_data),
temp_rowdate, temp_row_data)
predict_date_wise_data_wd[temp_rowdate] = (map(str, temp_temp_row_data))
predict_datewise_data_wod[temp_rowdate] = (map(str, temp_temp_row_data))
logging.info(">>prepare_actual_data2writecsv")
'''
METHOD: prepare_data2writecsv
'''
def prep_predicted_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prep_predicted_data2writecsv")
temp_predicted_values_wd = self.actual_stock_price['predicted_vals_with_drift']
temp_predicted_values_wod = self.actual_stock_price['predicted_vals_without_drift']
self.actual_stock_price['meanwithdrift'] = []
self.actual_stock_price['meanwithoutdrift'] = []
temp_var = temp_predicted_values_wd.keys()
predicted_date_array = self.datetime_util_obj.sort_datettime_list(temp_var, False)
for eack_key in predicted_date_array:
logging.debug("WD:eack key:%s", eack_key)
temp_string_val_wd = []
temp_string_val_wod = []
temp_string_val_wd = temp_predicted_values_wd[eack_key]
temp_string_val_wod = temp_predicted_values_wod[eack_key]
mean_wd = np.mean(temp_string_val_wd)
mean_wod = np.mean(temp_string_val_wod)
# Store mean in global variable
self.actual_stock_price['meanwithdrift'].append(mean_wd)
self.actual_stock_price['meanwithoutdrift'].append(mean_wod)
logging.debug("meanwithdrift:%s,meanwithoutdrift:%s", mean_wd, mean_wod)
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wd), type(temp_string_val_wd))
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wod), type(temp_string_val_wod))
temp_string_wd = []
temp_string_wod = []
if not predict_datewise_data_wod.has_key(eack_key):
predict_datewise_data_wod[eack_key] = []
predict_date_wise_data_wd[eack_key] = []
temp_string_wd = [eack_key, "", ""]
temp_string_wod = [eack_key, "", ""]
temp_string_wd.append(mean_wd)
temp_string_wod.append(mean_wod)
temp_string_wd.extend(temp_string_val_wd)
temp_string_wod.extend(temp_string_val_wod)
logging.debug("temp_string_wd:len:%d,type:%s,Data:%s", len(temp_string_wd), type(temp_string_wd),
temp_string_wd)
logging.debug("temp_string_wod:len:%d,type:%s,Data:%s", len(temp_string_wod), type(temp_string_wod),
temp_string_wod)
predict_date_wise_data_wd[eack_key].extend(temp_string_wd)
predict_datewise_data_wod[eack_key].extend(temp_string_wod)
#self.loggingpy_obj.log_func({"temp_string_wd": temp_string_wd, "temp_string_wod": temp_string_wod})
logging.info(">>prepare_data2writecsv")
'''
METHOD: write2csvfile
Writes given data to the given csv absolute path filename
Input arguments: filename to be written, data to be written
'''
def write2csvfile(self,file_name,local_rows_data):
logging.info("<<:write2csvfile")
#output_data_path=self.loggingpy_obj.output_data_path
#os.chdir(output_data_path)
with open(file_name, 'w') as csv_fw:
out_csv_writer = csv.writer(csv_fw, lineterminator='\n')
out_csv_writer.writerows(local_rows_data)
logging.info("csv file[%s]writing :Sucess",file_name)
logging.info(">>:write2csvfile")
'''
# Prepare header list of columns to write to csv file
# Write predicted values to csv file
predicted_data_wod_date_val_ary
'''
##classmethod
#def write2csv_file(ticker_symbol):
def write2csv_file(self,ticker_symbol):
logging.info("<<:write2csv_file")
datetime_stamp=datetime.datetime.now().strftime("%Y%m%d_%H%M")
file_name="output_prediction_with_drift"+ticker_symbol+"_"+datetime_stamp+".csv"
file_name_wod = "output_prediction_without_drift" + ticker_symbol + "_" + datetime_stamp + ".csv"
file_name=self.loggingpy_obj.output_data_path + file_name
file_name_wod = self.loggingpy_obj.output_data_path + file_name_wod
column_headers=self.output_stock_price['column_headers']
#Prepare header list of columns to write to csv file;column_headers is a global variable
column_headers.insert(0,'Date')
column_headers.insert(1, 'Actual Prices')
column_headers.insert(2, 'Daily Return')
column_headers.insert(3, 'Mean')
logging.info("column_headers,len:%s,type:%s,data:%s", len(column_headers), type(column_headers), column_headers)
logging.info("self:column_headers,len:%s", len(self.output_stock_price['column_headers']))
predict_date_wise_data_wd = {}
predict_datewise_data_wod = {}
self.prepare_actual_data2writecsv(predict_date_wise_data_wd, predict_datewise_data_wod)
self.loggingpy_obj.log_func(
{"Before:predict data_wd": predict_date_wise_data_wd, "predict data_wod": predict_datewise_data_wod})
self.prep_predicted_data2writecsv(predict_date_wise_data_wd,predict_datewise_data_wod)
self.loggingpy_obj.log_func({"After:pred data_wd": predict_date_wise_data_wd, "pred data_wod": predict_datewise_data_wod})
temp_new_data_ary=predict_date_wise_data_wd.keys()
sorted_temp_new_data_ary = self.datetime_util_obj.sort_datettime_list(temp_new_data_ary,True)
self.loggingpy_obj.log_func({"sorted_temp_new_data_ary":sorted_temp_new_data_ary})
data2write2csv_wd = [column_headers]
data2write2csv_wod = [column_headers]
counter=1
# add headers
for each_key in sorted_temp_new_data_ary:
counter+=1
data2write2csv_wd.insert(counter, predict_date_wise_data_wd[each_key])
data2write2csv_wod.insert(counter,predict_datewise_data_wod[each_key])
self.write2csvfile(file_name, data2write2csv_wd)
self.write2csvfile(file_name_wod, data2write2csv_wod)
logging.debug("data2write2csv_wd:%s", repr(data2write2csv_wd))
logging.info("<<:write2csv_file")
#sys.exit()
# ######################### END OF METHOD write2csv_file ################################
'''
METHOD: read_csv_file
'''
##classmethod
def read_csv_file(self,file_name,ref_data_flag):
logging.debug("<<:read_csv_file")
logging.info("file_name,%s,",file_name)
if not os.path.exists(file_name):
logging.critical("File not found:Check!"+file_name)
sys.exit(2)
#actual_stock_price=self.actual_stock_price
logging.info("file_name,%s,", file_name)
data = pd.read_csv(file_name,parse_dates=False, infer_datetime_format=False,date_parser=None)
logging.info("self:::::::::::::%s",repr(self.datetime_util_obj));
logging.info("data columns,len:%d,type:%s,data:%s,",len(data.columns),type(data.columns),data.columns)
logging.info(",data.columns:%s",data.columns.values)
datetime_list_dmy = self.datetime_util_obj.convert2datettime_list(data['Date'].tolist())
#actual_stock_price=defaultdict();
actual_stock_price = self.actual_stock_price
if ref_data_flag == 1:
actual_stock_price['ref_data']={}
actual_stock_price['ref_data']['date'] = datetime_list_dmy
actual_stock_price['ref_data']['closeprice'] = data['Close'].tolist()
else:
actual_stock_price['date'] = datetime_list_dmy
actual_stock_price['closeprice'] = data['Close'].tolist()
self.loggingpy_obj.log_func({"datetime_list_dmy": datetime_list_dmy})
#logging.debug("repr self asp:%s",repr(self.actual_stock_price))
del data
logging.debug(">>:read_csv_file")
'''
METHOD: download_csv_file_web
Download stock data from web yahoofinance
'''
def download_csv_file_web(self,ticker_symbol,ref_ticker_symbol):
logging.debug("<<:download_csv_file_web")
input_data_path=self.loggingpy_obj.input_data_path
logging.info("input_data_path:%s:",input_data_path)
yahoo_url="http://real-chart.finance.yahoo.com/table.csv?s="
base_url=yahoo_url + ticker_symbol
base_url_ref = yahoo_url + ref_ticker_symbol
#datetime_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
datetime_stamp = datetime.datetime.now().strftime("%Y%m%d")
global stock_input_file,stock_input_ref_file
#print("File found1:", stock_input_file)
if self.download_flag == 1:
file_name = "stock_input_" + ticker_symbol + "_" + datetime_stamp + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + "_" + datetime_stamp + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
self.download_file(stock_input_file,base_url)
self.download_file(stock_input_ref_file, base_url_ref)
else:
file_name = "stock_input_" + ticker_symbol + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
if os.path.isfile(stock_input_file) and os.path.isfile(stock_input_ref_file):
logging.info("File found 3 :[%s],[%s]",stock_input_file,stock_input_ref_file)
print("File found3:",stock_input_file,stock_input_ref_file)
else:
print("File not found4:", stock_input_file,stock_input_ref_file)
logging.critical("File not found4![%s] or [%s]",stock_input_file,stock_input_ref_file)
sys.exit(2)
print("File found5:", stock_input_file,stock_input_ref_file)
logging.info("stock_input_file 5 :%s,base_url:%s,ref file name:[%s]", stock_input_file, base_url,stock_input_ref_file)
self.stock_input_file=stock_input_file
self.stock_input_ref_file=stock_input_ref_file
#sys.exit(2)
logging.debug(">>:download_csv_file_web")
'''
METHOD: download_file
    Download stock data from the web (Yahoo Finance)
'''
def download_file(self,file_name,base_url):
logging.debug("<<:download_file")
try:
logging.info("Try Reading [:%s]",base_url)
status = urllib.urlretrieve(base_url, file_name)
logging.info("Status:%s", status)
urllib.urlcleanup()
            if os.path.exists(file_name):
                logging.info("File exists, file download success: " + file_name)
            else:
                logging.critical("Downloaded file DOES NOT EXIST, exiting: " + file_name)
                sys.exit(2)
except urllib.ContentTooShortError as ctse:
print("File download: Failed, found some error")
logging.critical("File donwlaod failed from url:%s",base_url)
sys.exit(2)
#logfile_handle.write(ctse.content)
logging.debug(">>:download_file")
def download_read_csv_file(self,ticker_symbol, industry_type_ts):
logging.debug("<<:download_read_csv_file")
ref_data_flag=0
self.download_csv_file_web(ticker_symbol, industry_type_ts)
stock_input_file = self.stock_input_file
stock_input_ref_file = self.stock_input_ref_file
# download_csv_file_web("GLD")
ref_data_flag = 0
self.read_csv_file(stock_input_file, 0)
self.read_csv_file(stock_input_ref_file, 1)
#self.loggingpy_obj.log_func({"csv_util:actual_stock_price":self.actual_stock_price})
logging.debug(">>:download_read_csv_file")
loggingpy.py
import logging
import datetime
import os
import sys
global current_dir_path, input_data_path, output_data_path
class loggingpy:
def __init__(self):
loglevel=logging.INFO
self.loglevel=loglevel
self.log_config()
print("loggingpy - log __init__")
def log_config(self):
global current_dir_path, input_data_path, output_data_path
current_dir_path = os.getcwd()
input_data_path = current_dir_path + "\..\stock_data"
output_data_path = current_dir_path + "\..\stock_data"
if not os.path.exists(output_data_path):
os.mkdir(output_data_path)
print("current_dir_path:" + current_dir_path + ",input_data_path:" + input_data_path,
",output_data_path:" + output_data_path)
main_date_time_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
logfile = output_data_path + "\log_" + main_date_time_stamp + ".log"
#logging.basicConfig(stream=sys.stdout,level=logging.DEBUG, format='%(levelname)s:%(message)s')
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.basicConfig(stream=sys.stdout, level=self.loglevel)
#logging.basicConfig(filename=logfile, level=logging.INFO, format='%(levelname)s:%(message)s')
#console=logging.StreamHandler().setLevel(logging.DEBUG)
#logging.getLogger('abcd').addHandler(console)
self.input_data_path = input_data_path
self.output_data_path = output_data_path
self.current_dir_path = current_dir_path
logging.info("Logging test %s","HELLO TEST")
logging.info("current_dir_path:%s,input_data_path:%s,output_data_path:%s", current_dir_path,input_data_path,output_data_path)
def log_fn(self,temp_list):
for i in range(0, len(temp_list)):
log_level_set=logging.getLogger().getEffectiveLevel()
#print("log_level_set",log_level_set,logging.INFO,logging.DEBUG)
if log_level_set==logging.INFO:
logging.info(":len:%d,Type:%s", len(temp_list[i]), type(temp_list[i]))
if log_level_set == logging.DEBUG:
logging.debug("len:%d,Type:%sData:%s", len(temp_list[i]), type(temp_list[i]),temp_list[i])
def log_info(self,msg):
logging.info(msg)
def log_func(self,templog):
log_level_set = logging.getLogger().getEffectiveLevel()
#log_string_info = "Name:%s,len:%d,type:%s"
#log_string_debug = "Name:%s,len:%d,type:%s,Data:%s"
log_string_info = "Name:{0},len:{1},type:{2}"
log_string_debug = "Name:{0},len:{1},type:{2},Data:%{3}"
for var_name,var_value in templog.iteritems():
if log_level_set == logging.INFO:
#logging.info(""+log_string_info,var_name, len(var_value),type(var_value))
logging.info(log_string_info.format(var_name, len(var_value), type(var_value)))
if log_level_set == logging.DEBUG:
#logging.debug(""+log_string_debug,var_name, len(var_value), type(var_name),var_value)
logging.debug(log_string_debug.format(var_name, len(var_value), type(var_name), var_value))
What could be the reason for this error?
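The traceback itself points at the reason: download_csv_file_web() is an unbound method, but it is being called on the csv_util class rather than on an instance, so Python tries to use the string ticker as self. A minimal sketch of calling it on an instance (the constructor arguments here are hypothetical placeholders inferred from the __init__ shown above, not values from the original program):

# hypothetical example: create an instance first, then call the method on it
csv_helper = csv_util(actual_stock_price={}, output_stock_price={}, download_flag=1)
csv_helper.download_csv_file_web(ticker, ref_ticker)

As written, the method uses self.loggingpy_obj and self.download_flag, so it genuinely needs an instance; it could only become a @staticmethod if that instance state were removed.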
import random
#get filename
name = input('Enter filename: ')
#load file
try:
input_file = open(name, 'r')
except IOError:
print('File does not exist. Program will terminate.')
#make key value
line = input_file.readline()
key = []
key_mix = []
for i in range(len(line)):
if line[i] not in key:
key.append(line[i])
for i in range(len(line)):
if line[i] not in key_mix:
key_mix.append(line[i])
random.shuffle(key_mix)
#encryption
if name.split('.')[1] == 'txt':
try:
key_file = open(name.split('.')[0] + '.key', 'w')
enc_file = open(name.split('.')[0] + '.enc', 'w')
except IOError:
print('File does not exist. Program will terminate.')
key_write = ['']
for g in range(len(key)):
key_write[0] += key_mix[g]
for i in range(len(key)):
keys = str(key[i]) + ',' + str(key_mix[i])
key_file.write(keys+'\n')
couple = {}
for k in range(len(key)):
couple[key[k]] = key_mix[k]
enc = ['']
for t in range(len(line)):
enc[0] += couple.get(line[t])
enc_file.write(enc[0])
input_file.close()
key_file.close()
enc_file.close()
#decryption
elif name.split('.')[1] == 'enc':
try:
key_file = open(name.split('.')[0] + '.key', 'r')
dec_file = open(name.split('.')[0] + '.txt', 'w')
except IOError:
print('File does not exist. Program will terminate.')
line = input_file.readline()
dec = ['']
sol = {}
while True:
sen = key_file.readline()
if not sen: break
        sol.update({sen[2]: sen[0]})  # *Problem Here*
for m in range(len(line)):
dec[0] += sol.get(line[m])
dec_file.write(dec[0])
input_file.close()
key_file.close()
dec_file.close()
It raises this error:
IndexError: string index out of range
and when I check my .key file, it looks like this:
t,o
h,l
e,s
r,h
i,t
s,r
,n
n,v
o,u
u,e
f,i
l,f
v,
but when I print each readline() result, it comes out like this:
t,o
(blank)
e,s
(blank)
i,t
(blank)
,n
(blank)
o,u
(blank)
f,i
(blank)
v,
(blank)
How can I fix it?
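The IndexError comes from indexing sen[2] on lines shorter than three characters. Every line returned by readline() still ends with '\n' (which is why the printout shows a blank line after each entry), and the key also contains an entry whose original character is itself a newline, which produces short or empty lines in the .key file. A minimal sketch of a more defensive reading loop, using the same names as above:

while True:
    sen = key_file.readline()
    if not sen:
        break
    sen = sen.rstrip('\n')   # drop the trailing newline before indexing
    if len(sen) >= 3:        # skip the short/blank lines caused by the newline entry
        sol[sen[2]] = sen[0]

Note that the mapping for the newline character itself is lost this way; to encrypt newlines reliably, you would need to escape them when writing the .key file instead of writing them raw.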
I'm running a piece of freely available Python code used to detect CNVs in single-cell sequencing data:
#!/usr/bin/env python
import sys
def main():
infilename = sys.argv[1]
outfilename = sys.argv[2]
statfilename = sys.argv[3]
chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
bins = ("/path/hg19.bin.boundaries.50k.bowtie.k50.sorted.txt", 0)
INFILE = open(infilename, "r")
OUTFILE = open(outfilename, "w")
STATFILE = open(statfilename, "w")
binCounts = []
for i in range(len(bins)):
binCounts.append(0)
print len(binCounts)
print len(bins)
counter = 0
totalReads = 0
prevChrompos = ""
for x in INFILE:
arow = x.rstrip().split("\t")
thisChrom = arow[2]
thisChrompos = arow[3]
if thisChrom.find("_") > -1:
#print thisChrom
continue
if thisChrom == "chrM":
#print thisChrom
continue
if thisChrom == "":
continue
if chrominfo.has_key(thisChrom):
pass
else:
continue
totalReads += 1
thisChrominfo = chrominfo[thisChrom]
thisAbspos = long(thisChrompos) + long(thisChrominfo[2])
counter += 1
indexUp = len(bins) - 1
indexDown = 0
indexMid = int((indexUp - indexDown) / 2.0)
while True:
if thisAbspos >= long(bins[indexMid][2]):
indexDown = indexMid + 0
indexMid = int((indexUp - indexDown) / 2.0) + indexMid
else:
indexUp = indexMid + 0
indexMid = int((indexUp - indexDown) / 2.0) + indexDown
if indexUp - indexDown < 2:
break
binCounts[indexDown] += 1
prevChrompos = thisChrompos
for i in range(len(binCounts)):
thisRatio = float(binCounts[i]) / (float(counter) / float(len(bins)))
OUTFILE.write("\t".join(bins[i][0:3]))
OUTFILE.write("\t")
OUTFILE.write(str(binCounts[i]))
OUTFILE.write("\t")
OUTFILE.write(str(thisRatio))
OUTFILE.write("\n")
binCounts.sort()
STATFILE.write("TotalReads\tMedianBinCount\n")
STATFILE.write(str(totalReads))
STATFILE.write("\t")
STATFILE.write(str(binCounts[len(bins)/2]))
STATFILE.write("\n")
INFILE.close()
OUTFILE.close()
STATFILE.close()
def fileToDictionary(inputFile, indexColumn):
input = open(inputFile, "r")
rd = dict()
# input.readline()
for x in input:
arow = x.rstrip().split("\t")
id = arow[indexColumn]
if rd.has_key(id):
#rd[id].append(arow)
print "duplicate knowngene id = " + id
print "arow = " + str(arow)
print "rd[id] = " + str(rd[id])
else:
rd[id] = arow
input.close()
return(rd)
def fileToArray(inputFile, skipFirst):
input = open(inputFile, "r")
ra = []
for i in range(skipFirst):
input.readline()
for x in input:
arow = x.rstrip().split("\t")
ra.append(arow)
input.close()
return(ra)
if __name__ == "__main__":
main()
I'm getting an error on line 40:
Traceback (most recent call last):
File "/path/varbin.50k.sam.py", line 129, in <module>
main()
File "/path/varbin.50k.sam.py", line 40, in main
if chrominfo.has_key(thisChrom):
AttributeError: 'tuple' object has no attribute 'has_key'
I don't work regularly in Python; can someone offer a suggestion?
Where do I begin?
Your code is expecting a dictionary and getting a tuple. I think you've missed a step: You need to change
chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
To
chrominfo = fileToDictionary("/path/hg19.chrom.sizes.txt", 0)
Note also that dict.has_key(key) is deprecated (and removed in Python 3) in favour of if key in dict.
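For completeness, a sketch of the corrected setup; the bins = (...) line just below chrominfo most likely needs the same treatment with the fileToArray helper (an assumption based on how bins is indexed later):

chrominfo = fileToDictionary("/path/hg19.chrom.sizes.txt", 0)
bins = fileToArray("/path/hg19.bin.boundaries.50k.bowtie.k50.sorted.txt", 0)

and, inside the read loop:

        if thisChrom in chrominfo:   # membership test that works in Python 2 and 3
            pass
        else:
            continue

Otherwise the later expressions chrominfo[thisChrom] and bins[indexMid][2] would fail on the tuples in the same way.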
I have seen a couple of questions related to my issue but haven't been able to get an answer.
In my program I have a .txt file that needs to be converted to PDF.
I came across this script that does exactly that: https://code.activestate.com/recipes/189858-python-text-to-pdf-converter/
I have imported it into my program, but I am not sure how to call it and pass my .txt file so that it gets converted to PDF.
The converter script is named txttopdf.py; I have imported it as import txttopdf, and it is present in the same directory.
The last part of my program tries to convert the .txt file to .pdf, but it gives me a SyntaxError.
Below is my program:
import sqlite3
import platform
import sys
import os
import re
import time
import smtplib
import mimetypes
import txttopdf
from datetime import datetime
from email.mime.multipart import MIMEMultipart
from email import encoders
from email.message import Message
from email.mime.text import MIMEText
ipstr = "unknown"
errorstr = "unknown"
gtstr = "unknown"
print "reading the file"
linuxpath = raw_input("Enter the path")
txt_file = open(linuxpath,"r")
countlines = 0
if os.stat("lastline.txt").st_size == 0:
for line in open(linuxpath):
pattern = re.compile('(([2][5][0-5]\.)|([2][0-4][0-9]\.)|([0-1]?[0-9]?[0-9]\.)){3}'+'(([2][5][0-5])|([2][0-4][0-9])|([0-1]?[0-9]?[0-9]))|[\d.]+|\:\:\d|[\w\.]+')
#([\d.]+)[\s-]+\s+"([A-Z]+)\s+(.+?)"\s+([\s\d]+)')\[([\d\/A-Za-z: -]+)\]
iprgex = pattern.search(line)
#print "hi"
countlines = countlines + 1
if iprgex:
ips = iprgex.start()
ipe = iprgex.end()
ipstr = line[ips:ipe]
#print "hi again"
#print ipstr
pattern = re.compile('[\d]+\/[A-Za-z]+\/[\d]+')
#('\[([\d\/A-Za-z: -]+)\]')
datergex = pattern.search(line)
#print "hi"
if datergex:
dates = datergex.start()
datee = datergex.end()
datestr = line[dates:datee]
#countlines = countlines + 1
#print "hi again"
#print datestr
monthstr = datestr[3:6]
#print monthstr
if monthstr == "Jan":
date_chnge = datestr.replace("Jan","01")
elif monthstr == "Feb":
date_chnge = datestr.replace("Feb","02")
elif monthstr == "Mar":
date_chnge = datestr.replace("Mar","03")
#print "am here"
#print datestr
elif monthstr == "Apr":
date_chnge = datestr.replace("Apr","04")
elif monthstr == "May":
date_chnge = datestr.replace("May","05")
elif monthstr == "Jun":
date_chnge = datestr.replace("Jun","06")
elif monthstr == "Jul":
date_chnge = datestr.replace("Jul","07")
elif monthstr == "Aug":
date_chnge = datestr.replace("Aug","08")
elif monthstr == "Sep":
date_chnge = datestr.replace("Sep","09")
elif monthstr == "Oct":
date_chnge = datestr.replace("Oct","10")
elif monthstr == "Nov":
date_chnge = datestr.replace("Nov","11")
elif monthstr == "Dec":
date_chnge = datestr.replace("Dec","12")
#print date_chnge
dt_day = date_chnge[0:2]
dt_month = date_chnge[3:5]
dt_year = date_chnge[6:]
new_date = dt_year + '-' + dt_month + '-' + dt_day
pattern = re.compile('\:[\d]+\:[\d]+\:[\d]+')
#('\[([\d\/A-Za-z: -]+)\]')
timergex = pattern.search(line)
#print "hi"
if timergex:
times = timergex.start()
timee = timergex.end()
timestr = line[times:timee]
#countlines = countlines + 1
#print "hi again"
#print timestr
extract_time = timestr[1:]
datestring = new_date + ' ' + extract_time
dt = datetime.strptime(datestring, '%Y-%m-%d %H:%M:%S')
#print dt.year, dt.month, dt.day
pattern = re.compile('"([A-Z]+)\s+(.+?)"|"\-"')
getrgex = pattern.search(line)
#print line
if getrgex:
gts = getrgex.start()
gte = getrgex.end()
gtstr = line[gts:gte]
#countlines = countlines + 1
#print "hi again"
#print gtstr
pattern = re.compile('200|401|403|404|412|500|302')
errorrgex = pattern.search(line)
#print "hi"
if errorrgex:
errors = errorrgex.start()
errore = errorrgex.end()
errorstr = line[errors:errore]
#countlines = countlines + 1
#print "hi again"
#print errorstr
file = open('parse1.txt','a')
file.write(ipstr + datestr +timestr + gtstr + errorstr + "\n")
#Analysing the get request
print countlines
#print ipstr,dt,gtstr,errorstr
with open('ALLINONE.txt','r') as f:
for cheatsheetline in f:
indexvalue = gtstr.strip().find(cheatsheetline.strip())
#print gtstr
if indexvalue > 0:
#print indexvalue
file = open('CAUTION.txt','a')
file.write(ipstr + datestr +timestr + gtstr + errorstr + "\n")
#break
file.close()
lastlinefile = open('lastline.txt','w+')
lastlinefile.write(line)
#this part should convert the txt file CAUTION.txt to PDF
#txttopdf.main()
txttopdf CAUTION.txt
The easiest way to do this is via subprocess.Popen:
Example:
import sys
from subprocess import Popen, PIPE, STDOUT
PYTEXT2PDF = "/path/to/pytext2pdf"
def convert(filename):
print("Converting {} to PDF".format(filename))
p = Popen(
[sys.executable, PYTEXT2PDF, filename],
stdout=PIPE, stderr=STDOUT
)
stdout, _ = p.communicate()
print(stdout)
convert("filename.txt")
By the looks of it, pyText2Pdf will convert the text file to PDF and name the output file with the same basename as the input file, with a .pdf extension.
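With that helper in place, the invalid last line of the program above, txttopdf CAUTION.txt, becomes an ordinary function call (assuming the convert() helper sketched above is defined in, or imported into, that program):

convert("CAUTION.txt")

Alternatively, since the recipe apparently exposes a main() (the program already contains a commented-out txttopdf.main() call), you could set sys.argv and call it directly, but shelling out as above avoids depending on the script's internals.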