Related
C:\Python27\python.exe
C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py Traceback (most
recent call last): File
"C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 166, in
<module>
main_run("C:\defect4j\TinyGP") File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 151, in
main_run
Fitness, RawScore, Formula = main() File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 130, in
main
halloffame=hof, verbose=True) File "C:\Python27\lib\site-packages\deap\algorithms.py", line 150, in
eaSimple
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 82, in
eval_func
spectrum = FaultVersionsDict[str(numberOfversion[version])] KeyError: '244'
Process finished with exit code 1
How can I solve this error? This is the full code:
import os
import sys
import operator
import numpy as np
import pandas as pd
import time
import pickle
import warnings
import random
import itertools
import random
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
import numpy
from deap import gp
import glob
warnings.filterwarnings('ignore')
def datafile():
files = []
for filepath in glob.iglob(r'C:\defect4j\All single fault\*.txt'):
files.append(filepath)
return files
def readcsv():
for csvfiles in datafile():
nFaultVersion = len(datafile())
vFaultLocation = np.zeros(nFaultVersion)
vStatementCount = np.zeros(nFaultVersion)
i = 0
FaultVersionsDict = {}
nFirstFault = (pd.read_csv(csvfiles, sep=',', nrows=0)).columns[0]
df = pd.read_csv(csvfiles, skiprows=1, header=None).values
dset = list(list(float(elem) for elem in row) for row in df)
vFaultLocation[i] = nFirstFault
vStatementCount[i] = df.shape[0]
FaultVersionsDict[str(i)] = df
i = i + 1
return [vFaultLocation, vStatementCount, FaultVersionsDict]
def safeDiv(left, right):
try: return left / right
except ZeroDivisionError: return 0
pset = gp.PrimitiveSet("MAIN", 4)
pset.addPrimitive(numpy.add, 2, name="gp_add")
pset.addPrimitive(numpy.subtract, 2, name="gp-vsub")
pset.addPrimitive(numpy.multiply, 2, name="gp_mul")
pset.addPrimitive(numpy.sqrt, 1, name="gp_sqrt")
pset.addPrimitive(safeDiv, 2, name="gp_div")
pset.addPrimitive(numpy.negative, 1, name="gp_neg")
pset.addPrimitive(numpy.cos, 1, name="gp_cos")
pset.addPrimitive(numpy.sin, 1, name="gp_sin")
#pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
pset.addEphemeralConstant("rand",lambda: random.random() * 100)
pset.renameArguments(ARG0='EP', ARG1="EF", ARG2="NP", ARG3="NF")
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
def eval_func(individual):
#F = gp.compileADF(expr=individual, psets=pset)
F = toolbox.compile(expr=individual)
#F = gp.compile(expr=individual, pset=pset)
fit = []
for version in range(len(numberOfversion)):
#for version in numberOfversion:
spectrum = FaultVersionsDict[str(numberOfversion[version])]
EP = spectrum[:, 0]
EF = spectrum[:, 1]
NP = spectrum[:, 2]
NF = spectrum[:, 3]
#print(spectrum)
susp_v = eval(F)
sortedSusp_v = -np.sort(-susp_v)
faultLocation = int(vFaultLocation[numberOfversion[version]])
susForFault = susp_v[faultLocation]
tieCount = np.where(sortedSusp_v == susForFault)
# firstTie = tieCount[0].min() + 1 #zero-based
LastTie = tieCount[0].max() + 1 # the last index of a tie of faulty statement
faultPosinRank = LastTie
currentFit = 100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100
fit.append(currentFit)
# sortedIndexList = list(np.argsort(susp_v)[::-1])
# faultPosinRank = sortedIndexList.index(vFaultLocation[numberOfversion[version]])
# fit.append(100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100)
avgFiteness = np.mean(fit)
return avgFiteness
toolbox.register("evaluate", eval_func)
toolbox.register("select", tools.selAutomaticEpsilonLexicase)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
def main():
random.seed(318)
pop = toolbox.population(n=300)
hof = tools.HallOfFame(1)
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
halloffame=hof, verbose=True)
# print log
return pop, log, hof
def main_run(outputFolder):
if os.path.exists(outputFolder) is False:
os.mkdir(outputFolder)
outputFile_i = os.path.join(outputFolder, "formula.csv")
outputFile_v = os.path.join(outputFolder, "VersionSamples.csv")
outputFile_f = os.path.join(outputFolder, "Fiteness.csv")
file_v = open(outputFile_v, "w")
file_f = open(outputFile_f, "w")
with open(outputFile_i, "w") as file:
global numberOfversion
for i in range(30):
numberOfversion = random.sample(range(0, 255), 120)
#numberOfversion = random.sample(range(0, 92), 20) # randomly choose 20 samples from 92 faulty versions
file_v.write(str(numberOfversion) + "\n")
file_v.flush()
Fitness, RawScore, Formula = main()
file.write(Formula)
file_f.write(str(Fitness) + "," + str(RawScore) + "\n")
file_f.flush()
print(i)
file.flush()
file.close()
file_v.close()
if __name__ == '__main__':
global numberOfversion
#print(datafile())
vFaultLocation, vStatementCount, FaultVersionsDict = readcsv()
#print(readcsv())
main_run("C:\defect4j\TinyGP")
main()
#print(eval_func(individual=readcsv()))
I have made a UI in QtCreator 5. Then, I converted UI-file "Odor.ui" into "ui-main.py". Then I used Anaconda framework to write a computational part in Upiter Notebook. When, I compile this code
import os
import sys
import pandas as pd
import numpy
import scipy.stats
import xlsxwriter
import re
from PyQt5.QtWidgets import QApplication, QMainWindow, qApp, QWidget, QInputDialog, QLineEdit, QFileDialog, QMessageBox, QTableWidgetItem, QHeaderView
from PyQt5.QtCore import Qt, QEvent, QObject
from PyQt5.QtCore import pyqtSlot
from PyQt5 import QtGui
from ui_main import Ui_MainWindow
from os import walk
from os.path import expanduser as ospath
import glob
from time import gmtime, strftime
class MyMainWindow(QMainWindow, Ui_MainWindow):
def __init__(self, parent=None):
super(MyMainWindow, self).__init__(parent)
qApp.installEventFilter(self)
self.setupUi(self)
self.dataChooseBtn.clicked.connect(self.selectFile)
self.saveSample_btn.clicked.connect(self.saveSample)
self.data_processing.clicked.connect(self.process)
self.string_processing.clicked.connect(self.processString)
self.saveSampleString.clicked.connect(self.saveSampleStr)
self.refreshSample()
self.refreshSamples.clicked.connect(self.refreshSample)
self.refreshSample_2()
self.refreshSamples_2.clicked.connect(self.refreshSample_2)
self.show()
def eventFilter(self, obj, event):
if event.type() == QEvent.KeyPress:
if event.key() == Qt.Key_Escape:
self.close()
return super(MyMainWindow, self).eventFilter(obj, event)
#pyqtSlot()
def accept(self):
textboxValue = self.paramInput_field_2.text()
QMessageBox.information(self, 'Message', "Значения параметрического столбца: " + textboxValue, QMessageBox.Ok, QMessageBox.Ok)
def selectFile(self):
self.fileName = None
options = QFileDialog.Options()
options |= QFileDialog.DontUseNativeDialog
fileName, _ = QFileDialog.getOpenFileName(self,"Выберите стандартизированную выборку", "./Unprocessed samples","All Files (*);;Python Files (*.py)", options=options)
if fileName:
self.fileName = fileName
def process(self):
sample_param = pd.read_excel(self.fileName, header = None, sheetname = 1)
param = sample_param[0].tolist()
sample = pd.read_excel(self.fileName, sheetname = 0)
list_of_index = []
for i in range(len(sample.columns)):
sample2 = sample.iloc[:, lambda sample: [i]]
sample2 = sample2.columns[0]
list_of_index.append(sample2)
list_of_index
fulllist = []
for i in list_of_index:
sample3 = sample[i].tolist()
fulllist.append(sample3)
fulllist_percent = []
column_percent = []
len(fulllist)
for i in range(len(fulllist)):
for j in range(len(fulllist[i])):
percent_rank = scipy.stats.percentileofscore(fulllist[i], fulllist[i][j])
column_percent.append(percent_rank)
fulllist_percent.append(column_percent)
column_percent = []
fulllist_rank = []
for i in range(len(fulllist)):
rank = len(fulllist[i]) - scipy.stats.rankdata(fulllist[i]) + 1
fulllist_rank.append(rank)
param_rank = scipy.stats.rankdata(param).astype(int)
column_corr = []
for i in range(len(fulllist)):
correlation = scipy.stats.spearmanr(param_rank[::-1], fulllist_rank[i])
column_corr.append(correlation[0])
fulllist_click = []
for j in range (len(fulllist_percent)):
middle = []
if column_corr[j] > 0:
for i in range(len(fulllist_percent[j])):
solve = column_corr[j] * fulllist_percent[j][i]
middle.append(solve)
else:
for i in range (len(fulllist_percent[j])):
solve = abs(column_corr[j]) * (100 - fulllist_percent[j][i])
middle.append(solve)
fulllist_click.append(middle)
list_of_rowsumm = []
rowsumm = 0
fulllist_clickT = numpy.asarray(fulllist_click).T.tolist()
for i in range(len(fulllist_clickT)):
rowsumm = sum(fulllist_clickT[i])
list_of_rowsumm.append(rowsumm)
percent_rowsumm = []
for i in list_of_rowsumm:
x = scipy.stats.percentileofscore(list_of_rowsumm, i)
percent_rowsumm.append(x)
validity = scipy.stats.pearsonr(list_of_rowsumm, param)
validity = validity[0]
additional_info = []
additional_info.append(list_of_rowsumm)
additional_info.append(percent_rowsumm)
additional_info.append(param)
self.fulllist = fulllist
self.fulllist_percent = fulllist_percent
self.fulllist_click = fulllist_click
self.additional_info = additional_info
self.validity = validity
return self.fulllist, self.fulllist_percent, self.fulllist_click, self.additional_info, self.validity
def saveSample(self):
options = QFileDialog.Options()
options |= QFileDialog.DontUseNativeDialog
fileNameSave, _ = QFileDialog.getSaveFileName(self,"Сохранить данные выборки","./Samples","Excel files (*.xlsx)", options=options)
workbook = xlsxwriter.Workbook(fileNameSave + ' ['+ str(round(self.validity, 3)) + ', ' + str(len(self.fulllist)) + ', ' + str(len(self.fulllist[0])) + ']' + ".xlsx")
worksheet1 = workbook.add_worksheet()
worksheet2 = workbook.add_worksheet()
worksheet3 = workbook.add_worksheet()
worksheet4 = workbook.add_worksheet()
row = 0
for col, data in enumerate(self.fulllist):
worksheet1.write_column(row, col, data)
for col, data in enumerate(self.fulllist_percent):
worksheet2.write_column(row, col, data)
for col, data in enumerate(self.fulllist_click):
worksheet3.write_column(row, col, data)
for col, data in enumerate(self.additional_info):
worksheet4.write_column(row, col, data)
workbook.close()
def processString(self):
check = self.stringInput_field.toPlainText()
check = [float(i) for i in check.replace(',', '.').split()]
index = self.sampleChoose_list.selectedIndexes()[0].row()
sample_path = self.sample_directory[1][index]
sample_param = pd.read_excel(ospath(sample_path), header = None, sheetname = 3)
param = sample_param[2].tolist()
param_rank = scipy.stats.rankdata(param).astype(int)
sample_2 = pd.read_excel(ospath(sample_path), header = None, sheetname = 0)
fulllist_new = []
for i in range(len(sample_2.columns)):
column_new = sample_2[i].tolist()
fulllist_new.append(column_new)
for i in range(len(check)):
fulllist_new[i][0] = check[i]
fulllist_percent_new = []
column_percent_new = []
for i in range(len(fulllist_new)):
for j in range(len(fulllist_new[i])):
percent_rank = scipy.stats.percentileofscore(fulllist_new[i], fulllist_new[i][j])
column_percent_new.append(percent_rank)
fulllist_percent_new.append(column_percent_new)
column_percent_new = []
fulllist_rank_new = []
for i in range(len(fulllist_new)):
rank = len(fulllist_new[i]) - scipy.stats.rankdata(fulllist_new[i]) + 1
fulllist_rank_new.append(rank)
column_corr_new = []
for i in range(len(fulllist_new)):
correlation = scipy.stats.spearmanr(param_rank[::-1], fulllist_rank_new[i])
column_corr_new.append(correlation[0])
fulllist_click_new = []
for j in range (len(fulllist_percent_new)):
middle = []
if column_corr_new[j] > 0:
for i in range(len(fulllist_percent_new[j])):
solve = column_corr_new[j] * fulllist_percent_new[j][i]
middle.append(solve)
else:
for i in range (len(fulllist_percent_new[j])):
solve = abs(column_corr_new[j]) * (100 - fulllist_percent_new[j][i])
middle.append(solve)
fulllist_click_new.append(middle)
check_click = []
for i in range(len(check)):
if column_corr_new[i] > 0:
click = fulllist_percent_new[i][0] * column_corr_new[i]
check_click.append(click)
elif column_corr_new[i] < 0:
click = abs(column_corr_new[i]) * (100 - fulllist_percent_new[i][0])
check_click.append(click)
len(check_click)
list_of_rowsumm_new = []
rowsumm = 0
fulllist_click_newT = numpy.asarray(fulllist_click_new).T.tolist()
for i in range(len(fulllist_click_newT)):
rowsumm = sum(fulllist_click_newT[i])
list_of_rowsumm_new.append(rowsumm)
percent_rowsumm_new = []
for i in list_of_rowsumm_new:
x = scipy.stats.percentileofscore(list_of_rowsumm_new, i)
percent_rowsumm_new.append(x)
validity_new = scipy.stats.pearsonr(list_of_rowsumm_new[1:], param[1:])
validity_new = validity_new[0]
validity_list = []
validity_list.append(validity_new)
additional_info_new = []
additional_info_new.append(list_of_rowsumm_new)
additional_info_new.append(percent_rowsumm_new)
additional_info_new.append(validity_list)
additional_info_new.append(param)
self.fulllist_new = fulllist_new
self.fulllist_percent_new = fulllist_percent_new
self.fulllist_click_new = fulllist_click_new
self.additional_info_new = additional_info_new
self.validity_new = validity_new
return self.fulllist_new, self.fulllist_percent_new, self.fulllist_click_new, self.additional_info_new, self.validity_new
def saveSampleStr(self):
time = strftime("%d:%m:%Y %H-%M", gmtime())
options = QFileDialog.Options()
options |= QFileDialog.DontUseNativeDialog
fileNameSave, _ = QFileDialog.getSaveFileName(self,"Сохранить обработанную строку","./Processed","Excel files (*.xlsx)", options=options)
workbook = xlsxwriter.Workbook(fileNameSave + ' (' + time + ')' + ".xlsx")
worksheet1 = workbook.add_worksheet()
worksheet2 = workbook.add_worksheet()
worksheet3 = workbook.add_worksheet()
worksheet4 = workbook.add_worksheet()
row = 0
for col, data in enumerate(self.fulllist_new):
worksheet1.write_column(row, col, data)
for col, data in enumerate(self.fulllist_percent_new):
worksheet2.write_column(row, col, data)
for col, data in enumerate(self.fulllist_click_new):
worksheet3.write_column(row, col, data)
for col, data in enumerate(self.additional_info_new):
worksheet4.write_column(row, col, data)
workbook.close()
def refreshSample(self):
sample_directory = []
sample_files = []
for (dirpath, dirnames, filenames) in walk('./Samples'):
filenames = [f for f in filenames if not f[0] == '.']
sample_files.extend(filenames)
break
the_dir = "Samples"
paths = [os.path.abspath(os.path.join(the_dir,filename)) for filename in os.listdir(the_dir) if not filename.startswith('.')]
sample_directory.append(sample_files)
sample_directory.append(paths)
self.sample_directory = sample_directory
self.sampleChoose_list.clear()
self.sampleChoose_list.addItems(sample_directory[0])
self.sampleChoose_list.setSortingEnabled(True);
self.sampleChoose_list.sortItems()
return self.sample_directory
def refreshSample_2(self):
sample_directory_2 = []
sample_files_2 = []
for (dirpath, dirnames, filenames) in walk('./Processed'):
filenames = [f for f in filenames if not f[0] == '.']
sample_files_2.extend(filenames)
break
the_dir = "Processed"
paths_2 = [os.path.abspath(os.path.join(the_dir,filename)) for filename in os.listdir(the_dir) if not filename.startswith('.')]
sample_directory_2.append(sample_files_2)
sample_directory_2.append(paths_2)
processed_info = []
for i in range(len(sample_directory_2[0])):
file_info = []
sample_file_2 = sample_directory_2[0][i]
sample_path_2 = sample_directory_2[1][i]
sample_info_2 = pd.read_excel(ospath(sample_path_2), header = None, sheetname = 3)
sample_info_2 = sample_info_2.iloc[0][0:3]
file_info.append(sample_file_2)
sample_info_2_list = numpy.array(sample_info_2).tolist()
file_info.extend(sample_info_2_list)
processed_info.append(file_info)
self.clickSample_list.setRowCount(len(processed_info))
self.clickSample_list.setColumnCount(4)
labels = ['Имя', 'Массовые отклики', 'Процентранг, %', 'Валидность']
self.clickSample_list.setHorizontalHeaderLabels(labels)
red = QtGui.QColor(255, 5, 5);
orange = QtGui.QColor(255, 157, 0);
blue = QtGui.QColor(0, 46, 255);
for row in range(len(processed_info)):
for column in range(len(processed_info[row])):
self.clickSample_list.setItem(row, column, QTableWidgetItem(str(processed_info[row][column])))
if column == 2:
if processed_info[row][column] > 85:
color = red;
if processed_info[row][column] > 65 and processed_info[row][column] < 85:
color = orange;
if processed_info[row][column] < 65:
color = blue;
self.clickSample_list.item(row, column).setBackground(color);
self.clickSample_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
if __name__ == '__main__':
app = QApplication(sys.argv)
win = MyMainWindow()
sys.exit(app.exec_())
my app is launched in a window and UI is loaded by from ui_main import Ui_MainWindow. My question is, how to make a desktop app? Do I need to do it in special Qt5 creator files? Or is possible to make it out of .ipynb-file from Anaconda? I am trying to make both MacOs and Windows versions, but I am pretty new to app-building and programming and do not know, how to begin.
You can use cx_Freeze to create a desktop app from a python program.
There's a guide to packaging a PyQt application:
cxfreeze-quickstart # generates a creation script
On OSX, you have the option of building a .dmg or a .app, by executing one of these at the prompt:
python setup.py bdist_dmg
python setup.py bdist_mac
On Windows:
python setup.py bdist_msi
There's a comparison of deployment tools here.
I'm trying to generate a single executable file with py2exe, without dependencies. With a standard setup.py file and all dependencies the exe works but when I try to modify setup.py to generate a single exe, the .exe file is generated but when I click it doesn't work.
This is the code of my python script:
# ======== Select a directory:
import Tkinter, tkFileDialog
root = Tkinter.Tk()
dirname = tkFileDialog.askdirectory(parent=root,initialdir="/",title='Please select a directory')
if len(dirname ) > 0:
print "You chose %s" % dirname
# ======== Select a file for opening:
import Tkinter,tkFileDialog
import sys
import ntpath
import os
root = Tkinter.Tk()
file = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a file')
if file != None:
reader = file
print type(str(reader.name))
output_name= os.path.splitext(os.path.basename(str(reader.name)))[0]
writer = open(output_name+'_Elaborata.txt','w')
count = 1
countbis = 0
index = 0
c = 0
listaindex = []
firstline = 1
flag = False
a = ''.join([chr(n) for n in range(256)])
b = ''.join([n for n in a if ord(n) >= 32 and ord(n) <= 126])
c = ''.join([n for n in a if ord(n) < 32 or ord(n) > 126])
#print a
arraychar = []
#for n in range(256):
#if(n >= 32 and n <= 126):
# print n, chr(n)
#else:
#arraychar.append(chr(n))
result = "".join([(" " if n in c else n) for n in a if n not in c])
#print a
#print result
#print arraychar
listaelem=[]
with reader as f:
for _ in xrange(3):
next(f)
for line in f:
if('-------' in line):
continue
if(line.strip() != ''):
lista = line.split("|")
if(len(lista) != 10):
pass
#print len(lista), line.strip()
lista2 = []
for elem in lista:
#Pulitura singola linea da caratteri speciali
result = "".join([("" if n in c else n) for n in elem.strip() if n not in c])
lista2.append(result.strip())
if len(lista2) != 10:
#print line.strip()
pass
string = ""
for elem in lista2:
string += elem + "|"
writer.write(string[:-1]+"\n")
reader.close()
writer.close()
Which setup.py could be correct to generate only one .exe file?
this is the setup.py:
from distutils.core import setup
import py2exe, sys, os
sys.argv.append('py2exe')
setup(
options = {'py2exe': {'bundle_files': 1}},
windows = [{'script': "single.py"}],
zipfile = None,
)
I am using Python to download stock data from Yahoo. The download code is placed in csv_util.py.
The invoking script is mapper.py. After importing csv_util script, I get the following error:
Traceback (most recent call last):
('Lines:', [])
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 56, in <module>
download_file_ticker(ticker,ref_ticker)
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 53, in download_file_ticker
csv_util.download_csv_file_web(ticker,ref_ticker)
TypeError: unbound method download_csv_file_web() must be called with csv_util instance as first argument (got str instance instead)
Here is the code:
mapper.py
###### Mapper.py ######
import time
import sys
# New imports
import stock_predict_main_app
import predict_stock.csv_util
from predict_stock.csv_util import csv_util
predict_date = '03112016'
ticker = 'ARVIND.NS'
ref_ticker = 'MSFT'
input_default_values = {'numdays2predict': 2,
'simulations': 10,
'historicaldatalen': 0,
'tickersymbol': 'ARVIND.NS',
# 'tickersymbol': 'MSFT',
'stockdate2predict': predict_date,
'downloadstock': 1,
'plotshow': 0,
'industrytype': 'itindustry'}
# Pass ticker symbol and date
def pass_ticker_date(predict_date, input_default_values):
with open('tickerList.txt') as f:
lines = f.read().splitlines()
print(type(input_default_values))
tickersymbol = input_default_values["tickersymbol"]
print("Lines:", lines)
for tickersymbol in lines:
print("tickersymbol:", tickersymbol)
stock_predict_main_app.test_predict(tickersymbol)
# Download the file based on the ticker symbol
def download_file_ticker(ticker,ref_ticker):
# ticker= input_default_values["tickersymbol"]
# ref_ticker = input_default_values["tickersymbol"]
csv_util.download_csv_file_web(ticker,ref_ticker)
pass_ticker_date(predict_date, input_default_values)
download_file_ticker(ticker,ref_ticker)
csv_util.py
import logging
import csv
import urllib
import datetime
import numpy as np
import pandas as pd
import sys
import os
from collections import defaultdict
###custom local modules###
from datetime_util import datetime_util
from loggingpy import loggingpy
global stock_input_file,stock_input_ref_file
global loggingpy_obj,datetime_util_obj
global actual_stock_price, output_stock_price
class csv_util:
def __init__(self,actual_stock_price,output_stock_price,download_flag):
print("Class anme: __init__",self.__class__.__name__)
self.stock_input_file=""
self.stock_input_ref_file = ""
self.actual_stock_price = actual_stock_price
self.output_stock_price = output_stock_price
self.download_flag=download_flag
self.datetime_util_obj = datetime_util()
self.loggingpy_obj = loggingpy()
#datetime_util_obj = self.datetime_util_obj
#loggingpy_obj=self.loggingpy_obj
'''
METHOD: prepare_actual_data2writecsv
'''
def prepare_actual_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prepare_actual_data2writecsv")
temp_date_ary = []
temp_date_ary = self.actual_stock_price['date']
temp_closeprice_ary = self.actual_stock_price['closeprice']
temp_dailyreturn_ary = self.actual_stock_price['dailyreturn']
# predicted_date_array = sorted(temp_predicted_values_wd.keys(),reverse=True)
# remove last element of array or appenda dummy 0 to daily returns
temp_date_ary.pop()
temp_closeprice_ary.pop()
# temp_dailyreturn_ary.append(0)
self.loggingpy_obj.log_func({'temp_date_ary': temp_date_ary, 'temp_closeprice_ary': temp_closeprice_ary,
'temp_dailyreturn_ary': temp_dailyreturn_ary})
np_column_ary = np.column_stack((temp_date_ary, temp_closeprice_ary, temp_dailyreturn_ary))
num_rows, num_columns = np_column_ary.shape
logging.info("np_column_ary:%s,Rowsdata %s,ColumnData %s", np_column_ary.size, np_column_ary[:][0],
np_column_ary[:, 0])
logging.info("NumRows:%d,Num Columns:%s", num_rows, num_columns)
counter = 0
for i in range(0, num_rows):
counter += 1
temp_temp_row_data = []
temp_row_data = np_column_ary[:][i]
temp_temp_row_data = list(temp_row_data)
temp_rowdate = temp_row_data[0]
logging.debug("[%d],Length:[%d],type:[%s],Date:%s,Rowsdata:%s", i, len(temp_row_data), type(temp_row_data),
temp_rowdate, temp_row_data)
predict_date_wise_data_wd[temp_rowdate] = (map(str, temp_temp_row_data))
predict_datewise_data_wod[temp_rowdate] = (map(str, temp_temp_row_data))
logging.info(">>prepare_actual_data2writecsv")
'''
METHOD: prepare_data2writecsv
'''
def prep_predicted_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prep_predicted_data2writecsv")
temp_predicted_values_wd = self.actual_stock_price['predicted_vals_with_drift']
temp_predicted_values_wod = self.actual_stock_price['predicted_vals_without_drift']
self.actual_stock_price['meanwithdrift'] = []
self.actual_stock_price['meanwithoutdrift'] = []
temp_var = temp_predicted_values_wd.keys()
predicted_date_array = self.datetime_util_obj.sort_datettime_list(temp_var, False)
for eack_key in predicted_date_array:
logging.debug("WD:eack key:%s", eack_key)
temp_string_val_wd = []
temp_string_val_wod = []
temp_string_val_wd = temp_predicted_values_wd[eack_key]
temp_string_val_wod = temp_predicted_values_wod[eack_key]
mean_wd = np.mean(temp_string_val_wd)
mean_wod = np.mean(temp_string_val_wod)
# Store mean in global variable
self.actual_stock_price['meanwithdrift'].append(mean_wd)
self.actual_stock_price['meanwithoutdrift'].append(mean_wod)
logging.debug("meanwithdrift:%s,meanwithoutdrift:%s", mean_wd, mean_wod)
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wd), type(temp_string_val_wd))
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wod), type(temp_string_val_wod))
temp_string_wd = []
temp_string_wod = []
if not predict_datewise_data_wod.has_key(eack_key):
predict_datewise_data_wod[eack_key] = []
predict_date_wise_data_wd[eack_key] = []
temp_string_wd = [eack_key, "", ""]
temp_string_wod = [eack_key, "", ""]
temp_string_wd.append(mean_wd)
temp_string_wod.append(mean_wod)
temp_string_wd.extend(temp_string_val_wd)
temp_string_wod.extend(temp_string_val_wod)
logging.debug("temp_string_wd:len:%d,type:%s,Data:%s", len(temp_string_wd), type(temp_string_wd),
temp_string_wd)
logging.debug("temp_string_wod:len:%d,type:%s,Data:%s", len(temp_string_wod), type(temp_string_wod),
temp_string_wod)
predict_date_wise_data_wd[eack_key].extend(temp_string_wd)
predict_datewise_data_wod[eack_key].extend(temp_string_wod)
#self.loggingpy_obj.log_func({"temp_string_wd": temp_string_wd, "temp_string_wod": temp_string_wod})
logging.info(">>prepare_data2writecsv")
'''
METHOD: write2csvfile
Writes given data to the given csv absolute path filename
Input arguments: filename to be written, data to be written
'''
def write2csvfile(self,file_name,local_rows_data):
logging.info("<<:write2csvfile")
#output_data_path=self.loggingpy_obj.output_data_path
#os.chdir(output_data_path)
with open(file_name, 'w') as csv_fw:
out_csv_writer = csv.writer(csv_fw, lineterminator='\n')
out_csv_writer.writerows(local_rows_data)
logging.info("csv file[%s]writing :Sucess",file_name)
logging.info(">>:write2csvfile")
'''
# Prepare header list of columns to write to csv file
# Write predicted values to csv file
predicted_data_wod_date_val_ary
'''
##classmethod
#def write2csv_file(ticker_symbol):
def write2csv_file(self,ticker_symbol):
logging.info("<<:write2csv_file")
datetime_stamp=datetime.datetime.now().strftime("%Y%m%d_%H%M")
file_name="output_prediction_with_drift"+ticker_symbol+"_"+datetime_stamp+".csv"
file_name_wod = "output_prediction_without_drift" + ticker_symbol + "_" + datetime_stamp + ".csv"
file_name=self.loggingpy_obj.output_data_path + file_name
file_name_wod = self.loggingpy_obj.output_data_path + file_name_wod
column_headers=self.output_stock_price['column_headers']
#Prepare header list of columns to write to csv file;column_headers is a global variable
column_headers.insert(0,'Date')
column_headers.insert(1, 'Actual Prices')
column_headers.insert(2, 'Daily Return')
column_headers.insert(3, 'Mean')
logging.info("column_headers,len:%s,type:%s,data:%s", len(column_headers), type(column_headers), column_headers)
logging.info("self:column_headers,len:%s", len(self.output_stock_price['column_headers']))
predict_date_wise_data_wd = {}
predict_datewise_data_wod = {}
self.prepare_actual_data2writecsv(predict_date_wise_data_wd, predict_datewise_data_wod)
self.loggingpy_obj.log_func(
{"Before:predict data_wd": predict_date_wise_data_wd, "predict data_wod": predict_datewise_data_wod})
self.prep_predicted_data2writecsv(predict_date_wise_data_wd,predict_datewise_data_wod)
self.loggingpy_obj.log_func({"After:pred data_wd": predict_date_wise_data_wd, "pred data_wod": predict_datewise_data_wod})
temp_new_data_ary=predict_date_wise_data_wd.keys()
sorted_temp_new_data_ary = self.datetime_util_obj.sort_datettime_list(temp_new_data_ary,True)
self.loggingpy_obj.log_func({"sorted_temp_new_data_ary":sorted_temp_new_data_ary})
data2write2csv_wd = [column_headers]
data2write2csv_wod = [column_headers]
counter=1
# add headers
for each_key in sorted_temp_new_data_ary:
counter+=1
data2write2csv_wd.insert(counter, predict_date_wise_data_wd[each_key])
data2write2csv_wod.insert(counter,predict_datewise_data_wod[each_key])
self.write2csvfile(file_name, data2write2csv_wd)
self.write2csvfile(file_name_wod, data2write2csv_wod)
logging.debug("data2write2csv_wd:%s", repr(data2write2csv_wd))
logging.info("<<:write2csv_file")
#sys.exit()
# ######################### END OF METHOD write2csv_file ################################
'''
METHOD: read_csv_file
'''
##classmethod
def read_csv_file(self,file_name,ref_data_flag):
logging.debug("<<:read_csv_file")
logging.info("file_name,%s,",file_name)
if not os.path.exists(file_name):
logging.critical("File not found:Check!"+file_name)
sys.exit(2)
#actual_stock_price=self.actual_stock_price
logging.info("file_name,%s,", file_name)
data = pd.read_csv(file_name,parse_dates=False, infer_datetime_format=False,date_parser=None)
logging.info("self:::::::::::::%s",repr(self.datetime_util_obj));
logging.info("data columns,len:%d,type:%s,data:%s,",len(data.columns),type(data.columns),data.columns)
logging.info(",data.columns:%s",data.columns.values)
datetime_list_dmy = self.datetime_util_obj.convert2datettime_list(data['Date'].tolist())
#actual_stock_price=defaultdict();
actual_stock_price = self.actual_stock_price
if ref_data_flag == 1:
actual_stock_price['ref_data']={}
actual_stock_price['ref_data']['date'] = datetime_list_dmy
actual_stock_price['ref_data']['closeprice'] = data['Close'].tolist()
else:
actual_stock_price['date'] = datetime_list_dmy
actual_stock_price['closeprice'] = data['Close'].tolist()
self.loggingpy_obj.log_func({"datetime_list_dmy": datetime_list_dmy})
#logging.debug("repr self asp:%s",repr(self.actual_stock_price))
del data
logging.debug(">>:read_csv_file")
'''
METHOD: download_csv_file_web
Download stock data from web yahoofinance
'''
def download_csv_file_web(self,ticker_symbol,ref_ticker_symbol):
logging.debug("<<:download_csv_file_web")
input_data_path=self.loggingpy_obj.input_data_path
logging.info("input_data_path:%s:",input_data_path)
yahoo_url="http://real-chart.finance.yahoo.com/table.csv?s="
base_url=yahoo_url + ticker_symbol
base_url_ref = yahoo_url + ref_ticker_symbol
#datetime_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
datetime_stamp = datetime.datetime.now().strftime("%Y%m%d")
global stock_input_file,stock_input_ref_file
#print("File found1:", stock_input_file)
if self.download_flag == 1:
file_name = "stock_input_" + ticker_symbol + "_" + datetime_stamp + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + "_" + datetime_stamp + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
self.download_file(stock_input_file,base_url)
self.download_file(stock_input_ref_file, base_url_ref)
else:
file_name = "stock_input_" + ticker_symbol + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
if os.path.isfile(stock_input_file) and os.path.isfile(stock_input_ref_file):
logging.info("File found 3 :[%s],[%s]",stock_input_file,stock_input_ref_file)
print("File found3:",stock_input_file,stock_input_ref_file)
else:
print("File not found4:", stock_input_file,stock_input_ref_file)
logging.critical("File not found4![%s] or [%s]",stock_input_file,stock_input_ref_file)
sys.exit(2)
print("File found5:", stock_input_file,stock_input_ref_file)
logging.info("stock_input_file 5 :%s,base_url:%s,ref file name:[%s]", stock_input_file, base_url,stock_input_ref_file)
self.stock_input_file=stock_input_file
self.stock_input_ref_file=stock_input_ref_file
#sys.exit(2)
logging.debug(">>:download_csv_file_web")
'''
METHOD: download_file
Downlaod stock data from web yahoofinance
'''
def download_file(self,file_name,base_url):
logging.debug("<<:download_file")
try:
logging.info("Try Reading [:%s]",base_url)
status = urllib.urlretrieve(base_url, file_name)
logging.info("Status:%s", status)
urllib.urlcleanup()
if os.path.exists(file_name):
logging.info ("File exists, File download success!File"+file_name)
else:
logging.critical("File downloaded DOE NOT EXIST, exit.."+file_name)
sys.exit(2)
except urllib.ContentTooShortError as ctse:
print("File download: Failed, found some error")
logging.critical("File donwlaod failed from url:%s",base_url)
sys.exit(2)
#logfile_handle.write(ctse.content)
logging.debug(">>:download_file")
def download_read_csv_file(self,ticker_symbol, industry_type_ts):
logging.debug("<<:download_read_csv_file")
ref_data_flag=0
self.download_csv_file_web(ticker_symbol, industry_type_ts)
stock_input_file = self.stock_input_file
stock_input_ref_file = self.stock_input_ref_file
# download_csv_file_web("GLD")
ref_data_flag = 0
self.read_csv_file(stock_input_file, 0)
self.read_csv_file(stock_input_ref_file, 1)
#self.loggingpy_obj.log_func({"csv_util:actual_stock_price":self.actual_stock_price})
logging.debug(">>:download_read_csv_file")
loggingpy.py
import logging
import datetime
import os
import sys
global current_dir_path, input_data_path, output_data_path
class loggingpy:
def __init__(self):
loglevel=logging.INFO
self.loglevel=loglevel
self.log_config()
print("loggingpy - log __init__")
def log_config(self):
global current_dir_path, input_data_path, output_data_path
current_dir_path = os.getcwd()
input_data_path = current_dir_path + "\..\stock_data"
output_data_path = current_dir_path + "\..\stock_data"
if not os.path.exists(output_data_path):
os.mkdir(output_data_path)
print("current_dir_path:" + current_dir_path + ",input_data_path:" + input_data_path,
",output_data_path:" + output_data_path)
main_date_time_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
logfile = output_data_path + "\log_" + main_date_time_stamp + ".log"
#logging.basicConfig(stream=sys.stdout,level=logging.DEBUG, format='%(levelname)s:%(message)s')
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.basicConfig(stream=sys.stdout, level=self.loglevel)
#logging.basicConfig(filename=logfile, level=logging.INFO, format='%(levelname)s:%(message)s')
#console=logging.StreamHandler().setLevel(logging.DEBUG)
#logging.getLogger('abcd').addHandler(console)
self.input_data_path = input_data_path
self.output_data_path = output_data_path
self.current_dir_path = current_dir_path
logging.info("Logging test %s","HELLO TEST")
logging.info("current_dir_path:%s,input_data_path:%s,output_data_path:%s", current_dir_path,input_data_path,output_data_path)
def log_fn(self,temp_list):
for i in range(0, len(temp_list)):
log_level_set=logging.getLogger().getEffectiveLevel()
#print("log_level_set",log_level_set,logging.INFO,logging.DEBUG)
if log_level_set==logging.INFO:
logging.info(":len:%d,Type:%s", len(temp_list[i]), type(temp_list[i]))
if log_level_set == logging.DEBUG:
logging.debug("len:%d,Type:%sData:%s", len(temp_list[i]), type(temp_list[i]),temp_list[i])
def log_info(self,msg):
logging.info(msg)
def log_func(self,templog):
log_level_set = logging.getLogger().getEffectiveLevel()
#log_string_info = "Name:%s,len:%d,type:%s"
#log_string_debug = "Name:%s,len:%d,type:%s,Data:%s"
log_string_info = "Name:{0},len:{1},type:{2}"
log_string_debug = "Name:{0},len:{1},type:{2},Data:%{3}"
for var_name,var_value in templog.iteritems():
if log_level_set == logging.INFO:
#logging.info(""+log_string_info,var_name, len(var_value),type(var_value))
logging.info(log_string_info.format(var_name, len(var_value), type(var_value)))
if log_level_set == logging.DEBUG:
#logging.debug(""+log_string_debug,var_name, len(var_value), type(var_name),var_value)
logging.debug(log_string_debug.format(var_name, len(var_value), type(var_name), var_value))
What could be the reason for the error ?
I'm trying to run the following script, which reportedly works for some people:
"""Preprocessing script.
This script walks over the directories and dump the frames into a csv file
"""
import os
import csv
import sys
import random
import scipy
from scipy import misc as m
import numpy as np
import dicom
from skimage import io, transform
from joblib import Parallel, delayed
import dill
os.chdir("P:\\hackr\\dsb")
def mkdir(fname):
try:
os.mkdir(fname)
except:
pass
def get_frames(root_path):
"""Get path to all the frame in view SAX and contain complete frames"""
ret = []
for root, _, files in os.walk(root_path):
if len(files) == 0 or not files[0].endswith(".dcm") or root.find("sax") == -1:
continue
prefix = files[0].rsplit('-', 1)[0]
fileset = set(files)
expected = ["%s-%04d.dcm" % (prefix, i + 1) for i in range(30)]
if all(x in fileset for x in expected):
ret.append([root + "/" + x for x in expected])
# sort for reproduciblity
return sorted(ret, key = lambda x: x[0])
def get_label_map(fname):
labelmap = {}
fi = open(fname)
fi.readline()
for line in fi:
arr = line.split(',')
labelmap[int(arr[0])] = line
return labelmap
def write_label_csv(fname, frames, label_map):
fo = open(fname, "w")
for lst in frames:
index = int(lst[0].split("\\")[1]) #changed from split("/")[3]
if label_map != None:
fo.write(label_map[index])
else:
fo.write("%d,0,0\n" % index)
fo.close()
def get_data(lst,preproc):
data = []
result = []
for path in lst:
f = dicom.read_file(path)
img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
m.imsave(dst_path, img)
result.append(dst_path)
data.append(img)
data = np.array(data, dtype=np.uint8)
data = data.reshape(data.size)
data = np.array(data,dtype=np.str_)
data = data.reshape(data.size)
return [data,result]
def write_data_csv(fname, frames, preproc):
"""Write data to csv file"""
#if __name__ == 'builtins': #'__main__':
fdata = open(fname, "w")
dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
data,result = zip(*dr)
for entry in data:
fdata.write(','.join(entry)+'\r\n')
print("All finished, %d slices in total" % len(data))
fdata.close()
result = np.ravel(result)
return result
def crop_resize(img, size):
"""crop center and resize"""
if img.shape[0] < img.shape[1]:
img = img.T
# we crop image from center
short_egde = min(img.shape[:2])
yy = int((img.shape[0] - short_egde) / 2)
xx = int((img.shape[1] - short_egde) / 2)
crop_img = img[yy : yy + short_egde, xx : xx + short_egde]
# resize to 64, 64
resized_img = transform.resize(crop_img, (size, size))
resized_img *= 255
return resized_img.astype("uint8")
def local_split(train_index):
random.seed(0)
train_index = set(train_index)
all_index = sorted(train_index)
num_test = int(len(all_index) / 3)
random.shuffle(all_index)
train_set = set(all_index[num_test:])
test_set = set(all_index[:num_test])
return train_set, test_set
def split_csv(src_csv, split_to_train, train_csv, test_csv):
ftrain = open(train_csv, "w")
ftest = open(test_csv, "w")
cnt = 0
for l in open(src_csv):
if split_to_train[cnt]:
ftrain.write(l)
else:
ftest.write(l)
cnt = cnt + 1
ftrain.close()
ftest.close()
# Load the list of all the training frames, and shuffle them
# Shuffle the training frames
random.seed(10)
train_frames = get_frames("./train_tiny")
random.shuffle(train_frames)
validate_frames = get_frames("./validate_tiny")
# Write the corresponding label information of each frame into file.
write_label_csv("train-label.csv", train_frames, get_label_map("./train.csv"))
write_label_csv("validate-label.csv", validate_frames, None)
# Dump the data of each frame into a CSV file, apply crop to 64 preprocessor
train_lst = write_data_csv("train-64x64-data.csv", train_frames, lambda x: crop_resize(x, 64))
valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))
# Generate local train/test split, which you could use to tune your model locally.
train_index = np.loadtxt("./train-label.csv", delimiter=",")[:,0].astype("int")
train_set, test_set = local_split(train_index)
split_to_train = [x in train_set for x in train_index]
split_csv("./train-label.csv", split_to_train, "./local_train-label.csv", "./local_test-label.csv")
split_csv("./train-64x64-data.csv", split_to_train, "./local_train-64x64-data.csv", "./local_test-64x64-data.csv")
Throws an error. When I run the code interactively I find that it's the write_data_csv invocation that breaks the script.
For instance, the script runs without error before using write_data_csv but the line valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64)) throws this error:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "<input>", line 5, in write_data_csv
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 804, in __call__
while self.dispatch_one_batch(iterator):
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 657, in dispatch_one_batch
tasks = BatchedCalls(itertools.islice(iterator, batch_size))
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 68, in __init__
self.items = list(iterator_slice)
File "<input>", line 5, in <genexpr>
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 161, in delayed
pickle.dumps(function)
_pickle.PicklingError: Can't pickle <function get_data at 0x0000000007B432F0>: attribute lookup get_data on __main__ failed
I'm not sure what my next move is to resolve this problem. I've found other StackOverflow questions with attribute lookup failures, but they were either substantively different or unanswered.