C:\Python27\python.exe
C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py Traceback (most
recent call last): File
"C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 166, in
<module>
main_run("C:\defect4j\TinyGP") File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 151, in
main_run
Fitness, RawScore, Formula = main() File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 130, in
main
halloffame=hof, verbose=True) File "C:\Python27\lib\site-packages\deap\algorithms.py", line 150, in
eaSimple
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 82, in
eval_func
spectrum = FaultVersionsDict[str(numberOfversion[version])] KeyError: '244'
Process finished with exit code 1
How can I solve this error? This is the full code:
import os
import sys
import operator
import numpy as np
import pandas as pd
import time
import pickle
import warnings
import random
import itertools
import random
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
import numpy
from deap import gp
import glob
warnings.filterwarnings('ignore')
def datafile():
files = []
for filepath in glob.iglob(r'C:\defect4j\All single fault\*.txt'):
files.append(filepath)
return files
def readcsv():
for csvfiles in datafile():
nFaultVersion = len(datafile())
vFaultLocation = np.zeros(nFaultVersion)
vStatementCount = np.zeros(nFaultVersion)
i = 0
FaultVersionsDict = {}
nFirstFault = (pd.read_csv(csvfiles, sep=',', nrows=0)).columns[0]
df = pd.read_csv(csvfiles, skiprows=1, header=None).values
dset = list(list(float(elem) for elem in row) for row in df)
vFaultLocation[i] = nFirstFault
vStatementCount[i] = df.shape[0]
FaultVersionsDict[str(i)] = df
i = i + 1
return [vFaultLocation, vStatementCount, FaultVersionsDict]
def safeDiv(left, right):
try: return left / right
except ZeroDivisionError: return 0
pset = gp.PrimitiveSet("MAIN", 4)
pset.addPrimitive(numpy.add, 2, name="gp_add")
pset.addPrimitive(numpy.subtract, 2, name="gp-vsub")
pset.addPrimitive(numpy.multiply, 2, name="gp_mul")
pset.addPrimitive(numpy.sqrt, 1, name="gp_sqrt")
pset.addPrimitive(safeDiv, 2, name="gp_div")
pset.addPrimitive(numpy.negative, 1, name="gp_neg")
pset.addPrimitive(numpy.cos, 1, name="gp_cos")
pset.addPrimitive(numpy.sin, 1, name="gp_sin")
#pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
pset.addEphemeralConstant("rand",lambda: random.random() * 100)
pset.renameArguments(ARG0='EP', ARG1="EF", ARG2="NP", ARG3="NF")
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
def eval_func(individual):
#F = gp.compileADF(expr=individual, psets=pset)
F = toolbox.compile(expr=individual)
#F = gp.compile(expr=individual, pset=pset)
fit = []
for version in range(len(numberOfversion)):
#for version in numberOfversion:
spectrum = FaultVersionsDict[str(numberOfversion[version])]
EP = spectrum[:, 0]
EF = spectrum[:, 1]
NP = spectrum[:, 2]
NF = spectrum[:, 3]
#print(spectrum)
susp_v = eval(F)
sortedSusp_v = -np.sort(-susp_v)
faultLocation = int(vFaultLocation[numberOfversion[version]])
susForFault = susp_v[faultLocation]
tieCount = np.where(sortedSusp_v == susForFault)
# firstTie = tieCount[0].min() + 1 #zero-based
LastTie = tieCount[0].max() + 1 # the last index of a tie of faulty statement
faultPosinRank = LastTie
currentFit = 100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100
fit.append(currentFit)
# sortedIndexList = list(np.argsort(susp_v)[::-1])
# faultPosinRank = sortedIndexList.index(vFaultLocation[numberOfversion[version]])
# fit.append(100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100)
avgFiteness = np.mean(fit)
return avgFiteness
toolbox.register("evaluate", eval_func)
toolbox.register("select", tools.selAutomaticEpsilonLexicase)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
def main():
random.seed(318)
pop = toolbox.population(n=300)
hof = tools.HallOfFame(1)
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
halloffame=hof, verbose=True)
# print log
return pop, log, hof
def main_run(outputFolder):
if os.path.exists(outputFolder) is False:
os.mkdir(outputFolder)
outputFile_i = os.path.join(outputFolder, "formula.csv")
outputFile_v = os.path.join(outputFolder, "VersionSamples.csv")
outputFile_f = os.path.join(outputFolder, "Fiteness.csv")
file_v = open(outputFile_v, "w")
file_f = open(outputFile_f, "w")
with open(outputFile_i, "w") as file:
global numberOfversion
for i in range(30):
numberOfversion = random.sample(range(0, 255), 120)
#numberOfversion = random.sample(range(0, 92), 20) # randomly choose 20 samples from 92 faulty versions
file_v.write(str(numberOfversion) + "\n")
file_v.flush()
Fitness, RawScore, Formula = main()
file.write(Formula)
file_f.write(str(Fitness) + "," + str(RawScore) + "\n")
file_f.flush()
print(i)
file.flush()
file.close()
file_v.close()
if __name__ == '__main__':
global numberOfversion
#print(datafile())
vFaultLocation, vStatementCount, FaultVersionsDict = readcsv()
#print(readcsv())
main_run("C:\defect4j\TinyGP")
main()
#print(eval_func(individual=readcsv()))
Related
.Hi all, I have 70k images saved into .h5 file and now with this script I want to read from that file and annotate text instances into .json file. When I run this script it takes very long time to annotate 1 image (cca 2h).
When I do this with 15 images then the script works fine and annotate all 15 images about a few seconds.
Now with 70k images -> .h5 file is 51gb.
I don't know is problem in code or the h5 file is too big? Because code works fine with small amount of images, but I'm working on some project where I need 70k or 700k images.
from __future__ import division
import os
import os.path as osp
from re import U
import numpy as np
import matplotlib.pyplot as plt
import h5py
from common import *
import json
import cv2
import numpy as np
from itertools import cycle
import js2py
#from gen import brojac
#from synthgen import imnames
global x
global y
def write_json(data, filename='annotation.json'):
with open(filename,'w') as file:
json.dump(data,file,indent=4)
DATA_PATH = 'results'
DB_FNAME = osp.join(DATA_PATH,'SynthText.h5')
def get_data():
return h5py.File(DB_FNAME,'r')
def viz_textbb(text_im, imageName, charBB_list, wordBB, textToList, alpha=1.0):
"""
text_im : image containing text
charBB_list : list of 2x4xn_i bounding-box matrices
wordBB : 2x4xm matrix of word coordinates
"""
#print("k",z, type(z))
plt.close(1)
plt.figure(1)
plt.imshow(text_im)
H,W = text_im.shape[:2]
global imnames
#print("MOLIIIM",wordBB)
#DODANO IZ MAIN-a
#**********************************************
db = h5py.File('results/SynthText.h5', 'r')
dsets = sorted(db['data'].keys())
for k in dsets:
db = get_data()
imnames = sorted(db['data'].keys())
start = 0
count = 0
coordinate = []
coordinate1 = []
name = []
name1 = []
final = []
upperList = []
downList = []
counter = 0
FinalFinal = []
imageData = { }
dictList = []
for eachWord in textToList:
length = len(eachWord)
for i in range(0,4):
for j in range(start,length+start):
coordinate.append([charBB_list[0][0][i][j], charBB_list[0][1][i][j]])
coordinate1.append((charBB_list[0][0][i][j], charBB_list[0][1][i][j]))
name.append(coordinate)
name1.append(coordinate1)
coordinate = []
for j in range(0, length):
for i in range(len(name)) :
#print(i,j, name[i][j]) ## koordinate da se snađem, treba
final.append(name[i][j])
#print(name)
#NEŠTA ZA CRTANJE, NEBITNO
if(i == 0 or i == 1):
upperList.append(name[i][j])
if(i == 2):
downList.append(name[i+1][j])
if(i == 3):
downList.append(name[i-1][j])
down = reversed(downList)
joinList = [*upperList,*down,upperList[0]]
FinalFinal.append(joinList)
imageData['transcription']=eachWord
imageData['language']="Latin"
imageData['illegibility']=False
imageData['points']=final
dictionary_copy = imageData.copy()
dictList.append(dictionary_copy)
del(dictionary_copy)
finalToList = np.array(final)
name=[]
final = []
upperList = []
downList = []
start = len(eachWord) + start
#del(dictList[0])
finalDict = {f'gt_{imageName}':dictList}
#print(type(finalDict)) --> dict
#print(imageName,finalDict)
#print(finalDict)
#print(len(textToList))
#print(textToList)
with open("annotation.json") as json_file:
data=json.load(json_file)
temp=data["annotations"]
#temp.append(finalDict)
temp.update(finalDict)
#temp['annotations'] = finalDict
write_json(data)
json_file.close()
for list in FinalFinal:
x,y = zip(*list)
plt.plot(x,y)
#print(x,y)
# points = tuple(zip(x,y))
# # boundaries of the bounding box
# left, right = min(points, key=lambda p: p[0]), max(points, key=lambda p: p[0])
# bottom, top = min(points, key=lambda p: p[1]), max(points, key=lambda p: p[1])
# # area
# base = right[0] - left[0]
# height = top[1] - bottom[1]
# A = base * height
#print(A)
for i in range(len(charBB_list)):
# #print(charBB_list) #ispisuje x-eve za jedan vrh svih instanci pojedinih slova, pa drugi, 3. i 4. i onda posebno y-one
bbs = charBB_list[i]
ni = bbs.shape[-1]
for j in range(ni):
bb = bbs[:,:,j]
bb = np.c_[bb,bb[:,0]] #ako se doda ,bb[:,0] -> printa isto kao i gornji lijevi
#plt.plot(bb[0,:], bb[1,:], 'r', alpha=alpha)
# plot the word-BB:
for i in range(wordBB.shape[-1]):
bb = wordBB[:,:,i] #koordinate wordBB-a
bb = np.c_[bb,bb[:,0]] #spaja skroz lijevu, TREBA
#plt.plot(bb[0,:], bb[1,:], 'g', alpha=alpha)
# visualize the indiv vertices:
vcol = ['r','g','b','k']
#for j in range(4):
#plt.scatter(bb[0,j],bb[1,j],color=vcol[j])
#print(bb) # ----> KOORDINATE wordBB-a
#print(bb[1,j])
plt.gca().set_xlim([0,W-1])
plt.gca().set_ylim([H-1,0])
plt.show(block=False)
def main(db_fname):
db = h5py.File(db_fname, 'r')
dsets = sorted(db['data'].keys())
print ("total number of images : ", colorize(Color.RED, len(dsets), highlight=True))
for k in dsets:
rgb = db['data'][k][...]
charBB = db['data'][k].attrs['charBB']
wordBB = db['data'][k].attrs['wordBB']
txt = db['data'][k].attrs['txt']
textToList = (db['data'][k].attrs['txt']).tolist()
#print(textToList)
viz_textbb(rgb, k,[charBB], wordBB, textToList)
print ("image name : ", colorize(Color.RED, k, bold=True))
print (" ** no. of chars : ", colorize(Color.YELLOW, charBB.shape[-1]))
print (" ** no. of words : ", colorize(Color.YELLOW, wordBB.shape[-1]))
print (" ** text : ", colorize(Color.GREEN, txt))
#print("To know", z[1], type(z[1]))
# OTKOMATI OVO DOLJE AKO ŽELIM STISKAT ENTER
# if 'q' in input("next? ('q' to exit) : "):
# break
db.close()
if __name__=='__main__':
main('results/SynthText.h5')
I am using Python to download stock data from Yahoo. The download code is placed in csv_util.py.
The invoking script is mapper.py. After importing csv_util script, I get the following error:
Traceback (most recent call last):
('Lines:', [])
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 56, in <module>
download_file_ticker(ticker,ref_ticker)
File "C:/Users/laurel.ts/Desktop/predictstock/mapper.py", line 53, in download_file_ticker
csv_util.download_csv_file_web(ticker,ref_ticker)
TypeError: unbound method download_csv_file_web() must be called with csv_util instance as first argument (got str instance instead)
Here is the code:
mapper.py
###### Mapper.py ######
import time
import sys
# New imports
import stock_predict_main_app
import predict_stock.csv_util
from predict_stock.csv_util import csv_util
predict_date = '03112016'
ticker = 'ARVIND.NS'
ref_ticker = 'MSFT'
input_default_values = {'numdays2predict': 2,
'simulations': 10,
'historicaldatalen': 0,
'tickersymbol': 'ARVIND.NS',
# 'tickersymbol': 'MSFT',
'stockdate2predict': predict_date,
'downloadstock': 1,
'plotshow': 0,
'industrytype': 'itindustry'}
# Pass ticker symbol and date
def pass_ticker_date(predict_date, input_default_values):
with open('tickerList.txt') as f:
lines = f.read().splitlines()
print(type(input_default_values))
tickersymbol = input_default_values["tickersymbol"]
print("Lines:", lines)
for tickersymbol in lines:
print("tickersymbol:", tickersymbol)
stock_predict_main_app.test_predict(tickersymbol)
# Download the file based on the ticker symbol
def download_file_ticker(ticker,ref_ticker):
# ticker= input_default_values["tickersymbol"]
# ref_ticker = input_default_values["tickersymbol"]
csv_util.download_csv_file_web(ticker,ref_ticker)
pass_ticker_date(predict_date, input_default_values)
download_file_ticker(ticker,ref_ticker)
csv_util.py
import logging
import csv
import urllib
import datetime
import numpy as np
import pandas as pd
import sys
import os
from collections import defaultdict
###custom local modules###
from datetime_util import datetime_util
from loggingpy import loggingpy
global stock_input_file,stock_input_ref_file
global loggingpy_obj,datetime_util_obj
global actual_stock_price, output_stock_price
class csv_util:
def __init__(self,actual_stock_price,output_stock_price,download_flag):
print("Class anme: __init__",self.__class__.__name__)
self.stock_input_file=""
self.stock_input_ref_file = ""
self.actual_stock_price = actual_stock_price
self.output_stock_price = output_stock_price
self.download_flag=download_flag
self.datetime_util_obj = datetime_util()
self.loggingpy_obj = loggingpy()
#datetime_util_obj = self.datetime_util_obj
#loggingpy_obj=self.loggingpy_obj
'''
METHOD: prepare_actual_data2writecsv
'''
def prepare_actual_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prepare_actual_data2writecsv")
temp_date_ary = []
temp_date_ary = self.actual_stock_price['date']
temp_closeprice_ary = self.actual_stock_price['closeprice']
temp_dailyreturn_ary = self.actual_stock_price['dailyreturn']
# predicted_date_array = sorted(temp_predicted_values_wd.keys(),reverse=True)
# remove last element of array or appenda dummy 0 to daily returns
temp_date_ary.pop()
temp_closeprice_ary.pop()
# temp_dailyreturn_ary.append(0)
self.loggingpy_obj.log_func({'temp_date_ary': temp_date_ary, 'temp_closeprice_ary': temp_closeprice_ary,
'temp_dailyreturn_ary': temp_dailyreturn_ary})
np_column_ary = np.column_stack((temp_date_ary, temp_closeprice_ary, temp_dailyreturn_ary))
num_rows, num_columns = np_column_ary.shape
logging.info("np_column_ary:%s,Rowsdata %s,ColumnData %s", np_column_ary.size, np_column_ary[:][0],
np_column_ary[:, 0])
logging.info("NumRows:%d,Num Columns:%s", num_rows, num_columns)
counter = 0
for i in range(0, num_rows):
counter += 1
temp_temp_row_data = []
temp_row_data = np_column_ary[:][i]
temp_temp_row_data = list(temp_row_data)
temp_rowdate = temp_row_data[0]
logging.debug("[%d],Length:[%d],type:[%s],Date:%s,Rowsdata:%s", i, len(temp_row_data), type(temp_row_data),
temp_rowdate, temp_row_data)
predict_date_wise_data_wd[temp_rowdate] = (map(str, temp_temp_row_data))
predict_datewise_data_wod[temp_rowdate] = (map(str, temp_temp_row_data))
logging.info(">>prepare_actual_data2writecsv")
'''
METHOD: prepare_data2writecsv
'''
def prep_predicted_data2writecsv(self, predict_date_wise_data_wd, predict_datewise_data_wod):
logging.info("<<prep_predicted_data2writecsv")
temp_predicted_values_wd = self.actual_stock_price['predicted_vals_with_drift']
temp_predicted_values_wod = self.actual_stock_price['predicted_vals_without_drift']
self.actual_stock_price['meanwithdrift'] = []
self.actual_stock_price['meanwithoutdrift'] = []
temp_var = temp_predicted_values_wd.keys()
predicted_date_array = self.datetime_util_obj.sort_datettime_list(temp_var, False)
for eack_key in predicted_date_array:
logging.debug("WD:eack key:%s", eack_key)
temp_string_val_wd = []
temp_string_val_wod = []
temp_string_val_wd = temp_predicted_values_wd[eack_key]
temp_string_val_wod = temp_predicted_values_wod[eack_key]
mean_wd = np.mean(temp_string_val_wd)
mean_wod = np.mean(temp_string_val_wod)
# Store mean in global variable
self.actual_stock_price['meanwithdrift'].append(mean_wd)
self.actual_stock_price['meanwithoutdrift'].append(mean_wod)
logging.debug("meanwithdrift:%s,meanwithoutdrift:%s", mean_wd, mean_wod)
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wd), type(temp_string_val_wd))
logging.debug("temp_string_val_wd:len:%d,type:%s", len(temp_string_val_wod), type(temp_string_val_wod))
temp_string_wd = []
temp_string_wod = []
if not predict_datewise_data_wod.has_key(eack_key):
predict_datewise_data_wod[eack_key] = []
predict_date_wise_data_wd[eack_key] = []
temp_string_wd = [eack_key, "", ""]
temp_string_wod = [eack_key, "", ""]
temp_string_wd.append(mean_wd)
temp_string_wod.append(mean_wod)
temp_string_wd.extend(temp_string_val_wd)
temp_string_wod.extend(temp_string_val_wod)
logging.debug("temp_string_wd:len:%d,type:%s,Data:%s", len(temp_string_wd), type(temp_string_wd),
temp_string_wd)
logging.debug("temp_string_wod:len:%d,type:%s,Data:%s", len(temp_string_wod), type(temp_string_wod),
temp_string_wod)
predict_date_wise_data_wd[eack_key].extend(temp_string_wd)
predict_datewise_data_wod[eack_key].extend(temp_string_wod)
#self.loggingpy_obj.log_func({"temp_string_wd": temp_string_wd, "temp_string_wod": temp_string_wod})
logging.info(">>prepare_data2writecsv")
'''
METHOD: write2csvfile
Writes given data to the given csv absolute path filename
Input arguments: filename to be written, data to be written
'''
def write2csvfile(self,file_name,local_rows_data):
logging.info("<<:write2csvfile")
#output_data_path=self.loggingpy_obj.output_data_path
#os.chdir(output_data_path)
with open(file_name, 'w') as csv_fw:
out_csv_writer = csv.writer(csv_fw, lineterminator='\n')
out_csv_writer.writerows(local_rows_data)
logging.info("csv file[%s]writing :Sucess",file_name)
logging.info(">>:write2csvfile")
'''
# Prepare header list of columns to write to csv file
# Write predicted values to csv file
predicted_data_wod_date_val_ary
'''
##classmethod
#def write2csv_file(ticker_symbol):
def write2csv_file(self,ticker_symbol):
logging.info("<<:write2csv_file")
datetime_stamp=datetime.datetime.now().strftime("%Y%m%d_%H%M")
file_name="output_prediction_with_drift"+ticker_symbol+"_"+datetime_stamp+".csv"
file_name_wod = "output_prediction_without_drift" + ticker_symbol + "_" + datetime_stamp + ".csv"
file_name=self.loggingpy_obj.output_data_path + file_name
file_name_wod = self.loggingpy_obj.output_data_path + file_name_wod
column_headers=self.output_stock_price['column_headers']
#Prepare header list of columns to write to csv file;column_headers is a global variable
column_headers.insert(0,'Date')
column_headers.insert(1, 'Actual Prices')
column_headers.insert(2, 'Daily Return')
column_headers.insert(3, 'Mean')
logging.info("column_headers,len:%s,type:%s,data:%s", len(column_headers), type(column_headers), column_headers)
logging.info("self:column_headers,len:%s", len(self.output_stock_price['column_headers']))
predict_date_wise_data_wd = {}
predict_datewise_data_wod = {}
self.prepare_actual_data2writecsv(predict_date_wise_data_wd, predict_datewise_data_wod)
self.loggingpy_obj.log_func(
{"Before:predict data_wd": predict_date_wise_data_wd, "predict data_wod": predict_datewise_data_wod})
self.prep_predicted_data2writecsv(predict_date_wise_data_wd,predict_datewise_data_wod)
self.loggingpy_obj.log_func({"After:pred data_wd": predict_date_wise_data_wd, "pred data_wod": predict_datewise_data_wod})
temp_new_data_ary=predict_date_wise_data_wd.keys()
sorted_temp_new_data_ary = self.datetime_util_obj.sort_datettime_list(temp_new_data_ary,True)
self.loggingpy_obj.log_func({"sorted_temp_new_data_ary":sorted_temp_new_data_ary})
data2write2csv_wd = [column_headers]
data2write2csv_wod = [column_headers]
counter=1
# add headers
for each_key in sorted_temp_new_data_ary:
counter+=1
data2write2csv_wd.insert(counter, predict_date_wise_data_wd[each_key])
data2write2csv_wod.insert(counter,predict_datewise_data_wod[each_key])
self.write2csvfile(file_name, data2write2csv_wd)
self.write2csvfile(file_name_wod, data2write2csv_wod)
logging.debug("data2write2csv_wd:%s", repr(data2write2csv_wd))
logging.info("<<:write2csv_file")
#sys.exit()
# ######################### END OF METHOD write2csv_file ################################
'''
METHOD: read_csv_file
'''
##classmethod
def read_csv_file(self,file_name,ref_data_flag):
logging.debug("<<:read_csv_file")
logging.info("file_name,%s,",file_name)
if not os.path.exists(file_name):
logging.critical("File not found:Check!"+file_name)
sys.exit(2)
#actual_stock_price=self.actual_stock_price
logging.info("file_name,%s,", file_name)
data = pd.read_csv(file_name,parse_dates=False, infer_datetime_format=False,date_parser=None)
logging.info("self:::::::::::::%s",repr(self.datetime_util_obj));
logging.info("data columns,len:%d,type:%s,data:%s,",len(data.columns),type(data.columns),data.columns)
logging.info(",data.columns:%s",data.columns.values)
datetime_list_dmy = self.datetime_util_obj.convert2datettime_list(data['Date'].tolist())
#actual_stock_price=defaultdict();
actual_stock_price = self.actual_stock_price
if ref_data_flag == 1:
actual_stock_price['ref_data']={}
actual_stock_price['ref_data']['date'] = datetime_list_dmy
actual_stock_price['ref_data']['closeprice'] = data['Close'].tolist()
else:
actual_stock_price['date'] = datetime_list_dmy
actual_stock_price['closeprice'] = data['Close'].tolist()
self.loggingpy_obj.log_func({"datetime_list_dmy": datetime_list_dmy})
#logging.debug("repr self asp:%s",repr(self.actual_stock_price))
del data
logging.debug(">>:read_csv_file")
'''
METHOD: download_csv_file_web
Download stock data from web yahoofinance
'''
def download_csv_file_web(self,ticker_symbol,ref_ticker_symbol):
logging.debug("<<:download_csv_file_web")
input_data_path=self.loggingpy_obj.input_data_path
logging.info("input_data_path:%s:",input_data_path)
yahoo_url="http://real-chart.finance.yahoo.com/table.csv?s="
base_url=yahoo_url + ticker_symbol
base_url_ref = yahoo_url + ref_ticker_symbol
#datetime_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
datetime_stamp = datetime.datetime.now().strftime("%Y%m%d")
global stock_input_file,stock_input_ref_file
#print("File found1:", stock_input_file)
if self.download_flag == 1:
file_name = "stock_input_" + ticker_symbol + "_" + datetime_stamp + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + "_" + datetime_stamp + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
self.download_file(stock_input_file,base_url)
self.download_file(stock_input_ref_file, base_url_ref)
else:
file_name = "stock_input_" + ticker_symbol + ".csv"
ref_file_name = "stock_ref_input_" + ref_ticker_symbol + ".csv"
stock_input_file = input_data_path + "\\" + file_name
stock_input_ref_file = input_data_path + "\\" + ref_file_name
if os.path.isfile(stock_input_file) and os.path.isfile(stock_input_ref_file):
logging.info("File found 3 :[%s],[%s]",stock_input_file,stock_input_ref_file)
print("File found3:",stock_input_file,stock_input_ref_file)
else:
print("File not found4:", stock_input_file,stock_input_ref_file)
logging.critical("File not found4![%s] or [%s]",stock_input_file,stock_input_ref_file)
sys.exit(2)
print("File found5:", stock_input_file,stock_input_ref_file)
logging.info("stock_input_file 5 :%s,base_url:%s,ref file name:[%s]", stock_input_file, base_url,stock_input_ref_file)
self.stock_input_file=stock_input_file
self.stock_input_ref_file=stock_input_ref_file
#sys.exit(2)
logging.debug(">>:download_csv_file_web")
'''
METHOD: download_file
Downlaod stock data from web yahoofinance
'''
def download_file(self,file_name,base_url):
logging.debug("<<:download_file")
try:
logging.info("Try Reading [:%s]",base_url)
status = urllib.urlretrieve(base_url, file_name)
logging.info("Status:%s", status)
urllib.urlcleanup()
if os.path.exists(file_name):
logging.info ("File exists, File download success!File"+file_name)
else:
logging.critical("File downloaded DOE NOT EXIST, exit.."+file_name)
sys.exit(2)
except urllib.ContentTooShortError as ctse:
print("File download: Failed, found some error")
logging.critical("File donwlaod failed from url:%s",base_url)
sys.exit(2)
#logfile_handle.write(ctse.content)
logging.debug(">>:download_file")
def download_read_csv_file(self,ticker_symbol, industry_type_ts):
logging.debug("<<:download_read_csv_file")
ref_data_flag=0
self.download_csv_file_web(ticker_symbol, industry_type_ts)
stock_input_file = self.stock_input_file
stock_input_ref_file = self.stock_input_ref_file
# download_csv_file_web("GLD")
ref_data_flag = 0
self.read_csv_file(stock_input_file, 0)
self.read_csv_file(stock_input_ref_file, 1)
#self.loggingpy_obj.log_func({"csv_util:actual_stock_price":self.actual_stock_price})
logging.debug(">>:download_read_csv_file")
loggingpy.py
import logging
import datetime
import os
import sys
global current_dir_path, input_data_path, output_data_path
class loggingpy:
def __init__(self):
loglevel=logging.INFO
self.loglevel=loglevel
self.log_config()
print("loggingpy - log __init__")
def log_config(self):
global current_dir_path, input_data_path, output_data_path
current_dir_path = os.getcwd()
input_data_path = current_dir_path + "\..\stock_data"
output_data_path = current_dir_path + "\..\stock_data"
if not os.path.exists(output_data_path):
os.mkdir(output_data_path)
print("current_dir_path:" + current_dir_path + ",input_data_path:" + input_data_path,
",output_data_path:" + output_data_path)
main_date_time_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
logfile = output_data_path + "\log_" + main_date_time_stamp + ".log"
#logging.basicConfig(stream=sys.stdout,level=logging.DEBUG, format='%(levelname)s:%(message)s')
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.basicConfig(stream=sys.stdout, level=self.loglevel)
#logging.basicConfig(filename=logfile, level=logging.INFO, format='%(levelname)s:%(message)s')
#console=logging.StreamHandler().setLevel(logging.DEBUG)
#logging.getLogger('abcd').addHandler(console)
self.input_data_path = input_data_path
self.output_data_path = output_data_path
self.current_dir_path = current_dir_path
logging.info("Logging test %s","HELLO TEST")
logging.info("current_dir_path:%s,input_data_path:%s,output_data_path:%s", current_dir_path,input_data_path,output_data_path)
def log_fn(self,temp_list):
for i in range(0, len(temp_list)):
log_level_set=logging.getLogger().getEffectiveLevel()
#print("log_level_set",log_level_set,logging.INFO,logging.DEBUG)
if log_level_set==logging.INFO:
logging.info(":len:%d,Type:%s", len(temp_list[i]), type(temp_list[i]))
if log_level_set == logging.DEBUG:
logging.debug("len:%d,Type:%sData:%s", len(temp_list[i]), type(temp_list[i]),temp_list[i])
def log_info(self,msg):
logging.info(msg)
def log_func(self,templog):
log_level_set = logging.getLogger().getEffectiveLevel()
#log_string_info = "Name:%s,len:%d,type:%s"
#log_string_debug = "Name:%s,len:%d,type:%s,Data:%s"
log_string_info = "Name:{0},len:{1},type:{2}"
log_string_debug = "Name:{0},len:{1},type:{2},Data:%{3}"
for var_name,var_value in templog.iteritems():
if log_level_set == logging.INFO:
#logging.info(""+log_string_info,var_name, len(var_value),type(var_value))
logging.info(log_string_info.format(var_name, len(var_value), type(var_value)))
if log_level_set == logging.DEBUG:
#logging.debug(""+log_string_debug,var_name, len(var_value), type(var_name),var_value)
logging.debug(log_string_debug.format(var_name, len(var_value), type(var_name), var_value))
What could be the reason for the error ?
I want to make a word cloud in Korean. My OS is Windows. I want to make the Python code into an exe file that can run at any desktop. I'm trying to use PyInstaller to make the exe file.
pyinstaller.exe --onefile --icon=Pikachu.ico wordcloud.py
But I get this error:
Traceback (most recent call last):
File "wordcloud.py", line 6, in <module>
File "C:\Users\Myungho Lee\Downloads\pyinstaller-develop\pyinstaller-develop\PyInstaller\loader\pyimod03_importers.py", line 389, in load_module
File "site-packages\pytagcloud\__init__.py", line 26, in <module>
IOError: [Errno 2] No such file or directory: 'C:\\Users\\Myungho Lee\\AppData\\Local\\Temp\\_MEI67722\\pytagcloud\\fonts\\fonts.json'
Failed to execute script wordcloud
This is my code: (wordcloud.py)
#-*- coding: utf-8 -*-
import fnmatch
import os
import random
from PyInstaller.utils.hooks import collect_data_files
import pytagcloud
import simplejson
from pytagcloud import LAYOUT_HORIZONTAL
# requires Korean font support
import csv
import pygame
def draw_cloud(tags, filename, fontname, size):
pytagcloud.create_tag_image(tags, filename, fontname = fontname, size = size, rectangular=True, layout=LAYOUT_HORIZONTAL)
r = lambda: random.randint(0,255)
color = lambda: (r(), r(), r())
fonts = pygame.font.get_fonts()
#datas = collect_data_files('fonts')
current_dir = os.path.dirname(os.path.abspath(__file__))
current_dir = current_dir.replace('\\', '/')
csv_dir = current_dir+"/csv/"
font_path = current_dir + "/fonts/"
print font_path
FONT_CACHE = simplejson.load(open(os.path.join(font_path, 'fonts.json'), 'r'))
print FONT_CACHE
path = csv_dir
file_list = [os.path.join(dirpath, f)
for dirpath, dirnames, files in os.walk(path)
for f in fnmatch.filter(files, '*.csv')]
for i in range(0, len(file_list)):
file_list[i] = file_list[i].replace('\\', '/')
file_list[i] = file_list[i].decode('cp949').encode('utf-8')
file_list[i] = unicode(file_list[i], 'utf-8')
tmp_str = file_list[i].split("/")
file_len = len(tmp_str)
for fileName in file_list:
with open(fileName, 'rb') as csvfile:
dic = []
count = False
reader = csv.reader(csvfile, delimiter = ',', quotechar = '|')
for row in reader:
try:
if(int(row[2]) > 500):
freq = int(int(row[2]) * 0.4)
count = True
elif(int(row[2]) > 400):
freq = int(int(row[2])*0.5)
count = True
elif(int(row[2])> 300):
freq = int(int(row[2]) * 0.6)
count = False
elif(int(row[2]) > 200):
freq = int(int(row[2])* 0.65)
count = False
elif(int(row[2]) > 100):
freq = int(int(row[2]) * 0.7)
count = False
else:
freq = int(int(row[2])* 0.75)
count = False
if(count):
sizediv = 3
else:
sizediv = 2
dic.append({'color': color(), 'tag': unicode(row[1], 'cp949'), 'size': freq/sizediv})
except:
continue
tags = dic
if (dic[0]['size'] > 70):
size = (600, 600)
elif (dic[0]['size'] > 60):
size = (550, 550)
elif (dic[0]['size'] > 50):
size = (450, 450)
elif (dic[0]['size'] > 40):
size = (400, 400)
elif (dic[0]['size'] > 30):
size = (350, 350)
cloudName = fileName.split('/csv/')[1]
cloudName = cloudName.split('.csv')[0]
dirName = fileName.split('/csv/')[0]
draw_cloud(tags, dirName + '/word_cloud/'+cloudName+ '.png', FONT_CACHE[0]['name'], size)
I would like for the resulting graph to be in a new window. I know the phrase, %matplotlib inline puts the graph in the console, but if I remove it it gives me the error
FigureCanvasMac' object has no attribute 'get_renderer.
Is there a way I can go around this?
import re
import ftplib
import os
from urllib.request import urlopen
import json
import matplotlib
%matplotlib inline
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, PathPatch
from matplotlib.path import Path
from matplotlib.transforms import Affine2D
import numpy as np
import pylab
import re
import sunpy.time
import numpy as np
from numpy.random import *
from matplotlib.patches import Rectangle
from adjustText import adjust_text
import pandas as pd
from scipy import interpolate
import sys
info = []
parsedFilename = []
dateAndTime = []
xcen = []
ycen = []
sciObj = []
xfov = []
yfov = []
matchingAR = []
def getNumberOfEntries(theJSON):
return len(dateAndTime)
def getInfo(counter, theJSON):
cont = True
while cont:
try:
dateAndTime.append(theJSON["Events"][counter]["date"])
xcen.append(float("%.2f" % theJSON["Events"][counter]["xCen"]))
ycen.append(float("%.2f" % theJSON["Events"][counter]["yCen"]))
xfov.append(float("%.2f" % theJSON["Events"][counter]["raster_fovx"]))
yfov.append(float("%.2f" % theJSON["Events"][counter]["raster_fovy"]))
sciObj.append(theJSON["Events"][counter]["sciObjectives"])
counter = counter + 1
getInfo(counter, theJSON)
except IndexError:
cont = False
break
def setXMax(theJSON):
xmax = xcen[0]
for i in range (0, getNumberOfEntries(theJSON)):
if xcen[i] > xmax:
xmax = xcen[i]
return round(xmax + 150,-1)
def setXMin(theJSON):
xmin = xcen[0]
for i in range (0, getNumberOfEntries(theJSON)):
if xcen[i] < xmin:
xmin = xcen[i]
return round(xmin - 150, -1)
def setYMax(theJSON):
ymax = ycen[0]
for i in range (0, getNumberOfEntries(theJSON)):
if ycen[i] > ymax:
ymax = ycen[i]
return round(ymax + 150, -1)
def setYMin(theJSON):
ymin = ycen[0]
for i in range (0, getNumberOfEntries(theJSON)):
if ycen[i] < ymin:
ymin = ycen[i]
return round(ymin - 150, -1)
# def sort():
# for i in range(len(dateAndTime)):
# for j in range(len(xcen)-1, i, -1):
# if ( xcen[j] < xcen[j-1]):
# temp1 = dateAndTime[j]
# dateAndTime[j] = dateAndTime[j-1]
# dateAndTime[j-1] = temp1
# temp2 = xcen[j]
# xcen[j] = xcen[j-1]
# xcen[j-1] = temp2
# temp3 = ycen[j]
# ycen[j] = ycen[j-1]
# ycen[j-1] = temp3
# temp4 = xfov[j]
# xfov[j] = xcen[j-1]
# xfov[j-1]=temp4
# temp5 = yfov[j]
# yfov[j] = ycen[j-1]
# yfov[j-1]=temp5
# temp6 = sciObj[j]
# sciObj[j] = sciObj[j-1]
# sciObj[j-1] = temp6
def sort():
for i in range(len(dateAndTime)):
for j in range(len(dateAndTime)-1, i, -1):
if ( dateAndTime[j] < dateAndTime[j-1]):
temp1 = dateAndTime[j]
dateAndTime[j] = dateAndTime[j-1]
dateAndTime[j-1] = temp1
temp2 = xcen[j]
xcen[j] = xcen[j-1]
xcen[j-1] = temp2
temp3 = ycen[j]
ycen[j] = ycen[j-1]
ycen[j-1] = temp3
temp4 = xfov[j]
xfov[j] = xcen[j-1]
xfov[j-1]=temp4
temp5 = yfov[j]
yfov[j] = ycen[j-1]
yfov[j-1]=temp5
temp6 = sciObj[j]
sciObj[j] = sciObj[j-1]
sciObj[j-1] = temp6
def createAnnotations(theJSON):
annotations = []
for i in range(getNumberOfEntries(theJSON)):
annotations.append('(' + str(xcen[i])+ ', '+ str(ycen[i]) + ')')
return annotations
def fixAnnotations(annotations):
texts = []
for xt, yt, s in zip(xcen, ycen, annotations):
texts.append(plt.text(xt, yt, s))
return texts
def plot(theJSON):
fig, ax = plt.subplots(figsize=(30, 20))
circle = Circle((0, 0), 980, facecolor='none', edgecolor=(0, 0.8, 0.8), linewidth=3, alpha=0.5)
ax.add_patch(circle)
plt.plot(xcen, ycen, color="red")
plt.plot(xcen, ycen, 'ro', color = 'blue')
plt.xlim([setXMin(theJSON), setXMax(theJSON)])
plt.ylim([setYMin(theJSON), setYMax(theJSON)])
ax.set_xticks(np.arange(setXMin(theJSON), setXMax(theJSON), 50))
ax.set_yticks(np.arange(setYMin(theJSON), setYMax(theJSON), 50))
for i in range(getNumberOfEntries(theJSON)):
if xfov[i] != 0:
xStart = xcen[i] - xfov[i]/20
yStart = ycen[i] - yfov[i]/20
ax.add_patch(Rectangle((xStart, yStart), xfov[i]/10, yfov[i]/10, facecolor='none'))
texts = fixAnnotations(createAnnotations(theJSON))
f = interpolate.interp1d(xcen, ycen)
x = np.linspace(min(xcen), max(ycen), 1000)
y = f(x)
adjust_text(texts, x, y, arrowprops=dict(arrowstyle="->", color='r', lw=2.0), autoalign='y', only_move={'points':'y', 'text':'y'}, expand_points=(1.2, 1.4), force_points=0.40)
plt.grid()
plt.show()
main(False)
def searchOnceMore(searchAgain):
if searchAgain == True:
noaaNmbr = input('Enter desired active region: ')
return noaaNmbr
else:
continueSearch = input('Would you like to search again?(yes/no)')
if continueSearch == 'yes':
noaaNmbr = input('Enter desired active region:')
return noaaNmbr
elif continueSearch == 'no':
sys.exit(0)
else:
print('please enter "yes" or "no"')
searchOnceMore(False)
def main(searchAgain):
noaaNmbr = searchOnceMore(searchAgain)
urlData = "http://www.lmsal.com/hek/hcr?cmd=search-events3&outputformat=json&instrument=IRIS&noaanum="+ noaaNmbr +"&hasData=true"
webUrl = urlopen(urlData)
counter = 0
if (webUrl.getcode()==200):
data = webUrl.read().decode('utf-8')
theJSON = json.loads(data)
getInfo(counter, theJSON)
else:
print ("You done messed up!!!")
sort()
for i in range (getNumberOfEntries(theJSON)):
print(dateAndTime[i])
print("(", xcen[i], ", ", ycen[i], ")")
print(sciObj[i])
print(' ')
if getNumberOfEntries(theJSON) != 0:
plot(theJSON)
else:
print('No observations for active region ' + noaaNmbr)
main()
main(True)
I have also used python and would suggest using John Zelle's graphic file. http://mcsp.wartburg.edu/zelle/python/
It's much more easier to understand and use in my opinion.
To open a new graph window:
Win1 = GraphWin("Graph Window 1", 100,100)
win2 = GraphWin("Graph Window 2", 100,150)
You can also open the python file to understand how it works. It might help understanding how to open a graph window your way. I only know how to open a new Graph Window through this file sorry, I hope it helps anyway!
I'm trying to run the following script, which reportedly works for some people:
"""Preprocessing script.
This script walks over the directories and dump the frames into a csv file
"""
import os
import csv
import sys
import random
import scipy
from scipy import misc as m
import numpy as np
import dicom
from skimage import io, transform
from joblib import Parallel, delayed
import dill
os.chdir("P:\\hackr\\dsb")
def mkdir(fname):
try:
os.mkdir(fname)
except:
pass
def get_frames(root_path):
"""Get path to all the frame in view SAX and contain complete frames"""
ret = []
for root, _, files in os.walk(root_path):
if len(files) == 0 or not files[0].endswith(".dcm") or root.find("sax") == -1:
continue
prefix = files[0].rsplit('-', 1)[0]
fileset = set(files)
expected = ["%s-%04d.dcm" % (prefix, i + 1) for i in range(30)]
if all(x in fileset for x in expected):
ret.append([root + "/" + x for x in expected])
# sort for reproduciblity
return sorted(ret, key = lambda x: x[0])
def get_label_map(fname):
labelmap = {}
fi = open(fname)
fi.readline()
for line in fi:
arr = line.split(',')
labelmap[int(arr[0])] = line
return labelmap
def write_label_csv(fname, frames, label_map):
fo = open(fname, "w")
for lst in frames:
index = int(lst[0].split("\\")[1]) #changed from split("/")[3]
if label_map != None:
fo.write(label_map[index])
else:
fo.write("%d,0,0\n" % index)
fo.close()
def get_data(lst,preproc):
data = []
result = []
for path in lst:
f = dicom.read_file(path)
img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
m.imsave(dst_path, img)
result.append(dst_path)
data.append(img)
data = np.array(data, dtype=np.uint8)
data = data.reshape(data.size)
data = np.array(data,dtype=np.str_)
data = data.reshape(data.size)
return [data,result]
def write_data_csv(fname, frames, preproc):
"""Write data to csv file"""
#if __name__ == 'builtins': #'__main__':
fdata = open(fname, "w")
dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
data,result = zip(*dr)
for entry in data:
fdata.write(','.join(entry)+'\r\n')
print("All finished, %d slices in total" % len(data))
fdata.close()
result = np.ravel(result)
return result
def crop_resize(img, size):
"""crop center and resize"""
if img.shape[0] < img.shape[1]:
img = img.T
# we crop image from center
short_egde = min(img.shape[:2])
yy = int((img.shape[0] - short_egde) / 2)
xx = int((img.shape[1] - short_egde) / 2)
crop_img = img[yy : yy + short_egde, xx : xx + short_egde]
# resize to 64, 64
resized_img = transform.resize(crop_img, (size, size))
resized_img *= 255
return resized_img.astype("uint8")
def local_split(train_index):
random.seed(0)
train_index = set(train_index)
all_index = sorted(train_index)
num_test = int(len(all_index) / 3)
random.shuffle(all_index)
train_set = set(all_index[num_test:])
test_set = set(all_index[:num_test])
return train_set, test_set
def split_csv(src_csv, split_to_train, train_csv, test_csv):
ftrain = open(train_csv, "w")
ftest = open(test_csv, "w")
cnt = 0
for l in open(src_csv):
if split_to_train[cnt]:
ftrain.write(l)
else:
ftest.write(l)
cnt = cnt + 1
ftrain.close()
ftest.close()
# Load the list of all the training frames, and shuffle them
# Shuffle the training frames
random.seed(10)
train_frames = get_frames("./train_tiny")
random.shuffle(train_frames)
validate_frames = get_frames("./validate_tiny")
# Write the corresponding label information of each frame into file.
write_label_csv("train-label.csv", train_frames, get_label_map("./train.csv"))
write_label_csv("validate-label.csv", validate_frames, None)
# Dump the data of each frame into a CSV file, apply crop to 64 preprocessor
train_lst = write_data_csv("train-64x64-data.csv", train_frames, lambda x: crop_resize(x, 64))
valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))
# Generate local train/test split, which you could use to tune your model locally.
train_index = np.loadtxt("./train-label.csv", delimiter=",")[:,0].astype("int")
train_set, test_set = local_split(train_index)
split_to_train = [x in train_set for x in train_index]
split_csv("./train-label.csv", split_to_train, "./local_train-label.csv", "./local_test-label.csv")
split_csv("./train-64x64-data.csv", split_to_train, "./local_train-64x64-data.csv", "./local_test-64x64-data.csv")
Throws an error. When I run the code interactively I find that it's the write_data_csv invocation that breaks the script.
For instance, the script runs without error before using write_data_csv but the line valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64)) throws this error:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "<input>", line 5, in write_data_csv
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 804, in __call__
while self.dispatch_one_batch(iterator):
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 657, in dispatch_one_batch
tasks = BatchedCalls(itertools.islice(iterator, batch_size))
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 68, in __init__
self.items = list(iterator_slice)
File "<input>", line 5, in <genexpr>
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 161, in delayed
pickle.dumps(function)
_pickle.PicklingError: Can't pickle <function get_data at 0x0000000007B432F0>: attribute lookup get_data on __main__ failed
I'm not sure what my next move is to resolve this problem. I've found other StackOverflow questions with attribute lookup failures, but they were either substantively different or unanswered.