accelerating DEAP using multiprocessing reports OSError - python

I want to accelerate DEAP using multiprocessing but I always get an OSError. Here is an abbreviated version of my code:
import operator
import math
import random
import numpy as np
import pandas as pd
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
import multiprocessing

# protectedDiv
def protectedDiv(left, right):
    try:
        return left / right
    except ZeroDivisionError:
        return 1

# omitting some other functions

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

# here is the DEAP strongly typed GP setup
pset = gp.PrimitiveSetTyped("MAIN", [np.ndarray] * 12, np.ndarray)
pset.addPrimitive(operator.add, [np.ndarray, np.ndarray], np.ndarray)
pset.addPrimitive(operator.sub, [np.ndarray, np.ndarray], np.ndarray)
pset.renameArguments(ARG0='close')
pset.renameArguments(ARG1='open')

# here is the fitness function. My goal is to maximize the stock return's ICIR.
def evalSymbReg(individual):
    # omitting code
    return icir,

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evalSymbReg)
toolbox.register("select", tools.selTournament, tournsize=10)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutUniform", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=10))
toolbox.decorate("mutUniform", gp.staticLimit(key=operator.attrgetter("height"), max_value=10))

def main():
    n_sample = 5000
    n_gen = 40
    cxpb = 0.6
    mutUniformpb = 0.4
    pop = toolbox.population(n=n_sample)
    hof = tools.HallOfFame(10)
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", np.nanmean)
    mstats.register("min", np.nanmin)
    mstats.register("max", np.nanmax)
    # my_eaSimple and the extra mutation probabilities come from code omitted above
    pop, log = algorithms.my_eaSimple(pop, toolbox, cxpb, mutUniformpb, mutNodeReplacementpb, mutEphemeralpb, mutShrinkpb,
                                      n_gen, stats=mstats, halloffame=hof, verbose=True)
    # print log
    return pop, log, hof, info, top10

# here is my data file.
df = pd.read_csv(r'C:\Users\xxyao\research\国债期货\data\data_summary.csv')
df['pct-1'] = df['close'].pct_change().shift(-1)
df['month'] = [x[0:7] for x in df['date']]

if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=6)
    toolbox.register('map', pool.map)
    pop, log, hof, info, top10 = main()
When I run the code I get an error message like this:
This message is shown repeatedly in the window. I don't know what is wrong. I protected the Pool() inside if __name__ == "__main__" as the DEAP documentation says, but it still doesn't work. Can somebody help me please?

Put this code inside main() and it will work:
pool = multiprocessing.Pool(processes=6)
toolbox.register('map', pool.map)
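The tail of the script would then read roughly as follows (a minimal sketch; the close/join calls are optional tidy-up):
def main():
    # create the pool inside main(), before any evaluation happens
    pool = multiprocessing.Pool(processes=6)
    toolbox.register('map', pool.map)
    # ... population setup and the my_eaSimple call as above ...
    pool.close()
    pool.join()
    return pop, log, hof, info, top10

if __name__ == "__main__":
    pop, log, hof, info, top10 = main()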


How to use multiprocessing in another code and function? [duplicate]

I am trying to multiprocess cell detection via the deepcell package. The deepcell detection works fine on small images, but for bigger images it either fails or takes a really, really long time.
So I'm trying to cut the images into small patches and then use multiprocessing to feed them to the cell detection.
I need to be able to run the pool_cell_detection() function from another module and get its return value (allPoints), whereas if I use it inside an if __name__ == '__main__' wrapper, I cannot get the return value. Can you suggest how I can do this?
Here is my code1:
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2 as cv
from multiprocessing import Pool
from deepcell.mesmer import Mesmer
import time

blevel_image = cv.imread("./images/blevel_eq_p.png", 0)
app = Mesmer()

def deepcell_detection(image0, mpp):
    print(type(image0))
    cv.imwrite("./images/image1.png", image0)
    image = np.stack((image0, image0), axis=-1)
    image = np.expand_dims(image, 0)
    labeled_image, coords = app.predict(image, image_mpp=mpp)
    print(len(coords))
    return coords

def pool_cell_detection(img_channel):
    blobs_log = []
    r, c = img_channel.shape[0:2]
    mpp = 2
    rstep = r // 10
    cstep = c // 10
    patches = []
    for i in range(10):
        for j in range(10):
            img_patch = img_channel[i*rstep:(i+1)*rstep, j*cstep:(j+1)*cstep]
            patches.append([img_patch, mpp])
    with Pool(4) as p:
        print("pooling")
        allPoints = p.map(deepcell_detection, patches)
    return allPoints

def main():
    allPoints = pool_cell_detection(blevel_image)

if __name__ == '__main__':
    main()
In code 2 I need something like the following:
import code1

def func_something():
    # Many operations
    allPoints = pool_cell_detection(blevel_image)
But I'm not sure how to write code 2 so that I can get the allPoints.
As the multiprocessing documentation says, the entry point of a multiprocessing program must be wrapped in if __name__ == '__main__': when using the spawn start method, which is the only start method available on Windows (where you're running).
Change the final invocation from
blobs_log = pool_cell_detection(blevel_image)
to e.g.
def main():
    blobs_log = pool_cell_detection(blevel_image)

if __name__ == '__main__':
    main()
Following up on the comment threads, all in all, you might have, let's say, multiprocessing_cell_detect.py:
import multiprocessing

import cv2 as cv
import numpy as np
from deepcell.mesmer import Mesmer

def deepcell_detection(image0, mpp):
    cv.imwrite("./images/image1.png", image0)
    image = np.stack((image0, image0), axis=-1)
    image = np.expand_dims(image, 0)
    labeled_image, coords = Mesmer().predict(image, image_mpp=mpp)
    return coords

def pool_cell_detection(img_channel):
    r, c = img_channel.shape[0:2]
    mpp = 2
    rstep = r // 10
    cstep = c // 10
    patches = []
    for i in range(10):
        for j in range(10):
            img_patch = img_channel[i * rstep:(i + 1) * rstep, j * cstep:(j + 1) * cstep]
            patches.append((img_patch, mpp))
    with multiprocessing.Pool(4) as p:
        # each patch is an (image, mpp) pair, so starmap is used to unpack
        # the two arguments that deepcell_detection expects
        return p.starmap(deepcell_detection, patches)
and my_cell_program.py:
import cv2 as cv

from multiprocessing_cell_detect import pool_cell_detection

def main():
    blevel_image = cv.imread("./images/blevel_eq_p.png", 0)
    allPoints = pool_cell_detection(blevel_image)
    print(allPoints)

if __name__ == '__main__':
    main()
and you run python my_cell_program.py.
So long as the main entry point of the program is guarded, things will work. The module you import a multiprocessing thing from will not need to be guarded (unless you also wish to use that module stand-alone).

running two different methods at the same time

I am trying to run two methods at the same time in Python. One of them plays the sound of a sine wave and the other one measures it. Both methods work fine on their own, but I could not figure out how to start them at the same time.
import sounddevice as sd
from numpy import linalg as LA
import numpy as np
from pysine import sine
import time
from threading import Thread
import math

duration = 10  # seconds

def print_sound(indata, outdata, frames, time, status):
    volume_norm = np.linalg.norm(indata) * 10
    dB = 20 * math.log(volume_norm)
    print(int(dB))

def sine_wave():
    frequency = 5000
    sine(frequency=(frequency), duration=1.0)
    sine_wave()

with sd.Stream(callback=print_sound):
    sd.sleep(duration * 10000)

def main():
    thr1 = Thread(target=print_sound)
    thr2 = Thread(target=sine_wave)

if __name__ == "__main__":
    thr1.start()
    thr2.start()
Your syntax is wrong. Try this:
import sounddevice as sd
from numpy import linalg as LA
import numpy as np
from pysine import sine
import time
from threading import Thread
import math

duration = 10  # seconds

def print_sound(indata, outdata, frames, time, status):
    volume_norm = np.linalg.norm(indata) * 10
    dB = 20 * math.log(volume_norm)
    print(int(dB))

def sine_wave():
    frequency = 5000
    sine(frequency=(frequency), duration=1.0)

with sd.Stream(callback=print_sound):
    sd.sleep(duration * 10000)

def main():
    thr1 = Thread(target=print_sound)
    thr2 = Thread(target=sine_wave)
    thr1.start()
    thr2.start()

if __name__ == "__main__":
    main()
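One caveat on that code: print_sound is a stream callback that sounddevice invokes with five arguments, so handing it directly to Thread(target=...) will fail when the thread calls it with none. A safer variant wraps the stream in a small no-argument function; a minimal sketch reusing the definitions above (the measure name is made up here, and note that sd.sleep takes milliseconds):
def measure():
    # print_sound runs as the stream callback while this thread sleeps
    with sd.Stream(callback=print_sound):
        sd.sleep(duration * 1000)

def main():
    thr1 = Thread(target=measure)
    thr2 = Thread(target=sine_wave)
    thr1.start()
    thr2.start()
    # wait for both threads so the program doesn't exit early
    thr1.join()
    thr2.join()

if __name__ == "__main__":
    main()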

Meshcat not showing the changes to a Free Body's Pose

I've been trying to create my own ManipulationStation for a different robot arm using Pydrake, but I've been unsuccessful so far in adding clutter to my ManipulationStation. For some odd reason, Meshcat won't show the updated poses of my objects.
import numpy as np
import glob
from pydrake.geometry import MeshcatVisualizerCpp
from pydrake.math import RigidTransform, RotationMatrix
from pydrake.systems.analysis import Simulator
from pydrake.systems.framework import DiagramBuilder
from pydrake.all import (
    DiagramBuilder, FindResourceOrThrow,
    SceneGraph, Diagram,
    MultibodyPlant, Parser, Simulator, MeshcatVisualizerCpp,
    UniformlyRandomRotationMatrix, RandomGenerator)
from pydrake.geometry import Meshcat

class DexterPPStation(Diagram):
    def __init__(self, time_step, file_path):
        super().__init__()
        self.time_step = time_step
        self.path = file_path

        self.plant = MultibodyPlant(self.time_step)
        self.scene_graph = SceneGraph()
        self.plant.RegisterAsSourceForSceneGraph(self.scene_graph)
        self.controller_plant = MultibodyPlant(self.time_step)
        self.object_ids = []
        self.object_poses = []

    def AddObject(self, file, name, pose):
        model_idx = Parser(self.plant).AddModelFromFile(file, name)
        indices = self.plant.GetBodyIndices(model_idx)
        self.object_ids.append(indices[0])
        self.object_poses.append(pose)
        return model_idx

    def CreateBins(self, path, XP_B1, XP_B2):
        bin1 = Parser(self.plant).AddModelFromFile(path, "bin1")
        self.plant.WeldFrames(self.plant.world_frame(), self.plant.GetFrameByName("bin_base", bin1), XP_B1)
        bin2 = Parser(self.plant).AddModelFromFile(path, "bin2")
        self.plant.WeldFrames(self.plant.world_frame(), self.plant.GetFrameByName("bin_base", bin2), XP_B2)

    def CreateRandomPickingObjects(self, n=4):
        choices = [f for f in glob.glob("/opt/drake/share/drake/manipulation/models/ycb/sdf/*.sdf")]
        z = 0.1
        rs = np.random.RandomState()
        generator = RandomGenerator(rs.randint(1000))
        for i in range(n):
            obj = choices[i]
            pose = RigidTransform(
                UniformlyRandomRotationMatrix(generator),
                [rs.uniform(.35, 0.6), rs.uniform(-.2, .2), z])
            model = self.AddObject(obj, obj.split("/")[-1].split(".")[0] + str(i), pose)
            body_idx = self.plant.GetBodyIndices(model)[0]
            self.object_ids.append(body_idx)
            self.object_poses.append(pose)
            z += 0.1

    def SetRandomPoses(self, station_context):
        plant_context = self.GetSubsystemContext(self.plant, station_context)
        for i in range(len(self.object_ids)):
            self.plant.SetFreeBodyPose(plant_context, self.plant.get_body(self.object_ids[i]), self.object_poses[i])

    def Finalize(self):
        self.plant.Finalize()
        self.controller_plant.Finalize()

        builder = DiagramBuilder()
        builder.AddSystem(self.plant)
        builder.AddSystem(self.controller_plant)
        builder.AddSystem(self.scene_graph)

        builder.Connect(self.plant.get_geometry_poses_output_port(), self.scene_graph.get_source_pose_port(self.plant.get_source_id()))
        builder.Connect(self.scene_graph.get_query_output_port(), self.plant.get_geometry_query_input_port())

        builder.ExportOutput(self.scene_graph.get_query_output_port(), "query_object")
        builder.ExportOutput(self.plant.get_geometry_poses_output_port(), "geometry_poses")
        builder.ExportOutput(self.scene_graph.get_query_output_port(), "geometry_query")
        builder.ExportOutput(self.plant.get_contact_results_output_port(), "contact_results")
        builder.ExportOutput(self.plant.get_state_output_port(), "plant_continuous_state")

        builder.BuildInto(self)
To test my code, I've been running the script below.
def test():
    builder = DiagramBuilder()

    station = DexterPPStation(1e-4, "drake/manipulation/models/final_dexter_description/urdf/dexter.urdf")
    station.CreateBins("/opt/drake/share/drake/examples/manipulation_station/models/bin.sdf", RigidTransform(np.array([0.5, 0, 0])), RigidTransform(np.array([0, 0.5, 0])))
    station.CreateRandomPickingObjects(1)
    station.Finalize()
    builder.AddSystem(station)

    station_context = station.CreateDefaultContext()
    station.SetRandomPoses(station_context)
    MeshcatVisualizerCpp.AddToBuilder(builder, station.GetOutputPort("query_object"), meshcat)
    diagram = builder.Build()

    simulator = Simulator(diagram)
    simulator.set_target_realtime_rate(1.0)
    simulator.AdvanceTo(0.1)

test()
I've tried to call the SetRandomPoses() function from inside my Finalize() method, but since I needed to pass in a context to the function, I wasn't sure what to do. I'm new to Drake, so any input would be greatly appreciated.
You've created a station_context and set the random poses in it, but then you don't use it anywhere. When you create the Simulator, it creates another Context (with the default values), and that is what gets published when you call AdvanceTo.
The solution here, I think, is to not create your own station_context, but do e.g.
simulator = Simulator(diagram)
diagram_context = simulator.get_mutable_context()
station_context = station.GetMyMutableContextFromRoot(diagram_context)
station.SetRandomPoses(station_context)
then you can call AdvanceTo.
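Putting that together, the tail of the test() script above would read roughly like this (a sketch combining the question's code with the fix):
diagram = builder.Build()
simulator = Simulator(diagram)
# pull the station's context out of the simulator's root context,
# so the pose changes land in the context that AdvanceTo publishes
diagram_context = simulator.get_mutable_context()
station_context = station.GetMyMutableContextFromRoot(diagram_context)
station.SetRandomPoses(station_context)
simulator.set_target_realtime_rate(1.0)
simulator.AdvanceTo(0.1)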

Optimize output of a script by varying input parameters

I have written a script that uses the code below, and I would like to optimize rsi_high and rsi_low to get the best sharpe_ratio:
import numpy
import talib as ta

global rsi_high, rsi_low
rsi_high = 63
rsi_low = 41

def myTradingSystem(DATE, OPEN, HIGH, LOW, CLOSE, VOL, exposure, equity, settings):
    ''' This system uses trend following techniques to allocate capital into the desired equities'''
    nMarkets = CLOSE.shape[1]  # SHAPE OF NUMPY ARRAY
    result, rsi_pos = numpy.apply_along_axis(rsicalc, axis=0, arr=CLOSE)
    pos = numpy.asarray(rsi_pos, dtype=numpy.float64)
    return pos, settings

def rsicalc(num):
    # print rsi_high
    try:
        rsival = ta.RSI(numpy.array(num, dtype='f8'), timeperiod=14)
        if rsival[14] > rsi_high:
            pos_rsi = 1
        elif rsival[14] < rsi_low:
            pos_rsi = -1
        else:
            pos_rsi = 0
    except:
        rsival = 0
        pos_rsi = 0
    return rsival, pos_rsi

def mySettings():
    ''' Define your trading system settings here '''
    settings = {}
    # Futures Contracts
    settings['markets'] = ['CASH', 'F_AD', 'F_BO', 'F_BP', 'F_C', 'F_CC', 'F_CD',
                           'F_CL', 'F_CT', 'F_DX', 'F_EC', 'F_ED', 'F_ES', 'F_FC', 'F_FV', 'F_GC',
                           'F_HG', 'F_HO', 'F_JY', 'F_KC', 'F_LB', 'F_LC', 'F_LN', 'F_MD', 'F_MP',
                           'F_NG', 'F_NQ', 'F_NR', 'F_O', 'F_OJ', 'F_PA', 'F_PL', 'F_RB', 'F_RU',
                           'F_S', 'F_SB', 'F_SF', 'F_SI', 'F_SM', 'F_TU', 'F_TY', 'F_US', 'F_W',
                           'F_XX', 'F_YM']
    settings['slippage'] = 0.05
    settings['budget'] = 1000000
    settings['beginInSample'] = '19900101'
    settings['endInSample'] = '19931231'
    settings['lookback'] = 504
    return settings

# Evaluate trading system defined in current file.
if __name__ == '__main__':
    import quantiacsToolbox
    results = quantiacsToolbox.runts(__file__, plotEquity=False)
    sharpe_ratio = results['stats']['sharpe']
I suspect that something like scipy's minimize function would do the trick, but I am having trouble understanding how to package my script into a usable form.
I have tried putting everything in a function and then running all the code through a number of loops, incrementing the values each time, but there must be a more elegant way of doing this.
Apologies for posting all my code, but I thought it would help if a responder wanted to reproduce my setup, and it gives anyone new to Quantiacs who faces the same issue a real example to look at.
Thanks for your help in advance!
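One way to package it, as a rough sketch: wrap the backtest in an objective function that writes the candidate thresholds into the two globals and returns the negative Sharpe ratio, then hand that to an optimizer. Since rsi_high and rsi_low are integer thresholds, a coarse grid search such as scipy.optimize.brute may fit better than a gradient-based minimizer. This assumes quantiacsToolbox.runts picks up the module-level globals on each run; if it re-imports the file fresh, the values would have to be passed some other way:
import quantiacsToolbox
from scipy import optimize

def objective(params):
    # write the candidate thresholds into the globals rsicalc reads
    global rsi_high, rsi_low
    rsi_high, rsi_low = params
    results = quantiacsToolbox.runts(__file__, plotEquity=False)
    return -results['stats']['sharpe']  # minimize the negative Sharpe ratio

if __name__ == '__main__':
    # step rsi_high through [55, 75) and rsi_low through [30, 50) in steps of 5
    best = optimize.brute(objective, (slice(55, 75, 5), slice(30, 50, 5)), finish=None)
    print(best)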

How to broadcast a complex class object in pyspark across the clusters

The following is the code, where py_cpp_bind refers to a piece of code written in C++11 and then bound to Python using boost-python (with pickling enabled). In order to initialize the object, it requires three arguments (filename, int, int). I want to broadcast this object across the cluster, as it is required to perform a computation for each element.
However, on execution Apache Spark complains with:
Caused by: java.io.EOFException
at java.io.DataInputStream.readInt(DataInputStream.java:392)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
... 15 more
Code:
from pyspark.serializers import BatchedSerializer, PickleSerializer
from pyspark import SparkContext, SparkConf
import py_cpp_bind

def populate_NL(n, tk2):
    tk = [list(tk2[0]), tk2[1]]
    res = mscore.score(tk[1], tk[0])
    return res

def main(n, sc):
    mscore = py_cpp_bind.score()
    # following line constructs the object from the given arguments
    print mscore.init("data/earthquake.csv", n, 4000)
    broadcastVar = sc.broadcast(mdl)
    C = [((0,), [1])]
    C = sc.parallelize(C).flatMap(lambda X: populate(n, X))
    print(C.collect())

if __name__ == "__main__":
    conf = SparkConf().setMaster("local[*]")
    conf = conf.setAppName("TEST")
    sc = SparkContext(conf=conf, serializer=PickleSerializer())
    n = 5
    main(n, sc)
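For what it's worth, the usual broadcast pattern is to broadcast the initialized object itself and read it back through .value inside the function that runs on the executors. A rough sketch of main, assuming the boost-python pickling support actually round-trips the object:
def main(n, sc):
    mscore = py_cpp_bind.score()
    # construct the object from the given arguments
    mscore.init("data/earthquake.csv", n, 4000)
    # broadcast the initialized object itself, not an undefined name
    broadcastVar = sc.broadcast(mscore)

    def populate(n, tk2):
        tk = [list(tk2[0]), tk2[1]]
        # read the broadcast object back on the executor
        return broadcastVar.value.score(tk[1], tk[0])

    C = [((0,), [1])]
    # map rather than flatMap here, since populate returns a single score
    C = sc.parallelize(C).map(lambda X: populate(n, X))
    print(C.collect())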
