I have a Python script that does deepfake stuff, and I need to execute it from a UI program. I've tried to write it as a program, and I'm running into some issues.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using IronPython.Hosting;
using Microsoft.Scripting.Hosting;
namespace DeeepSliz
{
public static class Program
{
[STAThread]
static void Main()
{
Application.SetHighDpiMode(HighDpiMode.SystemAware);
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new Form1());
}
public static void Swagger()
{
var engine = Python.CreateEngine();
var script = @"C:\sliz\demo.py";
var source = engine.CreateScriptSourceFromFile(script);
var argv = new List<string>();
argv.Add("");
argv.Add("--lol");
engine.GetSysModule().SetVariable("argv", argv);
///
var eIO = engine.Runtime.IO;
///
var errors = new MemoryStream();
eIO.SetErrorOutput(errors, Encoding.Default);
var result = new MemoryStream();
eIO.SetOutput(result, Encoding.Default);
///
var scope = engine.CreateScope();
source.Execute(scope);
///
string str(byte[] x) => Encoding.Default.GetString(x);
Console.WriteLine("ERRORS:");
Console.WriteLine(str(errors.ToArray()));
Console.WriteLine();
Console.WriteLine("Results;");
Console.WriteLine(str(result.ToArray()));
}
}
}
This is what it looks like, and I wrote a button to execute that code:
private void button3_Click(object sender, EventArgs e)
{
Program.Swagger();
}
When I start the program and click "button3", I get an error.
And of course here is the Python script (which works normally on its own):
import matplotlib
matplotlib.use('Agg')
import os, sys
import yaml
import eel
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import torch
from sync_batchnorm import DataParallelWithCallback
from modules.generator import OcclusionAwareGenerator
from modules.keypoint_detector import KPDetector
from animate import normalize_kp
from scipy.spatial import ConvexHull
'''eel.init('web')
eel.start('main.html', size=(700, 700))'''
if sys.version_info[0] < 3:
raise Exception("You must use Python 3 or higher. Recommended version is Python 3.7")
def load_checkpoints(config_path, checkpoint_path, cpu=False):
with open(config_path) as f:
config = yaml.load(f)
generator = OcclusionAwareGenerator(**config['model_params']['generator_params'],
**config['model_params']['common_params'])
if not cpu:
generator.cuda()
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
if not cpu:
kp_detector.cuda()
if cpu:
checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
else:
checkpoint = torch.load(checkpoint_path)
generator.load_state_dict(checkpoint['generator'])
kp_detector.load_state_dict(checkpoint['kp_detector'])
if not cpu:
generator = DataParallelWithCallback(generator)
kp_detector = DataParallelWithCallback(kp_detector)
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(source_image, driving_video, generator, kp_detector, relative=True,
adapt_movement_scale=True, cpu=False):
with torch.no_grad():
predictions = []
source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
if not cpu:
source = source.cuda()
driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0,
4, 1, 2, 3)
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
if not cpu:
driving_frame = driving_frame.cuda()
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])
[0])
return predictions
def find_best_frame(source, driving, cpu=False):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True,
device='cpu' if cpu else 'cuda')
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving) ** 2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint", default='vox-cpk.pth.tar', help="path to checkpoint to
restore")
parser.add_argument("--source_image", default='sup-mat/source.png', help="path to source
image")
parser.add_argument("--driving_video", default='sup-mat/source.png', help="path to driving
video")
parser.add_argument("--result_video", default='result.mp4', help="path to output")
parser.add_argument("--relative", dest="relative", action="store_true", help="use relative
or absolute keypoint coordinates")
parser.add_argument("--adapt_scale", dest="adapt_scale", action="store_true", help="adapt
movement scale based on convex hull of keypoints")
parser.add_argument("--find_best_frame", dest="find_best_frame", action="store_true",
help="Generate from the frame that is the most aligned with source. (Only
for faces, requires face_alignment lib)")
parser.add_argument("--best_frame", dest="best_frame", type=int, default=None,
help="Set frame to start from.")
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint, cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(source_image,
driving_video, cpu=opt.cpu)
print ("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i+1)][::-1]
predictions_forward = make_animation(source_image, driving_forward, generator,
kp_detector, relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=opt.cpu)
predictions_backward = make_animation(source_image, driving_backward, generator,
kp_detector, relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image, driving_video, generator, kp_detector,
relative=opt.relative, adapt_movement_scale=opt.adapt_scale, cpu=opt.cpu)
imageio.mimsave(opt.result_video, [img_as_ubyte(frame) for frame in predictions], fps=fps)
I don't know how to fix this, please help.
Caveat: I haven't done this myself, my answer is entirely from Googling.
The error is saying that only one keyword argument is allowed.
This leads me to think that
OcclusionAwareGenerator(**config['model_params']['generator_params'], **config['model_params']['common_params'])
and
KPDetector(**config['model_params']['kp_detector_params'], **config['model_params']['common_params'])
are not valid in IronPython. You may need to merge the dictionaries and pass the combined one in with the ** syntax.
Using the second case as the basis for example purposes, you can use the copy module to create a new dictionary and then populate it with the values of the two existing ones:
import copy
params = copy.deepcopy(config['model_params']['kp_detector_params'])
params.update(config['model_params']['common_params'])
KPDetector(**params)
The deep copy is usually the safest, but copy.copy is an option as well and (based on a few assumptions) will likely not cause any issues.
Another, possibly simpler, option is to use collections.ChainMap to provide a combined view of the two dictionaries:
from collections import ChainMap
KPDetector(**ChainMap(config['model_params']['kp_detector_params'], config['model_params']['common_params']))
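A third option that needs no extra imports, assuming both parameter dicts use only string keys (which they should, since they come from the YAML config), is to merge them in a single dict() call so that only one ** unpacking appears in the constructor call:
params = dict(config['model_params']['kp_detector_params'], **config['model_params']['common_params'])
KPDetector(**params)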
Related
I am working on Udacity's self-driving car simulator. I am facing a problem: when I run the drive.py file with my model (model.h5) as the argument, nothing happens in the simulator.
The model trained completely without any errors, but there is still no response from the simulator.
Here is the drive.py code and a link to a video showing what is actually happening.
drive.py
import argparse
import base64
from datetime import datetime
import os
import shutil
import numpy as np
import socketio
import eventlet
import eventlet.wsgi
from PIL import Image
from flask import Flask
from io import BytesIO
from keras.models import load_model
import h5py
from keras import __version__ as keras_version
sio = socketio.Server()
app = Flask(__name__)
model = None
prev_image_array = None
class SimplePIController:
def __init__(self, Kp, Ki):
self.Kp = Kp
self.Ki = Ki
self.set_point = 0.
self.error = 0.
self.integral = 0.
def set_desired(self, desired):
self.set_point = desired
def update(self, measurement):
# proportional error
self.error = self.set_point - measurement
# integral error
self.integral += self.error
return self.Kp * self.error + self.Ki * self.integral
controller = SimplePIController(0.1, 0.002)
set_speed = 30
controller.set_desired(set_speed)
def crop_image(img, img_height=75, img_width=200):
height = img.shape[0]
width = img.shape[1]
y_start = 60
#x_start = int(width/2)-int(img_width/2)
return img[y_start:y_start+img_height, 0:width ]#x_start:x_start+img_width]
@sio.on('telemetry')
def telemetry(sid, data):
if data:
# The current steering angle of the car
steering_angle = data["steering_angle"]
# The current throttle of the car
throttle = data["throttle"]
# The current speed of the car
speed = data["speed"]
# The current image from the center camera of the car
imgString = data["image"]
image = Image.open(BytesIO(base64.b64decode(imgString)))
image_array = np.asarray(image)
image_array = crop_image(image_array)
steering_angle = float(model.predict(image_array[None, :, :, :], batch_size=1))
throttle = controller.update(float(speed))
print(steering_angle, throttle)
send_control(steering_angle, throttle)
# save frame
if args.image_folder != '':
timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
image_filename = os.path.join(args.image_folder, timestamp)
image.save('{}.jpg'.format(image_filename))
else:
# NOTE: DON'T EDIT THIS.
sio.emit('manual', data={}, skip_sid=True)
@sio.on('connect')
def connect(sid, environ):
print("connect ", sid)
send_control(0, 0)
def send_control(steering_angle, throttle):
sio.emit(
"steer",
data={
'steering_angle': steering_angle.__str__(),
'throttle': throttle.__str__()
},
skip_sid=True)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Remote Driving')
parser.add_argument(
'model',
type=str,
help='Path to model h5 file. Model should be on the same path.'
)
parser.add_argument(
'image_folder',
type=str,
nargs='?',
default='',
help='Path to image folder. This is where the images from the run will be saved.'
)
args = parser.parse_args()
# check that model Keras version is same as local Keras version
f = h5py.File(args.model, mode='r')
model_version = f.attrs.get('keras_version')
keras_version = str(keras_version).encode('utf8')
if model_version != keras_version:
print('You are using Keras version ', keras_version,
', but the model was built using ', model_version)
model = load_model(args.model)
if args.image_folder != '':
print("Creating image folder at {}".format(args.image_folder))
if not os.path.exists(args.image_folder):
os.makedirs(args.image_folder)
else:
shutil.rmtree(args.image_folder)
os.makedirs(args.image_folder)
print("RECORDING THIS RUN ...")
else:
print("NOT RECORDING THIS RUN ...")
# wrap Flask application with engineio's middleware
app = socketio.Middleware(sio, app)
# deploy as an eventlet WSGI server
eventlet.wsgi.server(eventlet.listen(('', 4567)), app)
problem video link
https://youtu.be/nP8WH8pM29Q
This is due to the socketio version. Use 4.2.1; that should fix your problem.
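For example, assuming the socketio import comes from the python-socketio package:
pip install python-socketio==4.2.1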
I have a question about my implementation of multi-threading.
Am I correctly implementing threading to perform SQL insertion on a separate thread while inference runs in the main thread? The program runs, but I occasionally get errors saying that I am attempting to use too many connections. I think I am exhausting the connection pool by creating more threads on every iteration of the loop before the earlier threads have finished. The program also behaves as if I have somehow turned my multi-threaded program into a single-threaded one, because whenever a detection occurs the FPS drops from 10 to somewhere between 4 and 6.
This code is based on a demo repository that I am working with to learn how to implement tracking and store the inference results in a database.
Server MySQL Version: 8.0.26-0ubuntu0.20.04.2 for Linux on x86_64 ((Ubuntu))
Client MySQL Version: Ver 14.14 Distrib 5.7.35, for Linux (aarch64) using EditLine wrapper
SHOW CREATE TABLE:
CREATE TABLE `nano_detections` (
  `PRIMARY_KEY` int NOT NULL AUTO_INCREMENT,
  `DATE` date DEFAULT NULL,
  `TIME` time DEFAULT NULL,
  `CONFIDENCE` float DEFAULT NULL,
  `IDENTITY` int DEFAULT NULL,
  `X1` int DEFAULT NULL,
  `Y1` int DEFAULT NULL,
  `X2` int DEFAULT NULL,
  `Y2` int DEFAULT NULL,
  `IMG` longblob,
  PRIMARY KEY (`PRIMARY_KEY`)
) ENGINE=InnoDB AUTO_INCREMENT=3613 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
"""trt_yolo.py
This script demonstrates how to do real-time object detection with
TensorRT optimized YOLO engine.
"""
import os
import time
import argparse
import cv2
import pycuda.autoinit # This is needed for initializing CUDA driver
import mysql.connector
from mysql.connector import pooling
import pymysql
from DBUtils.PooledDB import PooledDB
from datetime import date
#import local classes and their functions
from utils.yolo_classes import get_cls_dict
from utils.camera import add_camera_args, Camera
from utils.display import open_window, set_display, show_fps
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins_tracklite_mysql import TrtYOLO
from tracklite.utils.parser import get_config
from threading import Thread, Lock
WINDOW_NAME = 'TrtYOLODemo'
mySQLConnectionPool = PooledDB(creator = pymysql,
host = '192.168.1.131',
user = '*****',
password = '*****',
database = 'bird_detections',
autocommit = True,
charset = 'utf8mb4',
#cursorclass = pymysql.cursors.DictCursor,
blocking = False,
maxconnections = 50,
)
'''
database = mysql.connector.connect(
host='192.168.1.131',
user='*****',
password='*******',
database='bird_detections'
)
cursor = database.cursor()'''
sql_op = 'INSERT INTO nano_detections (DATE, TIME, CONFIDENCE, IDENTITY, X1, Y1, X2, Y2, IMG) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
mutex = Lock()
WINDOW_NAME = 'TrtYOLODemo'
def parse_args():
"""Parse input arguments."""
desc = ('Capture and display live camera video, while doing '
'real-time object detection with TensorRT optimized '
'YOLO model on Jetson')
parser = argparse.ArgumentParser(description=desc)
parser = add_camera_args(parser)
parser.add_argument(
'-c', '--category_num', type=int, default=80,
help='number of object categories [80]')
parser.add_argument(
'-m', '--model', type=str, required=True,
help=('[yolov3-tiny|yolov3|yolov3-spp|yolov4-tiny|yolov4|'
'yolov4-csp|yolov4x-mish]-[{dimension}], where '
'{dimension} could be either a single number (e.g. '
'288, 416, 608) or 2 numbers, WxH (e.g. 416x256)'))
parser.add_argument(
'-l', '--letter_box', action='store_true',
help='inference with letterboxed image [False]')
args = parser.parse_args()
return args
def loop_and_detect(cam, trt_yolo, conf_th, vis):
"""Continuously capture images from camera and do object detection.
# Arguments
cam: the camera instance (video source).
trt_yolo: the TRT YOLO object detector instance.
conf_th: confidence/score threshold for object detection.
vis: for visualization.
"""
today = date.today()
#full_screen is set to false by default
full_scrn = False
#fps is set at 0 by default
fps = 0.0
#create time variable for measuring the frames per second in real time
tic = time.time()
#while loop to perform inference
while True:
#mutex.acquire()
today_formatted = today.strftime("%y-%m-%d")
time_formatted = time.strftime("%H:%M:%S")
#determine if window is closed or not ????
#break the loop if window is closed
if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
break
#create img object from a reading of the camera frame
img = cam.read()
image_blob = cv2.imencode('.jpg', img)[1].tostring()
#break loop if the camera frame is none
if img is None:
break
#create bounding box coordinate, detection confidence, and class id from the detect function of the trt_yolo object.
img, outputs, scores = trt_yolo.detect(img, conf_th)
#mutex.release()
if len(outputs) > 0:
t = Thread(target=mysql_insert, args=(outputs, scores, today_formatted, time_formatted, image_blob))
t.start()
#mutex.acquire()
#img = vis.draw_bboxes(img, boxes, confs, clss)
img = show_fps(img, fps)
cv2.imshow(WINDOW_NAME, img)
toc = time.time()
curr_fps = 1.0 / (toc - tic)
# calculate an exponentially decaying average of fps number
fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
tic = toc
key = cv2.waitKey(1)
if key == 27: # ESC key: quit program
break
elif key == ord('F') or key == ord('f'): # Toggle fullscreen
full_scrn = not full_scrn
set_display(WINDOW_NAME, full_scrn)
#mutex.release()
def mysql_insert( bbox_xyxy, scores, today, time, img_blob):
mutex.acquire()
mySQLConnection = mySQLConnectionPool.connection()
mySQLCursor = mySQLConnection.cursor()
copy_bbox = bbox_xyxy
copy_scores = scores
copy_today= today
copy_time = time
copy_img_blob = img_blob
mutex.release()
if len(copy_bbox) > 0:
bbox_xyxy = copy_bbox[:, :4]
identities = copy_bbox[:, -1]
for box, identity, score in zip(bbox_xyxy, identities, copy_scores):
if score > 0 :
x1 = str(box[0])
y1 = str(box[1])
x2 = str(box[2])
y2 = str(box[3])
identity = str(identity)
confidence = str(score)
mySQLCursor.execute(sql_op, (copy_today, copy_time, confidence, identity, x1, y1, x2, y2, copy_img_blob))
mySQLCursor.close()
mySQLConnection.close()
return
#cursor.close()
#conn.close()
def test_insert():
sql_insert = 'INSERT INTO test_table(TEST_STRING) VALUES (%s)'
string_to_insert = 'TEST'
cursor.execute(sql_insert, (string_to_insert))
def main():
cfg_file = "./tracklite/configs/deep_sort.yaml"
cfg = get_config()
cfg = cfg.merge_from_file(cfg_file)
#parse arguments
args = parse_args()
#raise errors for lack of arguments, such as the category number and the model file
if args.category_num <= 0:
raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
if not os.path.isfile('yolo/%s.trt' % args.model):
raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)
#camera object instantiated with arguments
cam = Camera(args)
#raise error if cameras is not opened
if not cam.isOpened():
raise SystemExit('ERROR: failed to open camera!')
#create list of classes to be detected
cls_dict = get_cls_dict(args.category_num)
#instantiate vis object with class_dict passed as an argument
#BBOXVisualization contains code to draw boxes and assign colors to each class
vis = BBoxVisualization(cls_dict)
#instantiate the TtrYOLO object based on the arguments given in the command to start trt_yolo.py
trt_yolo = TrtYOLO(args.model, cfg, args.category_num, args.letter_box)
#open a window based on camera height and width
open_window(
WINDOW_NAME, 'Camera TensorRT YOLO Demo',
cam.img_width, cam.img_height)
#loop and perform detections
loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
cam.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
#Decoding Image From SQL:
#nparr = np.fromstring(STRING_FROM_DATABASE, np.uint8)
#img = cv2.imdecode(nparr, cv2.CV_LOAD_IMAGE_COLOR)
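For reference, one common way to avoid spawning a new thread (and grabbing a new pooled connection) per detection is a single writer thread fed by a queue; below is a minimal sketch reusing the mySQLConnectionPool and sql_op defined above (the names and placement are illustrative, not from the demo repository):
import queue
from threading import Thread

insert_queue = queue.Queue()

def insert_worker():
    # one connection for the lifetime of the worker, taken from the existing pool
    conn = mySQLConnectionPool.connection()
    cursor = conn.cursor()
    while True:
        item = insert_queue.get()
        if item is None:  # sentinel to shut the worker down
            break
        try:
            cursor.execute(sql_op, item)
        finally:
            insert_queue.task_done()
    cursor.close()
    conn.close()

writer = Thread(target=insert_worker, daemon=True)
writer.start()

# inside loop_and_detect, instead of starting a new Thread per frame:
# insert_queue.put((today_formatted, time_formatted, confidence, identity,
#                   x1, y1, x2, y2, image_blob))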
While working on the car behavioral cloning project, I successfully trained my model and generated the file model-001.h5.
Now, in order to test it, the model must successfully drive the vehicle in the Udacity self-driving simulator. This is done by running drive.py with the generated model-001.h5 file:
python drive.py model-001.h5
However, I receive this error:
x and y must have the same dtype, got tf.uint8 != tf.float32
x and y must have the same dtype, got tf.uint8 != tf.float32
x and y must have the same dtype, got tf.uint8 != tf.float32
Error message
I don't know whether this error comes from drive.py, model.py, or utils.py, but here is the drive.py code:
import argparse
import base64
from datetime import datetime
import os
import shutil
import numpy as np
import socketio
import eventlet
import eventlet.wsgi
from PIL import Image
from flask import Flask
from io import BytesIO
from tensorflow.keras.models import load_model
import utils
sio = socketio.Server()
app = Flask(__name__)
model = None
prev_image_array = None
MAX_SPEED = 25.0
MIN_SPEED = 10.0
speed_limit = MAX_SPEED
@sio.on('telemetry')
def telemetry(sid, data):
if data:
# The current steering angle of the car
steering_angle = float(data["steering_angle"])
# The current throttle of the car
throttle = float(data["throttle"])
# The current speed of the car
speed = float(data["speed"])
# The current image from the center camera of the car
image = Image.open(BytesIO(base64.b64decode(data["image"])))
# save frame
if args.image_folder != '':
timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
image_filename = os.path.join(args.image_folder, timestamp)
image.save('{}.jpg'.format(image_filename))
try:
image = np.asarray(image) # from PIL image to numpy array
image = utils.preprocess(image) # apply the preprocessing
image = np.array([image]) # the model expects 4D array
# predict the steering angle for the image
steering_angle = float(model.predict(image, batch_size=1))
# lower the throttle as the speed increases
# if the speed is above the current speed limit, we are on a downhill.
# make sure we slow down first and then go back to the original max speed.
global speed_limit
if speed > speed_limit:
speed_limit = MIN_SPEED # slow down
else:
speed_limit = MAX_SPEED
throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2
print('{} {} {}'.format(steering_angle, throttle, speed))
send_control(steering_angle, throttle)
except Exception as e:
print(e)
else:
# NOTE: DON'T EDIT THIS.
sio.emit('manual', data={}, skip_sid=True)
@sio.on('connect')
def connect(sid, environ):
print("connect ", sid)
send_control(0, 0)
def send_control(steering_angle, throttle):
sio.emit(
"steer",
data={
'steering_angle': steering_angle.__str__(),
'throttle': throttle.__str__()
},
skip_sid=True)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Remote Driving')
parser.add_argument(
'model',
type=str,
help='Path to model h5 file. Model should be on the same path.'
)
parser.add_argument(
'image_folder',
type=str,
nargs='?',
default='',
help='Path to image folder. This is where the images from the run will be saved.'
)
args = parser.parse_args()
model = load_model(args.model)
if args.image_folder != '':
print("Creating image folder at {}".format(args.image_folder))
if not os.path.exists(args.image_folder):
os.makedirs(args.image_folder)
else:
shutil.rmtree(args.image_folder)
os.makedirs(args.image_folder)
print("RECORDING THIS RUN ...")
else:
print("NOT RECORDING THIS RUN ...")
# wrap Flask application with engineio's middleware
app = socketio.Middleware(sio, app)
# deploy as an eventlet WSGI server
eventlet.wsgi.server(eventlet.listen(('', 4567)), app)
even though telemetry(sid, data) has an if statement that converts the incoming data to float format.
It'd be helpful to see on what line you get an error.
It's not obvious to me where the "if statement to change the incoming data to float format" that you refer to is. I see that the model is called on whatever image is loaded, in whatever datatype it comes from. I predict that the image is stored as uint8, the most efficient storage for a (0,255) colour representation.
So try:
image = np.asarray(image, dtype=np.float32)
This creates an array of the correct datatype for your model, which I am presuming is tf.float32.
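In context, that cast would go right before the preprocessing call in telemetry (assuming utils.preprocess does not itself change the dtype):
image = np.asarray(image, dtype=np.float32)  # cast from uint8 so the tf.float32 graph accepts it
image = utils.preprocess(image)              # apply the preprocessing
image = np.array([image])                    # the model expects a 4D array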
I've been fighting with TensorRT (TensorRT 4 for Python right now) for several weeks. I've worked through a lot of problems to get TensorRT running. The example code from NVIDIA works well for me:
TensorRT MNIST example
Now I have created my own network in TensorFlow (a very simple one) for upscaling images, let's say (in HWC) from 320x240x3 to 640x480x3. The usual way, creating a frozen graph and running an inferencer based only on TensorFlow, gave me the expected results, but not when using TensorRT.
I have a strange feeling that I did something wrong when feeding the images into GPU memory (this is probably an issue with pycuda and/or TensorRT).
The worst-case scenario would be that TensorRT destroys my network through the optimization process.
I hope someone has even a small idea that can save my life.
This is my TensorFlow model (I just wrapped the functions):
net = conv2d(input,
64,
k_size=3,
activation=tf.nn.relu,
name='conv1')
net = deconv2d(net,
3,
k_size=5,
activation=tf.tanh,
stride=self.params.resize_factor,
scale=self.params.resize_factor,
name='deconv')
This is the important snippet of my inferencer:
import tensorrt as trt
import uff
from tensorrt.parsers import uffparser
import pycuda.driver as cuda
import numpy as np
...
def _init_infer(self, uff_model):
g_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
parser = uffparser.create_uff_parser()
parser.register_input(self.input_node, (self.channels, self.height, self.width), 0)
parser.register_output(self.output_node)
self.engine = trt.utils.uff_to_trt_engine(g_logger, uff_model, parser, self.max_batch_size,
self.max_workspace_size)
parser.destroy()
self.runtime = trt.infer.create_infer_runtime(g_logger)
self.context = self.engine.create_execution_context()
self.output = np.empty(self.output_size, dtype=self.dtype)
# create CUDA stream
self.stream = cuda.Stream()
# allocate device memory
self.d_input = cuda.mem_alloc(self.channels * self.max_batch_size * self.width *
self.height * self.output.dtype.itemsize)
self.d_output = cuda.mem_alloc(self.output_size * self.output.dtype.itemsize)
self.bindings = [int(self.d_input), int(self.d_output)]
def infer(self, input_batch, batch_size=1):
# transfer input data to device
cuda.memcpy_htod_async(self.d_input, input_batch, self.stream)
# execute model
self.context.enqueue(batch_size, self.bindings, self.stream.handle, None)
# transfer predictions back
cuda.memcpy_dtoh_async(self.output, self.d_output, self.stream)
# synchronize threads
self.stream.synchronize()
return self.output
And the executable snippet:
...
# create trt inferencer
trt_inferencer = TensorRTInferencer(params=params)
img = [misc.imread('./test_images/lion.png')]
img[0] = normalize(img[0])
img = img[0]
# inferencing method
result = trt_inferencer.infer(img)
result = inormalize(result, dtype=np.uint8)
result = result.reshape(1, params.height * 2, params.width * 2, 3)
...
And the weird result by comparison :(
upscaled lion TensorRT, Tensorflow, Original
I got it now, finally. The problem was the wrong dimensions and ordering of the input images and the output. For everyone who runs into the same problem, this is the adapted executable snippet, dependent on my initialization:
...
# create trt inferencer
trt_inferencer = TensorRTInferencer(params=params)
img = [misc.imread('./test_images/lion.png')]
img[0] = normalize(img[0])
img = img[0]
img = np.transpose(img, (2, 0, 1))
img = img.ravel()
# inferencing method
result = trt_inferencer.infer(img)
result = inormalize(result, dtype=np.uint8)
result = np.reshape(result, newshape=[3, params.height * 2, params.width * 2])
result = np.transpose(result, (1, 2, 0))
...
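Stated generally, the fix reorders the input from HWC to the CHW layout registered with the uff parser, flattens it into a contiguous buffer before copying to device memory, and reverses the process on the output. A small standalone NumPy illustration using the 320x240x3 example above (shapes only, no TensorRT calls):
import numpy as np

hwc = np.random.rand(240, 320, 3).astype(np.float32)               # input image, height x width x channels
chw = np.ascontiguousarray(np.transpose(hwc, (2, 0, 1)))           # channels x height x width, as registered with the parser
flat_in = chw.ravel()                                               # 1-D buffer handed to cuda.memcpy_htod_async

flat_out = np.empty(3 * 480 * 640, dtype=np.float32)                # engine output buffer (upscaled size)
upscaled = np.transpose(flat_out.reshape(3, 480, 640), (1, 2, 0))   # back to HWC for saving/display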
I am using Caffe to do image classification on Mac OS X with Python.
Right now I know how to classify a list of images using Caffe with Python, but I want to make it faster by using Spark.
Therefore, I tried to apply the image classification to each element of an RDD created from a list of image paths. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
image = caffe.io.load_image(image_path)
transformed_image = transformer.preprocess('data', image)
net.blobs['data'].data[...] = transformed_image
output = net.forward()
output_prob = output['prob'][0]
pred = output_prob.argmax()
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(labels_file, str, delimiter='\t')
lb = labels[pred]
image_name = image_path.split(images_folder_path)[1]
result_str = 'image: '+image_name+' prediction: '+str(pred)+' label: '+lb
return result_str
This is the code that generates the Caffe parameters and applies the classify_image method to each element of the RDD:
def main():
sys.path.insert(0, caffe_root + 'python')
caffe.set_mode_cpu()
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,
model_weights,
caffe.TEST)
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu)
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
net.blobs['data'].reshape(50,
3,
227, 227)
image_list= []
for image_path in glob.glob(images_folder_path+'*.jpg'):
image_list.append(image_path)
images_rdd = sc.parallelize(image_list)
transformer_bc = sc.broadcast(transformer)
net_bc = sc.broadcast(net)
image_predictions = images_rdd.map(lambda image_path: classify_image(image_path, transformer_bc, net_bc))
print image_predictions
if __name__ == '__main__':
main()
As you can see, here I tried to broadcast the Caffe parameters: transformer_bc = sc.broadcast(transformer) and net_bc = sc.broadcast(net).
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I added the broadcast, the error was:
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know of any way I can classify images using Caffe while also taking advantage of Spark?
When you work with complex, non-native objects, initialization has to be moved directly to the workers, for example with a singleton module:
net_builder.py:
import caffe
net = None
def build_net(*args, **kwargs):
... # Initialize net here
return net
def get_net(*args, **kwargs):
global net
if net is None:
net = build_net(*args, **kwargs)
return net
main.py:
import net_builder
sc.addPyFile("net_builder.py")
def classify_image(image_path, transformer, *args, **kwargs):
net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. It can be done either manually or by using the SparkFiles mechanism.
On a side note you should take a look at the SparkNet package.
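For the SparkFiles route mentioned above, a minimal sketch of shipping the model files to the executors (the driver-side paths are hypothetical; the worker-side resolution would go inside build_net):
from pyspark import SparkFiles

# on the driver: distribute the helper module and the model files
sc.addPyFile("net_builder.py")
sc.addFile("/path/on/driver/deploy.prototxt")
sc.addFile("/path/on/driver/bvlc_reference_caffenet.caffemodel")

# on a worker (e.g. inside build_net): resolve the local copies
model_def = SparkFiles.get("deploy.prototxt")
model_weights = SparkFiles.get("bvlc_reference_caffenet.caffemodel")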