Sending ROS messages through a cv2 python script - python

I am trying to implement a robotic solution, where the user will click on a point through a camera feed of an area and the mobile 4 wheel robot will path its way to that location.
I have created the part of translating video pixel coordinates to ground coordinates through the use of the homograph transformation and would like to implement the part of sending the ground coordinates to RViz.
The part of calculating the ground coordinates is shown bellow:
global h
font = cv2.FONT_HERSHEY_SIMPLEX #Loading neccessary fonts
with open("save.h", "rb") as f:
h = load(f)
print "Homographic matrix loaded successfully."
def draw_circle2(event,x,y,flags,param):
global h,mouseX,mouseY, num_of_points, complete,np_coord_on_screen,np_coord_on_ground, myImage, emptyFrame, coord_on_screen, coord_on_ground
if event == cv2.EVENT_LBUTTONDBLCLK:
a = np.array([[x, y]], dtype='float32')
a = np.array([a])
pointOut = cv2.perspectiveTransform(a, h) #Calculation of new point location
loc_pointOut = tuple(pointOut)
pointOut=(loc_pointOut[0][0][0],loc_pointOut[0][0][1]) #Point on ground
print "Current Location: "+str(pointOut)
cv2.imshow('Video',emptyFrame) #Showing emptyFrame
cv2.circle(myImage,(x,y),4,(255,0,0),-1) #Draw a circle on myImage
cv2.putText(myImage,(str((round(pointOut[0],2)))+","+str(round(pointOut[1],2))), (x-5,y-15),font, 0.4,(0,255,0)) #Draw the text
cv2.imshow('Video',myImage) #Showing resulting myImage
myImage = emptyFrame.copy()
# Initial code
# Showing first frame on screen
raw_input("Press any key...")
clear_all()
cv2.namedWindow('Video') #Naming the window to use.
cv2.setMouseCallback('Video',draw_circle2) #Set mouse callBack function.
ret, frame = cap.read() #Get image from camera
if (ret): #If frame has image, show the image on screen
global myImage, emptyFrame
myImage = frame.copy()
emptyFrame = frame.copy()
cv2.imshow('Video',myImage) # Show the image on screen
while True: # making a loop
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if (cv2.waitKey(1) & 0xFF == ord('c')):
#Deleting points from image
cv2.imshow('Video',emptyFrame) #Show the image again, deleting all graphical overlays like text and shapes
coord_on_screen = [] #Resetting coordinate lists
coord_on_ground=[] #Resetting coordinate lists
if (cv2.waitKey(1) & 0xFF == ord('s')):
cap.release()
cv2.destroyAllWindows()
init()
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
So, pointOut is the point on the ground (example: (2,4) m)
I need some directions on how to implement the creation of a node and the transmission of the pointOut Marker message to a ROS topic in my code.
My configuration is ROS Kinetic Kame, Python 2.7

Related

openCV - camera view window has different size of camera resolution

I have a 720p resolution camera and I'm trying to open the camera in a view window in openCV. here's the code:
while(True):
ret, frame = vid.read()
if render_poly:
frame = cv2.polylines(frame, [pts],
isClosed, color, thickness)
cv2.imshow('window', frame)
key = cv2.waitKey(1) & 0xFF
if key == ord('q'):
break
elif key == ord('p'):
render_poly = not render_poly
Before that, I tried to ajust the view window with this code (720p resolution):
vid = cv2.VideoCapture(0)
vid.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
vid.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
What I expected is 1280x720 view window size, but for some reason I got 1600x900 which caused incorrect positioning of the polygon I'm trying to render, and to solve that I needed to adjust its coordinates by multiplying it with the scale of 1280/1600 in width and 720/900 in height.
Can I skip that proccess (which will be more in the future project) and just to render the camera view on a window with its same resolution size? is it a good idea?

Making a face detection script with python inside blender

I'm trying to make a python script inside blender that uses the mediapipe module to get the coordinates of the landmarks of my hands, face, and body and then i want to move an object to thosse specific coordinates
I got the coordinate part done but i'm having trouble getting it to work inside blender
import mediapipe as mp
import inspect
import cv2
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0, min_tracking_confidence=0) as holistic:
while cap.isOpened():
ret, frame = cap.read()
# Recolor Feed
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Make Detections
results = holistic.process(image)
# print(results.face_landmarks)
# face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
# Recolor image back to BGR for rendering
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if bool(results.right_hand_landmarks):
print(results.right_hand_landmarks)
for data_point in results.right_hand_landmarks.landmark:
print("X = " + str(data_point.x))
print("Y = " + str(data_point.y))
print("Z = " + str(data_point.z))
# Center coordinates
h, w, c = image.shape
center_coordinates = (int(w * data_point.x), int(h * data_point.y))
# Radius of circle
radius = 5
# Blue color in BGR
color = (int(-1000 * data_point.z), int(-1000 * data_point.z), int(-1000 * data_point.z))
# Line thickness of 2 px
thickness = 5
# Using cv2.circle() method
# Draw a circle with blue line borders of thickness of 2 px
image = cv2.circle(image, center_coordinates, radius, color, thickness)
cv2.imshow('Raw Webcam Feed', image)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
this script would print (and also show) the coordinate of the hand landmarks but what im having trouble with is that this is in a while loop and blender would freeze imediatly after i run my script and would only turn back on when i stopped the script.
I've found a template for "operator modal timer" which can run a script in a kind of like while loop while still giving time for blender to refresh and do all of its other things.
import bpy
class ModalTimerOperator(bpy.types.Operator):
"""Operator which runs its self from a timer"""
bl_idname = "wm.modal_timer_operator"
bl_label = "Modal Timer Operator"
_timer = None
def modal(self, context, event):
if event.type in {'RIGHTMOUSE', 'ESC'}:
self.cancel(context)
return {'CANCELLED'}
if event.type == 'TIMER':
# change theme color, silly!
return {'PASS_THROUGH'}
def execute(self, context):
wm = context.window_manager
self._timer = wm.event_timer_add(0.1, window=context.window)
wm.modal_handler_add(self)
return {'RUNNING_MODAL'}
def cancel(self, context):
wm = context.window_manager
wm.event_timer_remove(self._timer)
def register():
bpy.utils.register_class(ModalTimerOperator)
def unregister():
bpy.utils.unregister_class(ModalTimerOperator)
if __name__ == "__main__":
register()
# test call
bpy.ops.wm.modal_timer_operator()
But the issue is i can't use it since i would need to start and stop the webcam each frame which would be very slow. So what im trying to ask is : Can i somehow put the whole hand landmarks script into a subprocces or a seperately running program which would just send the x,y,z coordinates so that blender can grab it (using the modal timer each second) and move the object to that position?

Python, drawing a polygon over webcam video using mouse clicks to detect points

I'm using Python3 and OpenCV (4.1.0) to realize a script that:
displays the contents of the webcam;
records the coordinates of mouse clicks over video;
after pressing a certain button ('p' in my example), draws a polyline between the points identified by previous mouse clicks;
So far, I'm trying:
import numpy as np
import cv2
def main():
cap = cv2.VideoCapture("files/long_video.mp4") # Open video file
points = []
while (cap.isOpened()):
ret, frame = cap.read() # read a frame
try:
cv2.imshow('Frame', frame)
except:
print('EOF')
break
cv2.setMouseCallback('Frame', left_click_detect, points)
# Abort and exit with 'Q'
key = cv2.waitKey(25)
if (key == ord('q')):
break
elif (key== ord('p')): # HERE, IT SHOULD DRAW POLYLINE OVER VIDEO!!!
pts_array = np.array([[x, y] for (x, y) in points], np.int0)
frame = cv2.polylines(frame, np.int32(np.array(points)), False, (255, 0, 0), thickness=5)
points = []
cv2.imshow('Frame', frame)
cap.release() # release video file
cv2.destroyAllWindows() # close all openCV windows
def left_click(event, x, y, flags, points):
if (event == cv2.EVENT_LBUTTONDOWN):
print(f"\tClick on {x}, {y}")
points.append([x,y])
It kinda works, but after pressing 'p' it doesn't draw the polyline over the video.
Any suggestions?
There are 2 problems with your code:
cv2.polylines() accepts a list of arrays. so here:
frame = cv2.polylines(frame, np.int32(np.array(points)), False, (255, 0, 0), thickness=5)
Replace np.int32(np.array(points)) with [np.int32(points)] to fix the exception. (you also don't need to use np.array() here)
After you draw the polygon on the frame, you call cv2.show(), but almost immediately after, you show the next frame without the polygon on it, so you don't have time to see the polygon. to fix it you need to draw the polygon again for each frame. and to do that, you need to save it until you press p again (to show another polygon).
This will work:
import numpy as np
import cv2
def main():
cap = cv2.VideoCapture("files/long_video.mp4") # Open video file
polygon = []
points = []
while (cap.isOpened()):
ret, frame = cap.read() # read a frame
if not ret:
print('EOF')
break
frame = cv2.polylines(frame, polygon, False, (255, 0, 0), thickness=5)
cv2.imshow('Frame', frame)
# Abort and exit with 'Q'
key = cv2.waitKey(25)
if (key == ord('q')):
break
elif (key== ord('p')):
polygon = [np.int32(points)]
points = []
cv2.setMouseCallback('Frame', left_click_detect, points)
cap.release() # release video file
cv2.destroyAllWindows() # close all openCV windows
def left_click_detect(event, x, y, flags, points):
if (event == cv2.EVENT_LBUTTONDOWN):
print(f"\tClick on {x}, {y}")
points.append([x,y])
print(points)

Previewing line when drawing with opencv

I want to make a simple draw function to draw a line, but I want it to be previewed while it's being drawn; on EVENT_MOUSEMOVE it should draw the line continuously, while not saving it (e.g. how MS paint behaves when you draw a line and can see the output, which is saved when you release the mouse button).
I've used the "more advanced demo" here, where I use the input image as a global, make a copy of it, and then overwrite the image with the copy after drawing, but it doesn't actually make any difference (I neither see the "preview" of the line, and all the lines I draw stay on the canvas) and I can't see why:
img = None
def draw_circle(event,x,y,flags,param):
global ix,iy,drawing,mode,img
img_c = img.copy()
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix,iy = x,y
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
if mode == True:
cv2.line(img,(ix,iy),(x,y),(0,255,0),1)
img = img_c.copy()
else:
cv2.circle(img,(x,y),5,(0,0,255),-1)
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
if mode == True:
cv2.line(img,(ix,iy),(x,y),(0,255,0),1)
else:
cv2.circle(img,(x,y),5,(0,0,255),-1)
while(1):
cv2.imshow('image',img)
k = cv2.waitKey(1) & 0xFF
To preview a line while drawing it, you need to update the image every time you move the mouse, but you need to keep the original to only draw one line. If the line changes orientation the idea is not to draw multiple lines, but to show only one line.
Here is a sample code of what I am talking about. I start with a black canvas, then I preview the line in blue, and once it is finish it becomes green. Read the comments inside the code to understand the details. If you have any questions just comment :)
import numpy as np
import cv2
# create black canvas of size 600x600
img = np.zeros((600, 600, 3), dtype=np.uint8)
# intialize values in unusable states
preview = None
initialPoint = (-1, -1)
# mouse callback
def drawLine(event,x,y,flags,param):
global initialPoint,img, preview
if event == cv2.EVENT_LBUTTONDOWN:
# new initial point and preview is now a copy of the original image
initialPoint = (x,y)
preview = img.copy()
# this will be a point at this point in time
cv2.line(preview, initialPoint, (x,y), (0,255,0), 1)
elif event == cv2.EVENT_MOUSEMOVE:
if preview is not None:
# copy the original image again a redraw the new line
preview = img.copy()
cv2.line(preview, initialPoint, (x,y), (0,255,0), 1)
elif event == cv2.EVENT_LBUTTONUP:
# if we are drawing, preview is not None and since we finish, draw the final line in the image
if preview is not None:
preview = None
cv2.line(img, initialPoint, (x,y), (255,0,0), 1)
# set the named window and callback
cv2.namedWindow("image")
cv2.setMouseCallback("image", drawLine)
while (True):
# if we are drawing show preview, otherwise the image
if preview is None:
cv2.imshow('image',img)
else :
cv2.imshow('image',preview)
k = cv2.waitKey(1) & 0xFF
if k == ord('q'):
break;
cv2.destroyAllWindows()

Closing an image window displays another window. Why?

I'm stuck. My code sucks. My silders don't work either, but the infinite image windows are driving me nuts. When I close the namedWindow, it opens a new display window with the image (infinitely). Help?
import numpy as np
import cv2
from pylepton import Lepton
#setup the Lepton image buffer
def capture(device = "/dev/spidev0.0"):
with Lepton() as l:
a,_ = l.capture() #grab the buffer
cv2.normalize(a, a, 0, 65535, cv2.NORM_MINMAX) # extend contrast
np.right_shift(a, 8, a) # fit data into 8 bits
return np.uint8(a)
#Create a window and give it features
def nothing(x):
pass
cv2.namedWindow('flir', cv2.WINDOW_NORMAL)
cv2.moveWindow('flir',1,1)
cv2.createTrackbar('thresh','flir',50,100,nothing)
cv2.createTrackbar('erode','flir',5,100,nothing)
cv2.createTrackbar('dilate','flir',7,100,nothing)
#process the buffer into an image on a continuous loop
while True:
#update the image processing variables
thresh = cv2.getTrackbarPos('thresh', 'flir')
erodeSize = cv2.getTrackbarPos('erode', 'flir')
dilateSize = cv2.getTrackbarPos('dilate', 'flir')
image = capture()
#apply some image processing
blurredBrightness = cv2.bilateralFilter(image,9,150,150)
thresh = 50
edges = cv2.Canny(blurredBrightness,thresh,thresh*2, L2gradient=True)
_,mask = cv2.threshold(blurredBrightness,200,1,cv2.THRESH_BINARY)
erodeSize = 5
dilateSize = 14
eroded = cv2.erode(mask, np.ones((erodeSize, erodeSize)))
mask = cv2.dilate(eroded, np.ones((dilateSize, dilateSize)))
adjusted_image = cv2.resize(cv2.cvtColor(mask*edges, cv2.COLOR_GRAY2RGB) | image, (640, 4$
final_image = cv2.applyColorMap(adjusted_image, cv2.COLORMAP_HOT)
#display the image
cv2.imshow('flir', final_image)
if cv2.waitKey(1) == ord('q'):
break
cv2.waitKey()
cv2.destroyWindow('flir')
Firstly, Calm down.
Secondly, look at your code closely. Closing the window wouldn't do you any good, because of the lines:
cv2.imshow('flir', final_image)
and
cv2.destroyWindow('flir')
What these 2 are doing in tandem is that you're displaying a frame in a new window, and then destroying it, then recreating that window in imshow, then displaying the next frame and destroying it...and so on and so forth.
That should explain your flickering windows.
To stop execution of your program, you've added this code:
if cv2.waitKey(1) == ord('q'):
break
What this implies is that when you press 'q' on your keyboard while your image window is in focus, your while loop will break and your program will terminate.
So I would advise you to remove cv2.destroyWindow and use 'q' key to quit your application than to close it using your mouse.

Categories