I have robot code that does the following:
the camera starts processing and taking images
the MountingHoles (Hough transform) detection function is activated
the detected holes are drawn on the image
the approachcircle function moves the robot towards one of the detected coordinates
I have two issues:
The MountingHoles function keeps getting called even after the coordinates have been detected once.
In the approachcircle function, the robot can't move to one coordinate and then on to the next. It keeps going back and forth, because x and y are never allowed to finish the first set of coordinates first; i.e. between two circles it reaches neither center as expected. It never reaches the center of a detected circle if there is more than one.
I want the code to call the MountingHoles function once and have the robot move to each recorded coordinate. After the initial set of coordinates is done, I will move the robot to another area and start the process again. I'm assuming the problem is that the functions sit inside the camera-processing loop, which runs indefinitely.
The code is below:
##Def:
def approachcircle(r, t, z):
    move = robot.Pose() * transl(r, t, z)
    robot.MoveL(move)

def approacharea(z):
    move = robot.Pose() * transl(0, 0, z)
    robot.MoveL(move)

def MountingHoles(img, thresh, r):
    minR = r
    CannyHighT = 50
    min_points = 15  # param2
    img_1 = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # img3 = cv.inRange(img_1, thresh, 255)
    circles = cv.HoughCircles(img_1, cv.HOUGH_GRADIENT, 1, 2 * minR, param1=CannyHighT,
                              param2=min_points, minRadius=minR, maxRadius=220)
    return circles
#Installation
from robolink import *    # RoboDK API
from robodk import *      # Robot toolbox

RDK = Robolink()
pose = eye()
robot = RDK.Item('TM12X', ITEM_TYPE_ROBOT)

import_install('cv2', 'opencv-python')
import cv2 as cv
import numpy as np
#----------------------------------
# Settings
PROCESS_COUNT = -1 # How many frames to process before exiting. -1 means indefinitely.
CAM_NAME = "Camera"
DISPLAY_SETTINGS = True
WDW_NAME_PARAMS = 'RoboDK - Blob detection parameters'
DISPLAY_RESULT = True
WDW_NAME_RESULTS = 'RoboDK - Blob detections1'
# Calculate absolute XYZ position clicked from the camera in absolute coordinates:
cam_item = RDK.Item(CAM_NAME, ITEM_TYPE_CAMERA)
if not cam_item.Valid():
    raise Exception("Camera not found! %s" % CAM_NAME)
cam_item.setParam('Open', 1)  # Force the camera view to open
#----------------------------------------------
# Create a resizable result window
if DISPLAY_RESULT:
    cv.namedWindow(WDW_NAME_RESULTS)  # , cv.WINDOW_NORMAL)
#----------------------------------------------
# capture = cv.VideoCapture(0)
# retval, image = capture.read()
#----------------------------------------------
# Process camera frames
count = 0
while count < PROCESS_COUNT or PROCESS_COUNT < 0:
    print("=============================================")
    print("Processing image %i" % count)
    count += 1
    #----------------------------------------------
    # Get the image from RoboDK
    bytes_img = RDK.Cam2D_Snapshot("", cam_item)
    if bytes_img == b'':
        raise
    # Images from RoboDK are BGR, uchar, (h,w,3)
    nparr = np.frombuffer(bytes_img, np.uint8)
    img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
    if img is None or img.shape == ():
        raise
    #----------------------------------------------
    # Detect blobs
    keypoints = MountingHoles(img, 250, 50)
    i = 0
    #----------------------------------------------
    # Display the detection to the user (reference image and camera image side by side, with detection results)
    if DISPLAY_RESULT:
        # Draw detected blobs and their id
        i = 0
        for keypoint in keypoints[0, :]:
            cv.putText(img, str(i), (int(keypoint[0]), int(keypoint[1])), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv.LINE_AA)
            cv.circle(img, (int(keypoint[0]), int(keypoint[1])), int(keypoint[2]), (0, 0, 255), 15)
            i += 1
        # Resize the image so that it fits your screen
        img = cv.resize(img, (int(img.shape[1] * .75), int(img.shape[0] * .75)))
        cv.imshow(WDW_NAME_RESULTS, img)
        key = cv.waitKey(500)
        if key == 27:
            break  # User pressed ESC, exit
        if cv.getWindowProperty(WDW_NAME_RESULTS, cv.WND_PROP_VISIBLE) < 1:
            break  # User killed the window, exit
    #--------------------------------------------------------------------------------------------
    # Movement functions
    r = 0
    t = 0
    i = 0
    # approacharea(200)
    for keypoint in keypoints[0, :]:
        # print("id:%i coord=(%0.0f, %0.0f)" % (i, X, Y))
        X = int(keypoint[0]) - 320
        Y = int(keypoint[1]) - 240
        r = int(keypoint[2])
        print("id:%i coord=(%0.0f, %0.0f)" % (i, X, Y))
        if X != 0 or Y != 0:
            r = X * 0.1
            t = Y * 0.1
            approachcircle(r, t, 0)
        i += 1
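For reference, one way to address both issues at once is to take the snapshot and run the detection a single time per area, save the robot pose before the first move, and then express every target relative to that saved pose rather than to the robot's current pose. The sketch below is only a rough outline reusing the setup above; the 0.1 pixel-to-millimetre scale and the 320/240 image center are assumptions carried over from the original code and would need calibrating:

ref_pose = robot.Pose()   # remember the pose the holes were detected from
PIX_TO_MM = 0.1           # assumed pixel-to-millimetre scale; calibrate for your camera

# One snapshot, one detection per area
bytes_img = RDK.Cam2D_Snapshot("", cam_item)
nparr = np.frombuffer(bytes_img, np.uint8)
img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
circles = MountingHoles(img, 250, 50)

if circles is not None:
    for i, (x, y, r) in enumerate(circles[0, :]):
        dx = (int(x) - 320) * PIX_TO_MM
        dy = (int(y) - 240) * PIX_TO_MM
        print("hole %i offset = (%.1f, %.1f) mm" % (i, dx, dy))
        robot.MoveL(ref_pose * transl(dx, dy, 0))   # target is fixed w.r.t. the saved pose
        robot.MoveL(ref_pose)                       # return before heading to the next hole

# Only after all holes have been visited, move to the next area and repeat
# approacharea(200)

Because each target is computed from the same ref_pose instead of robot.Pose() at the moment of the move, finishing one hole no longer shifts the coordinates of the next one, and MountingHoles() is called exactly once per area.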
I am trying to develop a script that detects pixelation in live TV captured from an external camera. To test the script I have been using a short clip of live TV that contains two instances of pixelation.
See Google Drive below for video:
https://drive.google.com/file/d/1f339HJSWKhyPr1y5sf9tWW4vcXgBOVbz/view?usp=sharing
Currently I am able to filter out most of the noise in the video and detect the pixelation. However, I am also detecting the white text (given its intensity, the text gets picked up by the kernel I am applying).
See the code below:
import cv2
import numpy as np
cap = cv2.VideoCapture("./hgtv_short.ts")
while True:
    success, image = cap.read()
    gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
    sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])
    sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
    sharpe = sharpen + 128
    canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255, edges=1, apertureSize=3, L2gradient=True)
    white = np.where(canny != [0])
    coordinates = zip(white[1], white[0])
    for p in coordinates:
        cv2.circle(canny, p, 30, (200, 0, 0), 2)
    cv2.imshow('image', image)
    cv2.imshow('edges', canny)
    cv2.waitKey(1)
What I would like to do is apply a threshold and findContours to the region around each detected coordinate to see whether text is present there, so that I can discern between actual pixelation and text.
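A minimal sketch of that idea, assuming a hypothetical is_probably_text() helper applied to a fixed-size window around each Canny hit and the OpenCV 4 findContours signature; the window size, threshold and area limits are guesses to tune:

def is_probably_text(gray, x, y, half=30):
    """Crop a window around (x, y), threshold it, and look for several small,
    letter-sized contours; returns True if the region looks like text."""
    h, w = gray.shape[:2]
    x0, y0 = max(x - half, 0), max(y - half, 0)
    x1, y1 = min(x + half, w), min(y + half, h)
    roi = gray[y0:y1, x0:x1]
    _, mask = cv2.threshold(roi, 200, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    letter_like = [c for c in contours if 20 < cv2.contourArea(c) < 400]
    return len(letter_like) >= 3   # a cluster of small blobs usually means text

# In the loop above, skip hits that sit on text:
# for p in coordinates:
#     if not is_probably_text(gray, p[0], p[1]):
#         cv2.circle(canny, p, 30, (200, 0, 0), 2)

Macro-blocking tends to produce long, grid-aligned edges rather than clusters of small compact blobs, which is what this crude text check relies on.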
NOTE:
If anyone has any other ideas on finding pixelation I am open to suggestions.
UPDATE
Here is a screenshot from the video showing the type of pixelation I am looking for (macro-blocking, to be specific).
Image
Edges
From the above images you can see that I am detecting the macro-blocking, but also the white text. I would like to be able to discern between text and actual macro-blocking.
SECOND UPDATE
After more trial and error, I found it best to use some sort of reference model to help predict when an image shows macro-blocking, pixelation, artifacts, etc.
I decided to use the HOG (Histogram of Oriented Gradients) descriptor to create my feature vector. I created two functions: one loops through the GOOD images and the other through the BAD images:
def pos_train_set(self):
    print("Starting to Gather Positive Photos")
    for pos_file in glob.iglob(os.path.join(self.base_path, "Bad_Images", "*.jpg")):
        pos_img = cv2.imread(pos_file, 1)
        pos_img = cv2.resize(pos_img, self.winSize, interpolation=cv2.CV_32F)
        pos_des = self.hog.compute(pos_img)
        pos_des = cv2.normalize(pos_des, None)
        self.labels.append(1)
        self.training_data.append(pos_des)
    print("Gathered Positive Photos")

def neg_train_set(self):
    print("Starting to Gather Negative Photos")
    for neg_file in glob.iglob(os.path.join(self.base_path, "Good_Images", "*.jpg")):
        neg_img = cv2.imread(neg_file, 1)
        neg_img = cv2.resize(neg_img, self.winSize, interpolation=cv2.CV_32F)
        neg_des = self.hog.compute(neg_img)
        neg_des = cv2.normalize(neg_des, None)
        self.labels.append(0)
        self.training_data.append(neg_des)
    print("Gathered Negative Photos")
I then train my model using the SVM (Support Vector Machine) classification algorithm.
def train_set(self):
    print("Starting to Convert")
    td = np.float32(self.training_data)
    lab = np.array(self.labels)
    print("Converted List")
    print("Starting Shuffle")
    rand = np.random.RandomState(10)
    shuffle = rand.permutation(len(td))
    td = td[shuffle]
    lab = lab[shuffle]
    print("Shuffled List")
    print("Starting SVM")
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    # Exponential Chi2 kernel, similar to the RBF kernel:
    # K(x_i, x_j) = e^(-gamma * chi2(x_i, x_j)), chi2(x_i, x_j) = (x_i - x_j)^2 / (x_i + x_j), gamma > 0
    svm.setKernel(cv2.ml.SVM_CHI2)
    svm.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
    svm.setGamma(5.383)
    svm.setC(2.67)
    print("Starting Training")
    svm.train(td, cv2.ml.ROW_SAMPLE, lab)
    print("Saving to .yml")
    svm.save(os.path.join(self.base_path, "svm_model.yml"))
I then use that SVM model to try to predict whether an image is a 1 (bad image) or a 0 (good image), together with the kernel and edge detection I used in my first attempt:
def predict(self):
    svm = cv2.ml.SVM_load("./svm_model.yml")
    for file in self.files:
        os.mkdir(os.path.join(self.base_path, "1_Frames", os.path.basename(file)))
        print(f"Starting predict on {file}")
        cap = cv2.VideoCapture(file)
        while cap.isOpened():
            success, image = cap.read(1)
            if success:
                img = cv2.resize(image, self.winSize, interpolation=cv2.CV_32F)
                test_data = self.hog.compute(img)
                test_data = cv2.normalize(test_data, None)
                test_data = np.float32(test_data)
                test_data = np.transpose(test_data)
                if not np.any(test_data):
                    print("Invalid Dimension")
                    success, image = cap.read(1)
                    print(f"New Frame {success}")
                else:
                    response = svm.predict(test_data)[1]
                    if response == 1:
                        gray = cv2.cvtColor(src=image, code=cv2.COLOR_BGR2GRAY)
                        sharpen_kernel = np.array([[.4, .4], [-2.25, -2.25], [.4, .4]])
                        sharpen = cv2.filter2D(src=gray, ddepth=-1, kernel=sharpen_kernel)
                        sharpe = sharpen + 128
                        canny = cv2.Canny(image=sharpe, threshold1=245, threshold2=255, edges=1, apertureSize=3, L2gradient=True)
                        white = np.where(canny != [0])
                        if not len(white[0]) == 0:
                            cv2.imwrite(os.path.join(self.base_path, '1_Frames', os.path.basename(file), f'found_{self.x}.jpg'), image)
                            success, image = cap.read(1)
                            self.x += 1
                        else:
                            success, image = cap.read(1)
                    else:
                        cv2.imwrite(os.path.join(self.base_path, '0_Frames', f'found_{self.y}.jpg'), image)
                        success, image = cap.read(1)
                        self.y += 1
            else:
                break
        cap.release()
        cv2.destroyAllWindows()
This method seems to work well, but I am still open to further ideas or suggestions. I posted this update in the hope that it may help someone else looking for ways to detect issues in images.
My goal is to do homography on live video by capturing my screen and processing it.
To do so, I took the code from this link and wrapped it in a while loop as follows:
from __future__ import print_function
import cv2 as cv
import numpy as np
from windowcapture import WindowCapture
# initialize the WindowCapture class
capture = WindowCapture('My Window')
bar_img = cv.imread('hammer.jpg',cv.IMREAD_GRAYSCALE)
while True:
    # get an updated image of the game
    screenshot = capture.get_screenshot()
    screenshot = cv.cvtColor(screenshot, cv.IMREAD_GRAYSCALE)
    if bar_img is None or screenshot is None:
        print('Could not open or find the images!')
        exit(0)
    #-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors
    minHessian = 400
    detector = cv.SIFT_create()
    keypoints_obj, descriptors_obj = detector.detectAndCompute(bar_img, None)
    keypoints_scene, descriptors_scene = detector.detectAndCompute(screenshot, None)
    #-- Step 2: Matching descriptor vectors with a FLANN based matcher
    # Since SURF is a floating-point descriptor NORM_L2 is used
    matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
    knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)
    #-- Filter matches using Lowe's ratio test
    ratio_thresh = 0.75
    good_matches = []
    for m, n in knn_matches:
        if m.distance < ratio_thresh * n.distance:
            good_matches.append(m)
    #-- Draw matches
    img_matches = np.empty((max(bar_img.shape[0], screenshot.shape[0]), bar_img.shape[1] + screenshot.shape[1], 3), dtype=np.uint8)
    cv.drawMatches(bar_img, keypoints_obj, screenshot, keypoints_scene, good_matches, img_matches, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    #-- Localize the object
    obj = np.empty((len(good_matches), 2), dtype=np.float32)
    scene = np.empty((len(good_matches), 2), dtype=np.float32)
    for i in range(len(good_matches)):
        #-- Get the keypoints from the good matches
        obj[i, 0] = keypoints_obj[good_matches[i].queryIdx].pt[0]
        obj[i, 1] = keypoints_obj[good_matches[i].queryIdx].pt[1]
        scene[i, 0] = keypoints_scene[good_matches[i].trainIdx].pt[0]
        scene[i, 1] = keypoints_scene[good_matches[i].trainIdx].pt[1]
    H, _ = cv.findHomography(obj, scene, cv.RANSAC)
    #-- Get the corners from image_1 (the object to be "detected")
    obj_corners = np.empty((4, 1, 2), dtype=np.float32)
    obj_corners[0, 0, 0] = 0
    obj_corners[0, 0, 1] = 0
    obj_corners[1, 0, 0] = bar_img.shape[1]
    obj_corners[1, 0, 1] = 0
    obj_corners[2, 0, 0] = bar_img.shape[1]
    obj_corners[2, 0, 1] = bar_img.shape[0]
    obj_corners[3, 0, 0] = 0
    obj_corners[3, 0, 1] = bar_img.shape[0]
    scene_corners = cv.perspectiveTransform(obj_corners, H)
    #-- Draw lines between the corners (the mapped object in the scene - image_2)
    cv.line(img_matches, (int(scene_corners[0, 0, 0] + bar_img.shape[1]), int(scene_corners[0, 0, 1])),
            (int(scene_corners[1, 0, 0] + bar_img.shape[1]), int(scene_corners[1, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[1, 0, 0] + bar_img.shape[1]), int(scene_corners[1, 0, 1])),
            (int(scene_corners[2, 0, 0] + bar_img.shape[1]), int(scene_corners[2, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[2, 0, 0] + bar_img.shape[1]), int(scene_corners[2, 0, 1])),
            (int(scene_corners[3, 0, 0] + bar_img.shape[1]), int(scene_corners[3, 0, 1])), (0, 255, 0), 4)
    cv.line(img_matches, (int(scene_corners[3, 0, 0] + bar_img.shape[1]), int(scene_corners[3, 0, 1])),
            (int(scene_corners[0, 0, 0] + bar_img.shape[1]), int(scene_corners[0, 0, 1])), (0, 255, 0), 4)
    #-- Show detected matches
    cv.imshow('Good Matches & Object detection', img_matches)
    cv.waitKey()
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break

print('Done.')
The WindowCapture class I use relies on win32gui to capture the window (maybe it matters that I obtain the frame this way rather than with imread?).
I get the following error when I run the code:
C:\Users\Tester\AppData\Local\Temp\pip-req-build-1i5nllza\opencv\modules\calib3d\src\fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'cv::findHomography'
Any idea why it happens?
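The error usually just means that good_matches has fewer than four entries on that particular frame (for example when the object is not visible yet), so findHomography has nothing to fit. A minimal guard, as a sketch to drop into the loop right before the findHomography call:

# Skip this frame if there are not enough matches to estimate a homography
MIN_MATCH_COUNT = 4
if len(good_matches) < MIN_MATCH_COUNT:
    cv.imshow('Good Matches & Object detection', img_matches)
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break
    continue

H, _ = cv.findHomography(obj, scene, cv.RANSAC)
if H is None:
    continue   # RANSAC can also fail to return a model

Unrelated to the error, note that cv.cvtColor(screenshot, cv.IMREAD_GRAYSCALE) passes an imread flag where a color-conversion code is expected; cv.COLOR_BGR2GRAY is probably what was intended, and using it should also give SIFT a proper grayscale scene to match against.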
So I'm trying to run a Python script (originally available here: https://github.com/dvdtho/python-photo-mosaic). The full code is at the bottom of this post.
This basically creates a mosaic from a source image, where the final image (the mosaic) is composed of many other images (the tiles).
My question is how I am supposed to fill in the arguments (the ones at line 212) in order to run the script (through Eclipse in my case).
Should I directly put something like this? (In my case the folder Desktop/tiles contains all the .jpg files.)
tile_paths = glob.glob("C:/Users/Sylvia/Desktop/tiles/*.jpg") # I've added this line myself
def create_mosaic(source_path="C:\\Users\\Sylvia\\Desktop\\source\\1.jpg", target="C:\\Users\\Sylvia\\Desktop\\source\\result.jpg", tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
The last time I tried, I got this error:
def create_mosaic(source, target, tile_ratio=1920/800, tile_width=75,
enlargement=8, reuse=True, color_mode='RGB', tile_paths,
shuffle_first=30):
^ SyntaxError: non-default argument follows default argument
I'm very lost, hopefully someone can help me.
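For what it's worth, the quoted SyntaxError comes from the edited signature itself: in Python, a parameter without a default value (tile_paths here) may not appear after parameters that have defaults. A sketch of a signature that keeps the original behaviour is simply to leave tile_paths with its None default and pass the real list at the call site instead:

# Defaults must come after the non-default parameters
def create_mosaic(source_path, target, tile_ratio=1920/800, tile_width=75,
                  enlargement=8, reuse=True, color_mode='RGB',
                  tile_paths=None, shuffle_first=30):
    ...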
Here's the code:
import time
import itertools
import random
import sys
import numpy as np
from PIL import Image
from skimage import img_as_float
from skimage.measure import compare_mse
def shuffle_first_items(lst, i):
    if not i:
        return lst
    first_few = lst[:i]
    remaining = lst[i:]
    random.shuffle(first_few)
    return first_few + remaining

def bound(low, high, value):
    return max(low, min(high, value))

class ProgressCounter:
    def __init__(self, total):
        self.total = total
        self.counter = 0

    def update(self):
        self.counter += 1
        sys.stdout.write("Progress: %s%% %s" % (100 * self.counter / self.total, "\r"))
        sys.stdout.flush()

def img_mse(im1, im2):
    """Calculates the mean squared error (MSE) between two images"""
    try:
        return compare_mse(img_as_float(im1), img_as_float(im2))
    except ValueError:
        print(f'RMS issue, Img1: {im1.size[0]} {im1.size[1]}, Img2: {im2.size[0]} {im2.size[1]}')
        raise KeyboardInterrupt

def resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint=None):
    width = img.size[0]
    height = img.size[1]
    if not centerpoint:
        centerpoint = (int(width / 2), int(height / 2))
    requested_target_x = centerpoint[0]
    requested_target_y = centerpoint[1]
    aspect = width / float(height)
    if aspect > target_aspect:
        # Then crop the left and right edges:
        new_width = int(target_aspect * height)
        new_width_half = int(new_width / 2)
        target_x = bound(new_width_half, width - new_width_half, requested_target_x)
        left = target_x - new_width_half
        right = target_x + new_width_half
        resize = (left, 0, right, height)
    else:
        # ... crop the top and bottom:
        new_height = int(width / target_aspect)
        new_height_half = int(new_height / 2)
        target_y = bound(new_height_half, height - new_height_half, requested_target_y)
        top = target_y - new_height_half
        bottom = target_y + new_height_half
        resize = (0, top, width, bottom)
    return resize

def aspect_crop_to_extent(img, target_aspect, centerpoint=None):
    '''
    Crop an image to the desired aspect ratio at the maximum size available.
    Centerpoint can be provided to focus the crop to one side or another -
    eg just cut the left side off if interested in the right side.
    target_aspect = width / float(height)
    centerpoint = (width, height)
    '''
    resize = resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint)
    return img.crop(resize)
class Config:
    def __init__(self, tile_ratio=1920/800, tile_width=50, enlargement=8, color_mode='RGB'):
        self.tile_ratio = tile_ratio      # height/width of mosaic tiles, e.g. 2.4
        self.tile_width = tile_width      # width of mosaic tiles in pixels
        self.enlargement = enlargement    # mosaic image will be this many times wider and taller than the original
        self.color_mode = color_mode      # L for greyscale or RGB for color

    @property
    def tile_height(self):
        return int(self.tile_width / self.tile_ratio)

    @property
    def tile_size(self):
        return self.tile_width, self.tile_height  # PIL expects (width, height)

class TileBox:
    """
    Container to import, process, hold, and compare all of the tiles
    we have to make the mosaic with.
    """
    def __init__(self, tile_paths, config):
        self.config = config
        self.tiles = list()
        self.prepare_tiles_from_paths(tile_paths)

    def __process_tile(self, tile_path):
        with Image.open(tile_path) as i:
            img = i.copy()
        img = aspect_crop_to_extent(img, self.config.tile_ratio)
        large_tile_img = img.resize(self.config.tile_size, Image.ANTIALIAS).convert(self.config.color_mode)
        self.tiles.append(large_tile_img)
        return True

    def prepare_tiles_from_paths(self, tile_paths):
        print('Reading tiles from provided list...')
        progress = ProgressCounter(len(tile_paths))
        for tile_path in tile_paths:
            progress.update()
            self.__process_tile(tile_path)
        print('Processed tiles.')
        return True

    def best_tile_block_match(self, tile_block_original):
        match_results = [img_mse(t, tile_block_original) for t in self.tiles]
        best_fit_tile_index = np.argmin(match_results)
        return best_fit_tile_index

    def best_tile_from_block(self, tile_block_original, reuse=False):
        if not self.tiles:
            print('Ran out of images.')
            raise KeyboardInterrupt
        # start_time = time.time()
        i = self.best_tile_block_match(tile_block_original)
        # print("BLOCK MATCH took --- %s seconds ---" % (time.time() - start_time))
        match = self.tiles[i].copy()
        if not reuse:
            del self.tiles[i]
        return match
class SourceImage:
    """Processing original image - scaling and cropping as needed."""
    def __init__(self, image_path, config):
        print('Processing main image...')
        self.image_path = image_path
        self.config = config
        with Image.open(self.image_path) as i:
            img = i.copy()
        w = img.size[0] * self.config.enlargement
        h = img.size[1] * self.config.enlargement
        large_img = img.resize((w, h), Image.ANTIALIAS)
        w_diff = (w % self.config.tile_width) / 2
        h_diff = (h % self.config.tile_height) / 2
        # if necessary, crop the image slightly so we use a
        # whole number of tiles horizontally and vertically
        if w_diff or h_diff:
            large_img = large_img.crop((w_diff, h_diff, w - w_diff, h - h_diff))
        self.image = large_img.convert(self.config.color_mode)
        print('Main image processed.')

class MosaicImage:
    """Holder for the mosaic"""
    def __init__(self, original_img, target, config):
        self.config = config
        self.target = target
        # Let's just start with the original image, scaled up, instead of a blank one
        self.image = original_img
        # self.image = Image.new(original_img.mode, original_img.size)
        self.x_tile_count = int(original_img.size[0] / self.config.tile_width)
        self.y_tile_count = int(original_img.size[1] / self.config.tile_height)
        self.total_tiles = self.x_tile_count * self.y_tile_count
        print(f'Mosaic will be {self.x_tile_count:,} tiles wide and {self.y_tile_count:,} tiles high ({self.total_tiles:,} total).')

    def add_tile(self, tile, coords):
        """Adds the provided image onto the mosaic at the provided coords."""
        try:
            self.image.paste(tile, coords)
        except TypeError as e:
            print('Maybe the tiles are not the right size. ' + str(e))

    def save(self):
        self.image.save(self.target)

def coords_from_middle(x_count, y_count, y_bias=1, shuffle_first=0):
    '''
    Let's start in the middle, where we have more images,
    and we don't get "lines" where the same best images
    get used at the start.

    y_bias - if we are using non-square coords, we can
        influence the order to be closer to the real middle.
        If width is 2x height, y_bias should be 2.
    shuffle_first - we can shuffle the first X coords
        so that we don't use all the same best images
        in the same spot - in the middle.

    from movies.mosaic_mem import coords_from_middle
    x = 10
    y = 10
    coords_from_middle(x, y, y_bias=2, shuffle_first=0)
    '''
    x_mid = int(x_count / 2)
    y_mid = int(y_count / 2)
    coords = list(itertools.product(range(x_count), range(y_count)))
    coords.sort(key=lambda c: abs(c[0] - x_mid) * y_bias + abs(c[1] - y_mid))
    coords = shuffle_first_items(coords, shuffle_first)
    return coords
def create_mosaic(source_path, target, tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
    """Forms a mosaic from an original image using the best
    tiles provided. This reads, processes, and keeps in memory
    a copy of the source image, and all of the tiles, while processing.

    Arguments:
    source_path   -- filepath to the source image for the mosaic
    target        -- filepath to save the mosaic to
    tile_ratio    -- height/width ratio of the mosaic tiles
    tile_width    -- width of the mosaic tiles in pixels
    enlargement   -- the mosaic image will be this many times wider and taller than the original
    reuse         -- whether tiles may be reused in the mosaic, or each tile used only once
    color_mode    -- L for greyscale or RGB for color
    tile_paths    -- list of filepaths to your tiles
    shuffle_first -- the mosaic is filled out starting in the center for best effect; also,
                     the first few coordinates are shuffled so that the best images are not
                     necessarily all placed in one spot
    """
    config = Config(
        tile_ratio=tile_ratio,      # height/width of mosaic tiles
        tile_width=tile_width,      # width of mosaic tiles in pixels
        enlargement=enlargement,    # the mosaic image will be this many times wider and taller than the original
        color_mode=color_mode,      # L for greyscale or RGB for color
    )
    # Pull in and process the original image
    print('Setting Up Target image')
    source_image = SourceImage(source_path, config)
    # Set up the mosaic
    mosaic = MosaicImage(source_image.image, target, config)
    # Assess the tiles
    print('Assessing Tiles')
    tile_box = TileBox(tile_paths, config)
    try:
        progress = ProgressCounter(mosaic.total_tiles)
        for x, y in coords_from_middle(mosaic.x_tile_count, mosaic.y_tile_count, y_bias=config.tile_ratio, shuffle_first=shuffle_first):
            progress.update()
            # Make a box for this sector
            box_crop = (x * config.tile_width, y * config.tile_height, (x + 1) * config.tile_width, (y + 1) * config.tile_height)
            # Get the original image data for this sector
            comparison_block = source_image.image.crop(box_crop)
            # Get the best tile that matches this sector of the original image
            tile_match = tile_box.best_tile_from_block(comparison_block, reuse=reuse)
            # Add the best match to the mosaic
            mosaic.add_tile(tile_match, box_crop)
            # Save after every sector
            mosaic.save()
    except KeyboardInterrupt:
        print('\nStopping, saving partial image...')
    finally:
        mosaic.save()
It's OK, this is the new file I had to create in order for it to work:
create_mosaic(
    source_path="/path/to/source/image",
    target="/path/to/output/image",
    tile_paths=["/path/to/tile_1", ... "/path/to/tile_n"],
    tile_ratio=1920/800,   # crop tiles to this height/width ratio
    tile_width=300,        # tiles will be scaled to this width
    enlargement=20,        # mosaic will be this many times larger than the original
    reuse=False,           # should tiles be used multiple times?
    color_mode='L',        # RGB (color) or L (greyscale)
)
Problem resolved.
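A complete driver file based on that snippet and the glob line from the question might look like the sketch below; the module name mosaic (i.e. the big script saved as mosaic.py) and the concrete paths are assumptions to adapt:

import glob
from mosaic import create_mosaic   # assuming the script above is saved as mosaic.py

create_mosaic(
    source_path="C:/Users/Sylvia/Desktop/source/1.jpg",
    target="C:/Users/Sylvia/Desktop/source/result.jpg",
    tile_paths=glob.glob("C:/Users/Sylvia/Desktop/tiles/*.jpg"),
    tile_ratio=1920/800,
    tile_width=300,
    enlargement=20,
    reuse=False,
    color_mode='L',
)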
I'm trying to build code that flies a camera-equipped drone using demoMamboVisionGUI.py below. When the code is executed, the camera view comes up and a button press starts the flight. The first script below displays four camera windows and detects straight lines while filtering for a specified color value, blue (BGR2HSV). Using these two pieces of code, the drone recognises the blue straight line and flies forward little by little, turns left and right by a certain angle, recognises a patch of a specified color (red) on the ground, lands, and then starts flying again with another button press. I want to make the code recognise green and land. I would appreciate a simple hint.
import cv2
import numpy as np
def im_trim(img):
    x = 160
    y = 50
    w = 280
    h = 180
    img_trim = img[y:y + h, x:x + w]
    return img_trim

def go():
    minimum = 9999
    min_theta = 0
    try:
        cap = cv2.VideoCapture(0)
    except:
        return
    while True:
        ret, P = cap.read()
        img1 = P
        cv2.imshow('asdf', img1)
        img_HSV = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
        img_h, img_s, img_v = cv2.split(img_HSV)
        cv2.imshow("HSV", img_HSV)
        lower_b = np.array([100, 80, 100])
        upper_b = np.array([120, 255, 255])
        blue = cv2.inRange(img_HSV, lower_b, upper_b)
        cv2.imshow('root', blue)
        edges = cv2.Canny(blue, 50, 150, apertureSize=3)
        lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=100)
        if lines is not None:
            for line in lines:
                r, theta = line[0]
                # if (r < minimum and r > 0) and (np.rad2deg(theta) > -90 and np.rad2deg(theta) < 90):
                #     minimum = r
                #     min_theta = theta
                # if (r > 0 and r < 250) and (np.rad2deg(theta) > 170 or np.rad2deg(theta) < 10):
                #     self.drone_object.fly_direct(pitch=0, roll=-7, yaw=0, vertical_movement=0,
                #                                  duration=1)
                #     print("right")
                # elif (r > 400 and r < 650) and (np.rad2deg(theta) > 170 or np.rad2deg(theta) < 10):
                #     self.drone_object.fly_direct(pitch=0, roll=7, yaw=0, vertical_movement=0,
                #                                  duration=1)
                print(r, np.rad2deg(theta))
                # Repeat the if statements below inside the while loop until the detected path is straight, then continue
                # if np.rad2deg(min_theta) >= some angle (or <= some angle):
                #     below -> turn left, above -> turn right, in between -> go straight
                a = np.cos(theta)
                b = np.sin(theta)
                x0 = a * r
                y0 = b * r
                x1 = int(x0 + 1000 * (-b))
                y1 = int(y0 + 1000 * a)
                x2 = int(x0 - 1000 * (-b))
                y2 = int(y0 - 1000 * a)
                cv2.line(img1, (x1, y1), (x2, y2), (0, 255, 0), 3)
        cv2.imshow('hough', img1)
        k = cv2.waitKey(1)
        if k == 27:
            break
    cv2.destroyAllWindows()

if __name__ == "__main__":
    go()
    print("??")
================================================================================================
"""
Demo of the Bebop vision using DroneVisionGUI that relies on libVLC. It is a different
multi-threaded approach than DroneVision
Author: Amy McGovern
"""
from pyparrot.Minidrone import Mambo
from pyparrot.DroneVisionGUI import DroneVisionGUI
import cv2
# set this to true if you want to fly for the demo
testFlying = True
class UserVision:
    def __init__(self, vision):
        self.index = 0
        self.vision = vision

    def save_pictures(self, args):
        # print("in save pictures on image %d " % self.index)
        img = self.vision.get_latest_valid_picture()
        if img is not None:
            filename = "test_image_%06d.png" % self.index
            # uncomment this if you want to write out images every time you get a new one
            # cv2.imwrite(filename, img)
            self.index += 1
            # print(self.index)

def demo_mambo_user_vision_function(mamboVision, args):
    """
    Demo of the user code to run with the run button for a mambo
    :param args:
    :return:
    """
    mambo = args[0]
    if testFlying:
        print("taking off!")
        mambo.safe_takeoff(5)
        if mambo.sensors.flying_state != "emergency":
            print("flying state is %s" % mambo.sensors.flying_state)
            print("Flying direct: going up")
            mambo.fly_direct(roll=0, pitch=0, yaw=0, vertical_movement=15, duration=2)
            print("flip left")
            print("flying state is %s" % mambo.sensors.flying_state)
            success = mambo.flip(direction="left")
            print("mambo flip result %s" % success)
            mambo.smart_sleep(5)
        print("landing")
        print("flying state is %s" % mambo.sensors.flying_state)
        mambo.safe_land(5)
    else:
        print("Sleeping for 15 seconds - move the mambo around")
        mambo.smart_sleep(15)
    # done doing vision demo
    print("Ending the sleep and vision")
    mamboVision.close_video()
    mambo.smart_sleep(5)
    print("disconnecting")
    mambo.disconnect()

if __name__ == "__main__":
    # you will need to change this to the address of YOUR mambo
    mamboAddr = "B0:FC:36:F4:37:F9"
    # make my mambo object
    # remember to set True/False for the wifi depending on if you are using the wifi or the BLE to connect
    mambo = Mambo(mamboAddr, use_wifi=True)
    print("trying to connect to mambo now")
    success = mambo.connect(num_retries=3)
    print("connected: %s" % success)
    if success:
        # get the state information
        print("sleeping")
        mambo.smart_sleep(1)
        mambo.ask_for_state_update()
        mambo.smart_sleep(1)
        print("Preparing to open vision")
        mamboVision = DroneVisionGUI(mambo, is_bebop=False, buffer_size=200,
                                     user_code_to_run=demo_mambo_user_vision_function, user_args=(mambo, ))
        userVision = UserVision(mamboVision)
        mamboVision.set_user_callback_function(userVision.save_pictures, user_callback_args=None)
        mamboVision.open_video()
==========================================================================
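As for the green-landing hint asked about above: one option is to reuse the same cv2.inRange pattern already used for the blue line, but with a green HSV range, and call mambo.safe_land() once enough of the lower part of the frame is green. A minimal sketch; the HSV bounds and the 20% threshold are starting points to tune, not verified values:

# Rough HSV range for green; adjust for your lighting
lower_g = np.array([40, 80, 80])
upper_g = np.array([80, 255, 255])

def should_land_on_green(img_bgr, ratio=0.2):
    """Return True when the bottom third of the frame is mostly green."""
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    green = cv2.inRange(hsv, lower_g, upper_g)
    bottom = green[int(green.shape[0] * 2 / 3):, :]   # look only at the lower part of the image
    return cv2.countNonZero(bottom) > ratio * bottom.size

# Inside the vision/flight loop, once a frame is available:
# if should_land_on_green(frame):
#     mambo.safe_land(5)

This mirrors how the red landing patch is presumably handled: a color mask plus an area check, with safe_land() triggered when the check passes.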
I'm new to OpenCV and am trying to write a program that detects people in a video. I have this code, which is a variation of the peopledetect example.
def inside(r, q):
    rx, ry, rw, rh = r
    qx, qy, qw, qh = q
    return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh

def draw_detections(img, rects, thickness=1):
    for x, y, w, h in rects:
        # the HOG detector returns slightly larger rectangles than the real objects,
        # so we slightly shrink the rectangles to get a nicer output.
        pad_w, pad_h = int(0.15 * w), int(0.05 * h)
        cv2.rectangle(img, (x + pad_w, y + pad_h),
                      (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)

def find_people(img):
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    img = frame
    if img is None:
        return None
        # print('Failed to load image file:', fn)
        # continue
    # except:
    #     print('loading error')
    #     continue
    found, w = hog.detectMultiScale(
        img, winStride=(10, 10), padding=(32, 32), scale=1.05)
    found_filtered = []
    for ri, r in enumerate(found):
        for qi, q in enumerate(found):
            if ri != qi and inside(r, q):
                break
        else:
            found_filtered.append(r)
    draw_detections(img, found)
    draw_detections(img, found_filtered, 3)
    print('%d (%d) found' % (len(found_filtered), len(found)))
    return img

if __name__ == '__main__':
    import argparse
    # import itertools as it

    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video",
                    help="path to the (optional) video file")
    ap.add_argument("-b", "--buffer", type=int, default=64,
                    help="max buffer size")
    # ap.add_argument("-f", "--blur-faces", action='blur_faces',
    #                 help="Blur the faces contained in the video")
    args = vars(ap.parse_args())
    print(help_message)
    camera = cv2.VideoCapture(args["video"])
    # keep looping
    while True:
        # grab the current frame
        (grabbed, frame) = camera.read()
        # if we are viewing a video and we did not grab a frame,
        # then we have reached the end of the video
        # if args.get("video") and not grabbed:
        #     break
        img = find_people(frame)
        cv2.imshow('img', img)
        # waitKey must be called for something to show up on the screen;
        # it gives the computer time to process the image.
        cv2.waitKey(30)
    cv2.destroyAllWindows()
This code finds people and draws a rectangle around them. How would I go about finding out whether the person detected is the same person detected in a previous frame of the video? Or how could I find out whether the person the HOG detects has previously been detected?
I know I could save the locations the HOG finds and compare them to see which are roughly the same, but I don't think that would work if a person left the frame and then returned, because they would be treated as a new person. Is it possible to associate the colors of their clothes with specific recognised people and use that?
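One hedged way to use clothing color, as the question suggests: compute a color histogram of each detection box and compare it with the histograms of people seen earlier; if the best correlation is above some threshold, treat it as the same person returning. A minimal sketch (the histogram sizes and the 0.8 threshold are arbitrary starting points):

def body_histogram(img, rect):
    """HSV hue/saturation histogram of a detection box, normalised for comparison."""
    x, y, w, h = rect
    roi = img[y:y + h, x:x + w]
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1], None, [30, 32], [0, 180, 0, 256])
    return cv2.normalize(hist, hist).flatten()

def match_person(hist, known_hists, threshold=0.8):
    """Return the index of the best-matching known person, or None if nobody matches."""
    best_i, best_score = None, threshold
    for i, known in enumerate(known_hists):
        score = cv2.compareHist(hist, known, cv2.HISTCMP_CORREL)
        if score > best_score:
            best_i, best_score = i, score
    return best_i

Clothing color alone is fragile (lighting changes, similar clothes), so it works best combined with the position check described in the answer below.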
Actually, I am doing something similar: I am "tracking" the people appearing in a video to count incoming/outgoing people in a shop.
To tell whether it is the same person, I simply use the position of the detection and compare it with the rectangles found in the previous detections.
That works quite well, except when I get false positives and false negatives.
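A sketch of that position check, assuming the detections are (x, y, w, h) boxes as returned by detectMultiScale: match each new box against the previous frame's boxes by intersection-over-union and carry the id over when the overlap is high enough (the 0.3 threshold is an arbitrary starting point):

def iou(a, b):
    """Intersection-over-union of two (x, y, w, h) boxes."""
    ax, ay, aw, ah = a
    bx, by, bw, bh = b
    ix = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    iy = max(0, min(ay + ah, by + bh) - max(ay, by))
    inter = ix * iy
    union = aw * ah + bw * bh - inter
    return inter / union if union else 0.0

def assign_ids(new_rects, prev_rects, prev_ids, next_id, min_iou=0.3):
    """Carry over the id of the best-overlapping previous box, else issue a new id."""
    ids = []
    for r in new_rects:
        scores = [iou(r, p) for p in prev_rects]
        if scores and max(scores) >= min_iou:
            ids.append(prev_ids[scores.index(max(scores))])
        else:
            ids.append(next_id)
            next_id += 1
    return ids, next_id

False positives and false negatives will still break tracks occasionally, which is where combining this with an appearance cue such as the color histograms above can help.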