Related
I'm working on a project trying to do object detection and text detection using both yolo and easyocr. Since I'm a beginner and really new to computer vision, I would be glad if someone can help me.
Here's the code:
import cv2
import numpy as np
import easyocr
# Load Yolo
net = cv2.dnn.readNet('yolov4-tiny-custom_3000.weights', 'yolov4-tiny-custom.cfg')
classes = []
with open("obj.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
cap = cv2.VideoCapture('car1.mp4')
# Declare Ocr
cascade_src = 'haarcascade_russian_plate_number.xml'
cascade = cv2.CascadeClassifier(cascade_src)
reader = easyocr.Reader(['en'], gpu = False)
# Declare Ocr
while True:
_, frame = cap.read()
height, width, channels = frame.shape
#frame = cv2.resize(frame, (800, 600))
# Yolo Detection
# Detecting objects
blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
color = colors[class_ids[i]]
cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3)
print("Jenis Mobil: " +label)
# Text Reader Using Ocr
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
plate = cascade.detectMultiScale(gray, 1.1, 5)
for x,y,w,h in plate:
wT,hT,cT = frame.shape
a,b = (int(0.02*wT),int(0.02*hT))
plate2 = frame[y+a:y+h-a,x+b:x+w-b,:]
cv2.rectangle(frame,(x,y),(x+w,y+h),(60,60,255),2)
cv2.rectangle(frame,(x-1,y-40),(x+w+1,y),(60,60,255),-1)
result = reader.readtext(plate2)
for detek in result:
top_left = (int(detek[0][0][0]), int(detek[0][0][1]))
bottom_right = (int(detek[0][2][0]), int(detek[0][2][1]))
text = detek[1]
cv2.putText(frame,text,(x,y-10),cv2.FONT_HERSHEY_SIMPLEX,0.9,(255,255,255),2)
print("Nomor Kendaran: " + text)
# Text Reader Using Ocr
cv2.imshow("Detection", frame)
key = cv2.waitKey(1)
if key == 27:
break
cap.release()
cv2.destroyAllWindows()
I am trying to use ROI to detect the object but I am not able to do it.
Any advice please?
Crop the image before it is fed to the model
while True:
_, frame = cap.read()
im_crop = im[y1:y2, x1:x2] # set x1,x2,y1,y2 based on your ROI
blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
this will speed up the inference time as well as there is less data to process by the model
I have a problem with code below, this program code for fire detection from video files
When I run this code I have an error
in
video.release()
NameError: name 'video' is not defined
The video variable is already defined, and it's not clear to me why the error pops up
import cv2
import numpy as np
import sys
import math
title_window = 'Linear Blend'
cv2.namedWindow(title_window)
def on_trackbar(val):
pass
cv2.createTrackbar("Hmax", title_window , 0, 255, on_trackbar)
cv2.createTrackbar("Hmin", title_window , 0, 255, on_trackbar)
cv2.createTrackbar("Smax", title_window , 0, 255, on_trackbar)
cv2.createTrackbar("Smin", title_window , 0, 255, on_trackbar)
cv2.createTrackbar("Vmax", title_window , 0, 255, on_trackbar)
cv2.createTrackbar("Vmin", title_window , 0, 255, on_trackbar)
def detectFire(src): # src la anh
_, contours, _ = cv2.findContours(src, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) ## contours la danh sach cac mang mau trang
contour_sizes = [(cv2.contourArea(contour), contour) for contour in contours] ## lay dien tich cac contour
if len(contour_sizes) > 0:
biggest_contour = max(contour_sizes, key=lambda x: x[0])[1] ## contour co dien toich lon nhat ## biggest_contour danh sach toa do cac pixel mau trang
mask = np.zeros(src.shape, np.uint8)
cv2.drawContours(mask, [biggest_contour], -1, 255, -1) ## chuyen tu biggest_contour qua anh mask
rect = cv2.boundingRect(biggest_contour)
return mask, rect
else:
return src, [0,0,0,0]
if len(sys.argv) == 2:
# load video file from first command line argument
video = cv2.VideoCapture("Fire_smoke.mp4") # doc video tu duong dan
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)); # lay gia tri do rong video
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # do cao
fps = video.get(cv2.CAP_PROP_FPS) # lay gia tri fps, frame per secoond, so khung tren 1 giay
frame_time = round(1000/fps);
last_white = 0
ret, frame = video.read()
pause = False
sum_diff = 0
loop = 0
while True:
if pause is False:
ret, frame = video.read()
if not ret:
print("... end of video file reached");
break
cv2.imshow("origin", frame)
Hmax = cv2.getTrackbarPos('Hmax',title_window)
Hmin = cv2.getTrackbarPos('Hmin',title_window)
Smax = cv2.getTrackbarPos('Smax',title_window)
Smin = cv2.getTrackbarPos('Smin',title_window)
Vmax = cv2.getTrackbarPos('Vmax',title_window)
Vmin = cv2.getTrackbarPos('Vmin',title_window)
# lower = [Hmin, Smin, Vmin]
# upper = [Hmax, Smax, Vmax]
lower = [6, 152, 138] # gioi han duoi mau lua trong he mau HSV
upper = [48, 248, 255]
blur = cv2.GaussianBlur(frame, (21, 21), 0) # loc nhieu
hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV) # chuyen doi khong gian mau
lower = np.array(lower, dtype="uint8")
upper = np.array(upper, dtype="uint8")
mask = cv2.inRange(hsv, lower, upper) #
cv2.imshow("resdfd", mask) ## mask la anh trang den cua cac vung co mau lua
mask, rect = detectFire(mask)
detected_edges = cv2.Canny(mask, 100, 200, 3) ## lay bien anh
n_white = np.sum(detected_edges == 255)
diff_white = n_white - last_white
last_white = n_white
if diff_white > 0 and diff_white < 100:
sum_diff += diff_white
loop += 1
if loop > 50:
loop = 0
if sum_diff > 150:
print("fire")
else:
print("clear")
print("fluctuation:", sum_diff)
sum_diff = 0
cv2.imshow("cen", detected_edges)
# image display and key handling
output = cv2.bitwise_and(frame, frame, mask=mask)
x, y, w, h = rect
cv2.rectangle(output,(x,y),(x+w,y+h),(0,255,0),2)
cv2.line(output, (width-10, height-10), (width-10, height-10-int(n_white*height/5000)), (0,0,255), 5)
cv2.imshow(title_window, output)
if cv2.waitKey(10) == ord('x'):
break
elif cv2.waitKey(10) == ord('p'):
pause = True
elif cv2.waitKey(10) == ord('r'):
pause = False
cv2.destroyAllWindows()
video.release()
I tried to change the variables but I don't understand the cause of the error, I'm very weak in programming
I have this code that performs the mouse functions using eyes and other facial gestures with opencv and dlib. I am running this code using a button click from a tkinter window. When this code starts to run, that tkinter window freezes (i.e, I cannot click any other button from that).
Is there a way that I can make the frame used by opencv a Top level, like top level frames in tkinter so that it doesn't freezes any other frames, or how can I replace opencv frame with Tkinter toplevel frame.
P.S: I have been on it for two days, literally tried anything I can find on the internet and can't seem to find a solution.
_, frame = vid.read()
frame = cv2.flip(frame, 1)
frame = imutils.resize(frame, width=cam_w, height=cam_h)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
This is the part where changes will be made I guess. Full code is shared below.
from imutils import face_utils
from utils import *
import numpy as np
import pyautogui as pag
import imutils
import dlib
import cv2
# Thresholds and consecutive frame length for triggering the mouse action.
MOUTH_AR_THRESH = 0.3
MOUTH_AR_CONSECUTIVE_FRAMES = 3
EYE_AR_THRESH = 0.20
EYE_AR_CONSECUTIVE_FRAMES = 5
WINK_AR_DIFF_THRESH = 0.001
WINK_AR_CLOSE_THRESH = 0.2
WINK_CONSECUTIVE_FRAMES = 4
# Initialize the frame counters for each action as well as
# booleans used to indicate if action is performed or not
MOUTH_COUNTER = 0
EYE_COUNTER = 0
WINK_COUNTER = 0
INPUT_MODE = False
EYE_CLICK = False
LEFT_WINK = False
RIGHT_WINK = False
SCROLL_MODE = False
ANCHOR_POINT = (0, 0)
WHITE_COLOR = (255, 255, 255)
YELLOW_COLOR = (0, 255, 255)
RED_COLOR = (0, 0, 255)
GREEN_COLOR = (0, 255, 0)
BLUE_COLOR = (255, 0, 0)
BLACK_COLOR = (0, 0, 0)
# Initialize Dlib's face detector (HOG-based) and then create
# the facial landmark predictor
shape_predictor = "model/shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(shape_predictor)
# Grab the indexes of the facial landmarks for the left and
# right eye, nose and mouth respectively
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
(nStart, nEnd) = face_utils.FACIAL_LANDMARKS_IDXS["nose"]
(mStart, mEnd) = face_utils.FACIAL_LANDMARKS_IDXS["mouth"]
# Video capture
vid = cv2.VideoCapture(0)
resolution_w = 1366
resolution_h = 768
cam_w = 640
cam_h = 480
unit_w = resolution_w / cam_w
unit_h = resolution_h / cam_h
while True:
# Grab the frame from the threaded video file stream, resize
# it, and convert it to grayscale
# channels)
_, frame = vid.read()
frame = cv2.flip(frame, 1)
frame = imutils.resize(frame, width=cam_w, height=cam_h)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Detect faces in the grayscale frame
rects = detector(gray, 0)
# Loop over the face detections
if len(rects) > 0:
rect = rects[0]
else:
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
continue
# Determine the facial landmarks for the face region, then
# convert the facial landmark (x, y)-coordinates to a NumPy
# array
shape = predictor(gray, rect)
shape = face_utils.shape_to_np(shape)
# Extract the left and right eye coordinates, then use the
# coordinates to compute the eye aspect ratio for both eyes
mouth = shape[mStart:mEnd]
leftEye = shape[lStart:lEnd]
rightEye = shape[rStart:rEnd]
nose = shape[nStart:nEnd]
# Because I flipped the frame, left is right, right is left.
temp = leftEye
leftEye = rightEye
rightEye = temp
# Average the mouth aspect ratio together for both eyes
mar = mouth_aspect_ratio(mouth)
leftEAR = eye_aspect_ratio(leftEye)
rightEAR = eye_aspect_ratio(rightEye)
ear = (leftEAR + rightEAR) / 2.0
diff_ear = np.abs(leftEAR - rightEAR)
nose_point = (nose[3, 0], nose[3, 1])
# Compute the convex hull for the left and right eye, then
# visualize each of the eyes
mouthHull = cv2.convexHull(mouth)
leftEyeHull = cv2.convexHull(leftEye)
rightEyeHull = cv2.convexHull(rightEye)
cv2.drawContours(frame, [mouthHull], -1, YELLOW_COLOR, 1)
cv2.drawContours(frame, [leftEyeHull], -1, YELLOW_COLOR, 1)
cv2.drawContours(frame, [rightEyeHull], -1, YELLOW_COLOR, 1)
for (x, y) in np.concatenate((mouth, leftEye, rightEye), axis=0):
cv2.circle(frame, (x, y), 2, GREEN_COLOR, -1)
# Check to see if the eye aspect ratio is below the blink
# threshold, and if so, increment the blink frame counter
if diff_ear > WINK_AR_DIFF_THRESH:
if leftEAR < rightEAR:
if leftEAR < EYE_AR_THRESH:
WINK_COUNTER += 1
if WINK_COUNTER > WINK_CONSECUTIVE_FRAMES:
pag.click(button='left')
WINK_COUNTER = 0
elif leftEAR > rightEAR:
if rightEAR < EYE_AR_THRESH:
WINK_COUNTER += 1
if WINK_COUNTER > WINK_CONSECUTIVE_FRAMES:
pag.click(button='right')
WINK_COUNTER = 0
else:
WINK_COUNTER = 0
else:
if ear <= EYE_AR_THRESH:
EYE_COUNTER += 1
if EYE_COUNTER > EYE_AR_CONSECUTIVE_FRAMES:
SCROLL_MODE = not SCROLL_MODE
# INPUT_MODE = not INPUT_MODE
EYE_COUNTER = 0
# nose point to draw a bounding box around it
else:
EYE_COUNTER = 0
WINK_COUNTER = 0
if mar > MOUTH_AR_THRESH:
MOUTH_COUNTER += 1
if MOUTH_COUNTER >= MOUTH_AR_CONSECUTIVE_FRAMES:
# if the alarm is not on, turn it on
INPUT_MODE = not INPUT_MODE
# SCROLL_MODE = not SCROLL_MODE
MOUTH_COUNTER = 0
ANCHOR_POINT = nose_point
else:
MOUTH_COUNTER = 0
if INPUT_MODE:
cv2.putText(frame, "READING INPUT!", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, RED_COLOR, 2)
x, y = ANCHOR_POINT
nx, ny = nose_point
w, h = 60, 35
multiple = 1
cv2.rectangle(frame, (x - w, y - h), (x + w, y + h), GREEN_COLOR, 2)
cv2.line(frame, ANCHOR_POINT, nose_point, BLUE_COLOR, 2)
dir = direction(nose_point, ANCHOR_POINT, w, h)
cv2.putText(frame, dir.upper(), (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, RED_COLOR, 2)
drag = 18
if dir == 'right':
pag.moveRel(drag, 0)
elif dir == 'left':
pag.moveRel(-drag, 0)
elif dir == 'up':
if SCROLL_MODE:
pag.scroll(40)
else:
pag.moveRel(0, -drag)
elif dir == 'down':
if SCROLL_MODE:
pag.scroll(-40)
else:
pag.moveRel(0, drag)
if SCROLL_MODE:
cv2.putText(frame, 'SCROLL MODE IS ON!', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, RED_COLOR, 2)
# Show the frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# If the `Esc` key was pressed, break from the loop
if key == 27:
break
# Do a bit of cleanup
cv2.destroyAllWindows()
vid.release()
The program is saying Process finished with exit code 0 but i am not getting any output. I am using Python version 2.7 and the program's job is to detect free parking slots in a car park. It also has pedestrian detection. Any help will be very much appreciated please i badly need this code to work. Thanks
Here is the link to the source code, along with a video link of how it works
https://github.com/ankit1khare/ComputerVision
DESIRED OUTPUT: The program should open the input video and draw the parking overlay on top of the video.
Here are the codes for the main program
import yaml
import numpy as np
import cv2
# path references
fn = "Khare_testvideo_01.mp4" #3
#fn = "datasets\parkinglot_1_720p.mp4"
#fn = "datasets\street_high_360p.mp4"
fn_yaml = "Khare_yml_01.yml"
fn_out = "Khare_outputvideo_01.avi"
cascade_src = 'Khare_classifier_02.xml'
car_cascade = cv2.CascadeClassifier(cascade_src)
global_str = "Last change at: "
change_pos = 0.00
dict = {
'text_overlay': True,
'parking_overlay': True,
'parking_id_overlay': True,
'parking_detection': True,
'motion_detection': True,
'pedestrian_detection': False, # takes a lot of processing power
'min_area_motion_contour': 500, # area given to detect motion
'park_laplacian_th': 2.8,
'park_sec_to_wait': 1, # 4 wait time for changing the status of a region
'start_frame': 0, # begin frame from specific frame number
'show_ids': True, # shows id on each region
'classifier_used': True,
'save_video': True
}
# Set from video
cap = cv2.VideoCapture(fn)
print("video found")
video_info = { 'fps': cap.get(cv2.CAP_PROP_FPS),
'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)*0.6),
'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)*0.6),
'fourcc': cap.get(cv2.CAP_PROP_FOURCC),
'num_of_frames': int(cap.get(cv2.CAP_PROP_FRAME_COUNT))}
cap.set(cv2.CAP_PROP_POS_FRAMES, dict['start_frame']) # jump to frame number specified
def run_classifier(img, id):
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cars = car_cascade.detectMultiScale(img, 1.1, 1)
if cars == ():
return False
else:
# parking_status[id] = False
return True
# Define the codec and create VideoWriter object
if dict['save_video']:
fourcc = cv2.VideoWriter_fourcc('X','V','I','D') # options: ('P','I','M','1'), ('D','I','V','X'), ('M','J','P','G'), ('X','V','I','D')
out = cv2.VideoWriter(fn_out, -1, 25.0,(video_info['width'], video_info['height']))
print("save video -- out w * H")
# initialize the HOG descriptor/person detector. Take a lot of processing power.
if dict['pedestrian_detection']:
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# Use Background subtraction
if dict['motion_detection']:
fgbg = cv2.createBackgroundSubtractorMOG2(history=300, varThreshold=16, detectShadows=True)
# Read YAML data (parking space polygons)
with open(fn_yaml, 'r') as stream:
parking_data = yaml.load(stream)
parking_contours = []
parking_bounding_rects = []
parking_mask = []
parking_data_motion = []
if parking_data != None:
for park in parking_data:
points = np.array(park['points'])
rect = cv2.boundingRect(points)
points_shifted = points.copy()
points_shifted[:, 0] = points[:, 0] - rect[0] # shift contour to region of interest
points_shifted[:, 1] = points[:, 1] - rect[1]
parking_contours.append(points)
parking_bounding_rects.append(rect)
mask = cv2.drawContours(np.zeros((rect[3], rect[2]), dtype=np.uint8), [points_shifted], contourIdx=-1,
color = 255, thickness=-1, lineType=cv2.LINE_8)
mask = mask == 255
parking_mask.append(mask)
kernel_erode = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)) # morphological kernel
kernel_dilate = cv2.getStructuringElement(cv2.MORPH_RECT,(5,19))
if parking_data != None:
parking_status = [False]*len(parking_data)
parking_buffer = [None]*len(parking_data)
# bw = ()
def print_parkIDs(park, coor_points, frame_rev):
moments = cv2.moments(coor_points)
centroid = (int(moments['m10']/moments['m00'])-3, int(moments['m01']/moments['m00'])+3)
# putting numbers on marked regions
cv2.putText(frame_rev, str(park['id']), (centroid[0]+1, centroid[1]+1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)
cv2.putText(frame_rev, str(park['id']), (centroid[0]-1, centroid[1]-1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)
cv2.putText(frame_rev, str(park['id']), (centroid[0]+1, centroid[1]-1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)
cv2.putText(frame_rev, str(park['id']), (centroid[0]-1, centroid[1]+1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)
cv2.putText(frame_rev, str(park['id']), centroid, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv2.LINE_AA)
while(cap.isOpened()):
video_cur_pos = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0 # Current position of the video file in seconds
video_cur_frame = cap.get(cv2.CAP_PROP_POS_FRAMES) # Index of the frame to be decoded/captured next
ret, frame_initial = cap.read()
if ret == True:
frame = cv2.resize(frame_initial, None, fx=0.6, fy=0.6)
if ret == False:
print("Video ended")
break
# Background Subtraction
frame_blur = cv2.GaussianBlur(frame.copy(), (5,5), 3)
# frame_blur = frame_blur[150:1000, 100:1800]
frame_gray = cv2.cvtColor(frame_blur, cv2.COLOR_BGR2GRAY)
frame_out = frame.copy()
# Drawing the Overlay. Text overlay at the left corner of screen
if dict['text_overlay']:
str_on_frame = "%d/%d" % (video_cur_frame, video_info['num_of_frames'])
cv2.putText(frame_out, str_on_frame, (5, 30), cv2.FONT_HERSHEY_SIMPLEX,
0.8, (0, 255, 255), 2, cv2.LINE_AA)
cv2.putText(frame_out,global_str + str(round(change_pos, 2)) + 'sec', (5, 60), cv2.FONT_HERSHEY_SIMPLEX,
0.8, (255, 0, 0), 2, cv2.LINE_AA)
# motion detection for all objects
if dict['motion_detection']:
# frame_blur = frame_blur[380:420, 240:470]
# cv2.imshow('dss', frame_blur)
fgmask = fgbg.apply(frame_blur)
bw = np.uint8(fgmask==255)*255
bw = cv2.erode(bw, kernel_erode, iterations=1)
bw = cv2.dilate(bw, kernel_dilate, iterations=1)
# cv2.imshow('dss',bw)
# cv2.imwrite("frame%d.jpg" % co, bw)
(_, cnts, _) = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# loop over the contours
for c in cnts:
# print(cv2.contourArea(c))
# if the contour is too small, we ignore it
if cv2.contourArea(c) < dict['min_area_motion_contour']:
continue
(x, y, w, h) = cv2.boundingRect(c)
cv2.rectangle(frame_out, (x, y), (x + w, y + h), (255, 0, 0), 1)
# detecting cars and vacant spaces
if dict['parking_detection']:
for ind, park in enumerate(parking_data):
points = np.array(park['points'])
rect = parking_bounding_rects[ind]
roi_gray = frame_gray[rect[1]:(rect[1]+rect[3]), rect[0]:(rect[0]+rect[2])] # crop roi for faster calcluation
laplacian = cv2.Laplacian(roi_gray, cv2.CV_64F)
# cv2.imshow('oir', laplacian)
points[:, 0] = points[:, 0] - rect[0] # shift contour to roi
points[:, 1] = points[:, 1] - rect[1]
delta = np.mean(np.abs(laplacian * parking_mask[ind]))
# if(delta<2.5):
# print("ind, del", ind, delta)
status = delta < dict['park_laplacian_th']
# If detected a change in parking status, save the current time
if status != parking_status[ind] and parking_buffer[ind]==None:
parking_buffer[ind] = video_cur_pos
change_pos = video_cur_pos
# print("state ", ind,delta)
# applying classifier in case a change is detected in the status of area
# if dict['classifier_used']:
# classifier_result = run_classifier(roi_gray)
# if classifier_result:
# print(classifier_result)
# If status is still different than the one saved and counter is open
elif status != parking_status[ind] and parking_buffer[ind] != None:
if video_cur_pos - parking_buffer[ind] > dict['park_sec_to_wait']:
parking_status[ind] = status
parking_buffer[ind] = None
# If status is still same and counter is open
elif status == parking_status[ind] and parking_buffer[ind] != None:
parking_buffer[ind] = None
# changing the color on the basis on status change occured in the above section and putting numbers on areas
if dict['parking_overlay']:
for ind, park in enumerate(parking_data):
points = np.array(park['points'])
if parking_status[ind]:
color = (0, 255, 0)
rect = parking_bounding_rects[ind]
roi_gray_ov = frame_gray[rect[1]:(rect[1] + rect[3]),
rect[0]:(rect[0] + rect[2])] # crop roi for faster calcluation
res = run_classifier(roi_gray_ov, ind)
if res:
parking_data_motion.append(parking_data[ind])
# del parking_data[ind]
color = (0, 0, 255)
else:
color = (0, 0, 255)
cv2.drawContours(frame_out, [points], contourIdx=-1,
color=color, thickness=2, lineType=cv2.LINE_8)
if dict['show_ids']:
print_parkIDs(park, points, frame_out)
if parking_data_motion != []:
for index, park_coord in enumerate(parking_data_motion):
points = np.array(park_coord['points'])
color = (0, 0, 255)
recta = parking_bounding_rects[ind]
roi_gray1 = frame_gray[recta[1]:(recta[1] + recta[3]),
recta[0]:(recta[0] + recta[2])] # crop roi for faster calcluation
# laplacian = cv2.Laplacian(roi_gray, cv2.CV_64F)
# delta2 = np.mean(np.abs(laplacian * parking_mask[ind]))
# state = delta2<1
# classifier_result = run_classifier(roi_gray1, index)
# cv2.imshow('dsd', roi_gray1)
fgbg1 = cv2.createBackgroundSubtractorMOG2(history=300, varThreshold=16, detectShadows=True)
roi_gray1_blur = cv2.GaussianBlur(roi_gray1.copy(), (5, 5), 3)
# cv2.imshow('sd', roi_gray1_blur)
fgmask1 = fgbg1.apply(roi_gray1_blur)
bw1 = np.uint8(fgmask1 == 255) * 255
bw1 = cv2.erode(bw1, kernel_erode, iterations=1)
bw1 = cv2.dilate(bw1, kernel_dilate, iterations=1)
# cv2.imshow('sd', bw1)
# cv2.imwrite("frame%d.jpg" % co, bw)
(_, cnts1, _) = cv2.findContours(bw1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# loop over the contours
for c in cnts1:
print(cv2.contourArea(c))
# if the contour is too small, we ignore it
if cv2.contourArea(c) < 4:
continue
(x, y, w, h) = cv2.boundingRect(c)
classifier_result1 = run_classifier(roi_gray1, index)
if classifier_result1:
# print(classifier_result)
color = (0, 0, 255) # Red again if car found by classifier
else:
color = (0,255, 0)
classifier_result1 = run_classifier(roi_gray1, index)
if classifier_result1:
# print(classifier_result)
color = (0, 0, 255) # Red again if car found by classifier
else:
color = (0, 255, 0)
cv2.drawContours(frame_out, [points], contourIdx=-1,
color=color, thickness=2, lineType=cv2.LINE_8)
if dict['pedestrian_detection']:
# detect people in the image. Slows down the program, requires high GPU speed
(rects, weights) = hog.detectMultiScale(frame, winStride=(4, 4), padding=(8, 8), scale=1.05)
# draw the bounding boxes
for (x, y, w, h) in rects:
cv2.rectangle(frame_out, (x, y), (x + w, y + h), (255, 0, 0), 2)
# write the output frames
if dict['save_video']:
#if video_cur_frame % 35 == 0: # take every 30 frames
out.write(frame_out)
# Display video
cv2.imshow('frame', frame_out)
# cv2.imshow('background mask', bw)
k = cv2.waitKey(1)
if k == ord('q'):
break
elif k == ord('c'):
cv2.imwrite('frame%d.jpg' % video_cur_frame, frame_out)
elif k == ord('j'):
cap.set(cv2.CAP_PROP_POS_FRAMES, video_cur_frame+1000) # jump 1000 frames
elif k == ord('u'):
cap.set(cv2.CAP_PROP_POS_FRAMES, video_cur_frame + 500) # jump 500 frames
if cv2.waitKey(33) == 27:
break
cv2.waitKey(0)
cap.release()
if dict['save_video']: out.release()
cv2.destroyAllWindows()
change your these lines
`if dict['save_video']:
fourcc = cv2.VideoWriter_fourcc('X','V','I','D') # options: ('P','I','M','1'), ('D','I','V','X'), ('M','J','P','G'), ('X','V','I','D')
out = cv2.VideoWriter(fn_out, -1, 25.0,(video_info['width'], video_info['height']))`
to
`if dict['save_video']:
fourcc = cv2.VideoWriter_fourcc(*'XVID') # options: ('P','I','M','1'), ('D','I','V','X'), ('M','J','P','G'), ('X','V','I','D')
out = cv2.VideoWriter(fn_out, fourcc, 25.0,(video_info['width'], video_info['height']))`
and try again
Also put your functions/methods definitions to the top of the code.
I'm currently combing two examples from OpenCv which let you detect your face and track object. The purpose is to first detect the face and then track it.
My code currently :
import numpy as np
import cv2
import cv2.cv as cv
import video
import math
cascade = 0
counter = 0
class App(object):
def __init__(self, video_src):
self.cam = video.create_capture(video_src)
ret, self.frame = self.cam.read()
cv2.namedWindow('camshift')
self.selection = None
self.drag_start = None
self.tracking_state = 0
self.show_backproj = False
def show_hist(self):
bin_count = self.hist.shape[0]
bin_w = 24
img = np.zeros((256, bin_count*bin_w, 3), np.uint8)
for i in xrange(bin_count):
h = int(self.hist[i])
cv2.rectangle(img, (i*bin_w+2, 255), ((i+1)*bin_w-2, 255-h), (int(180.0*i/bin_count), 255, 255), -1)
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
cv2.imshow('hist', img)
'''
#param: img the image for the face detection
#param: cascade the cascade of the ViolaJones face detection
#return: rects, an array of the cornors of the detected face. [x1 y1 x2 y2]
'''
def detect(self,img, cascade):
# Detect the faces
rects = cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=3, minSize=(150, 150), flags = cv.CV_HAAR_SCALE_IMAGE)
# Check if any faces are detected
if len(rects) == 0:
# return empty array
return []
else:
# Get the correct x and y values
rects[:,2:] += rects[:,:2]
# loop over the recs and shrink the width with 40%
for rec in rects:
rec[0] = rec[0] + int(math.floor(((rec[2] - rec[0])*0.4)/2))
rec[2] = rec[2] - int(math.floor(((rec[2] - rec[0])*0.4)/2))
return rects
def draw_rects(self,img, rects, color):
for x1, y1, x2, y2 in rects:
cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
def getFace(self,img):
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.equalizeHist(gray)
rects = self.detect(gray, cascade)
self.rects = rects
img = self.draw_rects(img, rects, (0, 255, 0))
if len(rects) != 0:
self.selection = rects[0][1], rects[0][0], rects[0][3], rects[0][2]
return rects
def run(self):
counter= 0
rects = None
while True:
counter +=1;
ret, self.frame = self.cam.read()
vis = self.frame.copy()
if counter % 150 == 0:
rects = self.getFace(vis);
hsv = cv2.cvtColor(self.frame, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
if rects is not None:
self.draw_rects(vis, rects, (0, 255, 0))
if self.selection:
print 'test0'
x0, y0, x1, y1 = self.selection
self.track_window = (x0, y0, x1-x0, y1-y0)
hsv_roi = hsv[x0:x1,y0:y1]
mask_roi = mask[x0:x1,y0:y1]
hist = cv2.calcHist( [hsv_roi], [0], mask_roi, [16], [0, 180] )
cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX);
self.hist = hist.reshape(-1)
self.show_hist()
vis_roi = vis[x0:x1,y0:y1]
cv2.bitwise_not(vis_roi, vis_roi)
vis[mask == 0] = 0
self.tracking_state = 1
self.selection = None
if self.tracking_state == 1:
self.selection = None
prob = cv2.calcBackProject([hsv], [0], self.hist, [0, 180], 1)
prob &= mask
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
track_box, self.track_window = cv2.CamShift(prob, self.track_window, term_crit)
if self.show_backproj:
vis[:] = prob[...,np.newaxis]
try: cv2.ellipse(vis, track_box, (0, 0, 255), 2)
except: print track_box
cv2.imshow('camshift', vis)
ch = 0xFF & cv2.waitKey(5)
if ch == 27:
break
if ch == ord('b'):
self.show_backproj = not self.show_backproj
cv2.destroyAllWindows()
if __name__ == '__main__':
import sys, getopt
args, video_src = getopt.getopt(sys.argv[1:], '', ['cascade=', 'nested-cascade='])
try: video_src = video_src[0]
except: video_src = 0
args = dict(args)
cascade_fn = args.get('--cascade', "haarcascade_frontalface_alt.xml")
cascade = cv2.CascadeClassifier(cascade_fn)
App(video_src).run()
Currently I show where the face was initially (in a green rectangle) and what is tracked at the moment (in a red oval). I am able to detect the face, but the face tracker keeps tracking all other stuff except for my face (always on one or two shoulders). I suspected it had something to do with the coordinates, but I've checked them and they seem fine (mask_roi, hsv_roi, vis_roi). An example :
Can anybody point out my mistake ?
I was unable to run your code (no module named video). I'm using OpenCV 2.4.4 and my solution to your problem is as follows:
Make sure your face is properly lit (no shadows, bright natural skin color, dark background helps a lot)
play with hsv values in mask = cv2.inRange(hsv, np.array((0., 60., 32.)), np.array((180., 255., 255.))). I'm using those: np.array((0., 51., 89.)), np.array((17., 140., 255.))
Tip:
you could make a window just for your mask so you can see how well it works
after: cv2.namedWindow('camshift') put cv2.namedWindow('mask')
and after: mask = cv2.inRange... put cv2.imshow('mask', mask) or mask_roi.