OpenCV: lossless capture? - python

I wrote a simple python program that uses OpenCV and scantailor-cli to scan documents with my document camera and store them as high-contrast, low filesize PDFs.
Approach:
capture the document in full color with OpenCV at 1920x1080 in JPG format
convert to high-contrast black-and-white PDF with scantailor-cli
This generally works, however I have discovered an issue with my usage of OpenCV that is limiting the quality of my scans. I'm hoping someone here can give me the nudge I need to improve this.
When I use the UI-based capture software that came with my IPEVO document camera (Visualizer), I get a nice, clean capture that makes a great conversion into high-contrast. When I use OpenCV, I get unwanted compression artifacts, and conversion to high-contrast yields unsatisfactory results.
Full program listing is here: https://github.com/jonashw/Document-Scanner/blob/master/scan.py
Relevant excerpt below
def do_camera(device_id):
    """Interactively capture frames from camera `device_id`.

    Opens a preview window; SPACE saves the current frame as a lossless PNG
    under the module-level `capture_path` directory, ESC (or a failed grab)
    ends the session.

    Returns the list of file paths written.
    """
    cam = cv2.VideoCapture(device_id)  # optionally pass cv2.CAP_DSHOW on Windows
    w, h = 1920, 1080
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, w)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
    cam.set(cv2.CAP_PROP_FORMAT, 0)
    # Read back what the driver actually accepted; it may differ from the request.
    w = cam.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)
    grab_mode = cam.get(cv2.CAP_PROP_MODE)
    # Renamed from `format`, which shadowed the builtin of the same name.
    pixel_format = cam.get(cv2.CAP_PROP_FORMAT)
    print("capturing with w={}, h={}, grab_mode={}, format={}".format(w, h, grab_mode, pixel_format))
    # NOTE(review): many UVC cameras deliver 1920x1080 only as an MJPG stream,
    # so compression artifacts can originate in the camera itself regardless of
    # the file format chosen below; requesting a raw FOURCC via
    # cam.set(cv2.CAP_PROP_FOURCC, ...) may help — TODO confirm for this device.
    cv2.namedWindow("test", cv2.WINDOW_NORMAL)
    img_counter = 0
    img_paths = []
    while True:
        ret, frame = cam.read()
        if not ret:
            print("failed to grab frame")
            break
        cv2.imshow("test", frame)
        cv2.resizeWindow("test", 1280, 720)
        cv2.moveWindow("test", 1920, 0)
        k = cv2.waitKey(1)
        if k % 256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break
        elif k % 256 == 32:
            # SPACE pressed: save the frame losslessly (PNG compression level 0
            # trades file size for write speed; PNG is lossless at any level).
            img_name = "capture_{}.png".format(img_counter)
            img_path = os.path.join(capture_path, img_name)
            img_paths.append(img_path)
            os.makedirs(capture_path, exist_ok=True)
            cv2.imwrite(img_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
            print("{} written!".format(img_name))
            img_counter += 1
    cam.release()
    cv2.destroyAllWindows()
    return img_paths
I suspect the exact line I need to alter is this one.
cv2.imwrite(img_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
As you can see, I have opted for PNG format, which should not result in any JPEG-like compression artifacts. Despite this selection, I get artifacts.
I have also tried embracing JPEG and setting the quality to max. This does not improve matters:
cv2.imwrite(img_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 100])
What can I do? Please and thank you

Related

cv2.imwrite( ) only saves last image

I am trying to build a short script to take multiple images with my stereo-camera and save them
to a directory when I press a button.
But for some reason I only get the last image, even if I've taken multiple pictures.
It also shows no errors and prints out the right strings as I've written in my code.
But all I get is the last image pair.
I've looked at several posts but none of them have the same problem.
EDIT: I didn't add seconds to the image names as everyone here suggested, so the images got overwritten as they were taken during the same minute.
Here is my finished code now if anyone wants to use it:
import numpy as np
import cv2
import os
import datetime
# shape of wholeFrame: (240, 640, 3)
# Capture a side-by-side stereo feed and save timestamped snapshots on 'p'.
cap = cv2.VideoCapture(1)
now = 0
imgs_taken = 0
newpath_l = "Recorded_Images/left_imgs"
newpath_r = "Recorded_Images/right_imgs"
newpath = "Recorded_Images/both_imgs"
# Make sure every output directory exists before the capture loop starts.
for out_dir in (newpath_l, newpath_r, newpath):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
while True:
    cap.grab()
    ret, wholeFrame = cap.retrieve()
    if not ret:
        break
    # The camera delivers both eyes side by side in one 640-wide frame.
    leftFrame = wholeFrame[:, 0:320, :]
    rightFrame = wholeFrame[:, 320:640, :]
    # Rectifying images here
    stereoImage = np.concatenate((leftFrame, rightFrame), axis=1)
    cv2.imshow('Take Snapshots', stereoImage)
    key = cv2.waitKey(1) & 0xFF
    # Saving images on keypress with a second-resolution timestamp.
    if key == ord('p'):
        now = datetime.datetime.now()
        if not cv2.imwrite(newpath_l + now.strftime("/img_left_%d%m%Y_%H%M%S.png"), leftFrame):
            print("Left Snapshot not taken")
        else:
            print("Left Snapshot taken.")
        if not cv2.imwrite(newpath_r + now.strftime("/img_right_%d%m%Y_%H%M%S.png"), rightFrame):
            print("Right Snapshot not taken")
        else:
            print("Right Snapshot taken.")
        if not cv2.imwrite(newpath + now.strftime("/img_both_%d%m%Y_%H%M%S.png"), stereoImage):
            print("Stereo-Snapshot not taken")
        else:
            print("Stereo-Snapshot taken.")
        imgs_taken = imgs_taken + 1
    # 'x' reports the tally and ends the session.
    if key == ord('x'):
        print("Number of images taken: " + str(imgs_taken))
        break
cv2.destroyAllWindows()
cap.release()
There is no problem with cv2.imwrite itself, but rather in how you name the frames that you are saving. You are naming the frames as Day + Month + Year _ Hour + Minute. This means any frame you save within a given minute will be overridden by the last frame saved in that minute. Saving a frame at, say, 19:00:23 will be overridden by a frame saved at 19:00:34.
Depending on your use-case, you could add + now.strftime("%S") to be able to save one frame every second, or you could even add + now.strftime("%S_%f") for millisecond precision.

OpenCV + RaspberyPI like a room monitor

I'm new to using OpenCV and Python; my project is about a smart home.
I managed to install opencv on a raspberrypi and using a webcam.
My program will work on the following three scenarios.
1.A person enters the room, detects face and person, sends message "Dad is in room 1."
2.A person enters the room, detects face but not person, sends message "Unknown person is in room 1"
3. No one is in the room, send a message "No one is in room 1"
Scenarios 1 and 2 I have an idea how to solve, but where I am stuck is in scenario 3. I tried to save the name of the detected person in a variable, so that when it is empty the message would be sent, but it has not worked for me.
The code I am using is the following, the problem I have is at the end of the code:
import cv2, sys, numpy, os
size = 1
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'att_faces'

# Part 1: Create fisherRecognizer
print('Training...')
# Create a list of images and a list of corresponding names
(images, labels, names, subject_id) = ([], [], {}, 0)
# Get the folders containing the training data
for (subdirs, dirs, files) in os.walk(fn_dir):
    # Loop through each folder named after the subject in the photos
    for subdir in dirs:
        names[subject_id] = subdir
        subjectpath = os.path.join(fn_dir, subdir)
        # Loop through each photo in the folder
        for filename in os.listdir(subjectpath):
            # Skip non-image formats
            f_name, f_extension = os.path.splitext(filename)
            if f_extension.lower() not in ['.png', '.jpg', '.jpeg', '.gif', '.pgm']:
                print("Skipping "+filename+", wrong file type")
                continue
            path = subjectpath + '/' + filename
            # Add to training data (grayscale image + integer subject label)
            images.append(cv2.imread(path, 0))
            labels.append(int(subject_id))
        subject_id += 1

(im_width, im_height) = (112, 92)
# Create a Numpy array from the two lists above
(images, labels) = [numpy.array(lis) for lis in [images, labels]]

# OpenCV trains a model from the images.
# NOTE(review): createFisherFaceRecognizer is the old opencv-contrib API;
# recent versions spell it cv2.face.FisherFaceRecognizer_create() — confirm
# against the installed OpenCV version.
model = cv2.face.createFisherFaceRecognizer()
model.train(images, labels)

# Part 2: Use fisherRecognizer on camera stream
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)
while True:
    # Loop until the camera is working
    rval = False
    while not rval:
        # Put the image from the webcam into 'frame'
        (rval, frame) = webcam.read()
        if not rval:
            print("Failed to open webcam. Trying again...")
    # Flip the image (optional)
    frame = cv2.flip(frame, 1, 0)
    # Convert to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Resize to speed up detection (optional, change size above)
    mini = cv2.resize(gray, (int(gray.shape[1] / size), int(gray.shape[0] / size)))
    # Detect faces and loop through each one
    faces = haar_cascade.detectMultiScale(mini)
    for face_i in faces:
        # Coordinates of face after scaling back by `size`
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))
        # Try to recognize the face
        prediction = model.predict(face_resize)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
        # Write the name of the recognized face
        cv2.putText(frame,
                    '%s - %.0f' % (names[prediction[0]], prediction[1]),
                    (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))
        # BUG FIX: the original used '%S', which is not a valid format
        # character and raises ValueError at runtime.
        face_name = '%s' % (names[prediction[0]])  # save the name in a variable
        print(face_name + "Is in the room..")  # print the name in the terminal
    # BUG FIX: the empty-room check must live OUTSIDE the face loop — the loop
    # body never runs when no faces are detected, so the original message
    # could never be printed.
    if len(faces) == 0:
        print("The room is empty...")
    # Show the image and check for ESC being pressed
    cv2.imshow('OpenCV', frame)
    key = cv2.waitKey(10)
    if key == 27:
        break
The code I am using is based on the following tutorial: Face Detection
Any help is appreciated, thank you. Greetings
If there are no faces detected in the room, your code does not enter the for i in range(len(faces)) loop, which by the way could be simplified to for i in faces.
Thus, checking whether faces is empty solves your problem. (Note that a bare for/else would not work here: a loop's else clause runs whenever the loop finishes without hitting break, so it would fire even when faces were detected.)
# BUG FIX: a plain for/else does NOT work here — a loop's `else` clause runs
# whenever the loop finishes without `break`, so it would print even when
# faces were found. Test for emptiness explicitly instead.
if len(faces) == 0:
    print("room is empty")
else:
    for face in faces:
        do_stuff(face)

Extracting keyframes | Python | Opencv

I am currently working on keyframe extraction from videos.
Code :
while success:
success, currentFrame = vidcap.read()
isDuplicate = False
limit = count if count <= 10 else (count - 10)
for img in xrange(limit, count):
previusFrame = cv2.imread("%sframe-%d.png" % (outputDir, img))
try:
difference = cv2.subtract(currentFrame, previusFrame)
except:
pass
This gives me huge amounts of frames.
Expected output: Calculate the pixel difference between frames, then compare it with a threshold value and store the unique keyframes.
Working on videos for the first time. please guide on how to proceed to achieve the expected output
Here is a script to extract I-frames with ffprobe and OpenCV:
import os
import cv2
import subprocess
filename = '/home/andriy/Downloads/video.mp4'
def get_frame_types(video_fn):
    """Return an iterator of (frame_index, pict_type) pairs for `video_fn`.

    pict_type is ffprobe's frame type letter ('I', 'P', 'B').  Probing is
    restricted to the first video stream (`-select_streams v:0`) so frames of
    audio or data streams cannot slow the probe down or disturb the
    index-to-frame mapping.

    Raises subprocess.CalledProcessError if ffprobe fails, and FileNotFoundError
    if ffprobe is not installed.
    """
    command = ('ffprobe -v error -select_streams v:0 '
               '-show_entries frame=pict_type -of default=noprint_wrappers=1').split()
    out = subprocess.check_output(command + [video_fn]).decode()
    frame_types = out.replace('pict_type=', '').split()
    return zip(range(len(frame_types)), frame_types)
def save_i_keyframes(video_fn):
    """Save every I-frame of `video_fn` as '<basename>_i_frame_<n>.jpg'.

    Frame indices come from get_frame_types(); frames are fetched by seeking
    with CAP_PROP_POS_FRAMES. Prints a message per saved frame, or a notice
    when the video has no I-frames.
    """
    frame_types = get_frame_types(video_fn)
    i_frames = [x[0] for x in frame_types if x[1] == 'I']
    if i_frames:
        basename = os.path.splitext(os.path.basename(video_fn))[0]
        cap = cv2.VideoCapture(video_fn)
        for frame_no in i_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            ret, frame = cap.read()
            # BUG FIX: the original ignored `ret`; on a failed seek/decode,
            # `frame` is None and imwrite raises. Skip that frame instead.
            if not ret:
                print('Could not read frame ' + str(frame_no) + ' of ' + video_fn)
                continue
            outname = basename + '_i_frame_' + str(frame_no) + '.jpg'
            cv2.imwrite(outname, frame)
            print('Saved: ' + outname)
        cap.release()
    else:
        print('No I-frames in ' + video_fn)
# Run the extraction only when executed as a script, so the functions above
# can be imported without side effects.
if __name__ == '__main__':
    save_i_keyframes(filename)
You can change 'I' to 'P' if you need to extract P-frames.

ipython cv2.imwrite() doesn't work - produces no errors, but also no files

I have written some face detection and normalization code - this part takes 20 photos of the face and saves it to a folder.
I have tried several times to make it save but the cv2.imwrite doesn't work. It did it once but now it doesn't let me find the photos.
I've already implement some ideas from other problems from stack overflow but it still doesn't work.
When I run the code there are no errors or anything, it just doesn't save the photos.
This is the python code and I run it via ipython notebook:
folder = "C:\Users\John\people" + raw_input('Person: ').lower() # input name
cv2.namedWindow("PyData Tutorial", cv2.WINDOW_AUTOSIZE)
if not os.path.exists(folder):
os.mkdir(folder)
counter = 1
timer = 0
while counter < 21 : # take 20 pictures
frame = webcam.get_frame()
faces_coord = detector.detect(frame) # detect
if len(faces_coord) and timer % 700 == 50: # every Second or so
faces = normalize_faces(frame, faces_coord) # norm pipeline
cv2.imwrite(folder + 'C:\Users\John\people' + str(counter) + '.jpg', faces[0])
plt_show(faces[0], "Images Saved:" + str(counter))
clear_output(wait = True) # saved face in notebook
counter += 1
draw_rectangle(frame, faces_coord) # rectangle around face
cv2.imshow("PyData Tutorial", frame) # live feed in external
cv2.waitKey(50)
timer += 50
cv2.destroyAllWindows()
else:
print "This name already exists."
Thank you.

Python OpenCV: Refresh images when a key is pressed

I have developed a program using python opencv2 module.
The program uploads an image whenever a key is pressed.
Here's the Pseudo code:
import cv2
from msvcrt import getch

# NOTE(review): the original pseudo code was not valid Python — filenames were
# unquoted, imshow lacked the window name, and several calls were missing
# parentheses or colons. Repaired to runnable form with the same intent:
# show orange.jpg on 'w', banana.jpg on 'a'. `filedir` is defined elsewhere.
while True:
    k = getch()
    if k == 'w':
        img = cv2.imread(filedir + "orange.jpg")
        cv2.namedWindow("preview")
        cv2.imshow("preview", img)
        cv2.waitKey()
        cv2.destroyAllWindows()
    elif k == 'a':
        img = cv2.imread(filedir + "banana.jpg")
        cv2.namedWindow("preview")
        cv2.imshow("preview", img)
        cv2.waitKey()
        cv2.destroyAllWindows()
This is self explanatory, as i am trying to upload an 'orange.jpg' file, when 'w' is pressed.
My real question is: How to design the program in such a manner, that user doesn't have to press the key twice, one key press closes the image file, and other key press opens the file. This fails the design, as I want processing to happen in one single keystroke. Even if user presses 'w' and 'orange.jpg' is already uploaded, instead of closing this file, the file should get refreshed. Similarly, when user presses 'a', and the 'orange.jpg' is open, then the 'orange.jpg' file should gets closed and banana.jpg should get open automatically, and this should be one time operation. As of now, I have to press the keys two times, to perform this task.
I have the code implemented, so even if someone suggests me to go to pygtk and upload image by pressing key using that, I have no issues. My only goal is to destroy the images uploaded without much of user interference, i.e. the processing should appear autonomous.
As beark has said, using getch() in the program means that focus will always be on the console. I was not satisfied with this and wanted only the image windows to respond to key presses, but the console was hindering this action.
Thanks.
First, get rid of the getch(). It will only work while the console window has the focus, which is not really portable.
Use waitKey() instead:
import cv2

# Single window whose image is swapped in place on each key press.
cv2.namedWindow("lala")
img = cv2.imread(filedir + "orange.jpg")  # load initial image (filedir defined elsewhere)
while True:
    cv2.imshow("lala", img)
    # waitKey waits up to 100 ms for a key and returns -1 when none is pressed.
    # BUG FIX: the original wrapped this in chr(), which raises ValueError on
    # -1 and turned `k` into a str, so the `k == 27` ESC check could never
    # match. Compare the raw integer against ord(...) instead.
    k = cv2.waitKey(100)
    if k == ord('w'):  # toggle current image
        img = cv2.imread(filedir + "orange.jpg")
    elif k == ord('a'):
        img = cv2.imread(filedir + "banana.jpg")
    elif k == 27:  # escape key
        break
cv2.destroyAllWindows()
I have solved the issue:
import sys
import cv2
import os
def main():
File_Lst =[]
plat = sys.platform
#print plat
if plat == 'win32': #for windows operating system
File_dir = "C:\\Users\\user\\Desktop\\fruit\\"
elif plat == 'linux2': # for linux
File_dir = "/host/Users/user/Desktop/fruit/"
for file in os.listdir(File_dir):
File_Lst.append(file)
print File_Lst
welcome_index = File_Lst.index('welcome.jpg')
welcome_str = File_Lst[welcome_index]
orange_index = File_Lst.index('orange.jpg')
orange_str = File_Lst[orange_index]
apple_index = File_Lst.index('apple.jpg')
apple_str = File_Lst[apple_index]
banana_index = File_Lst.index('banana.jpg')
banana_str = File_Lst[banana_index]
doughnuts_index = File_Lst.index('doughnuts.jpg')
doughnuts_str = File_Lst[doughnuts_index]
img = cv2.imread(File_dir + welcome_str )
cv2.destroyAllWindows()
cv2.imshow("Press KEYS to know which food is good or bad", img)
while True:
k = cv2.waitKey(0)
if k == ord('w'): # wait for 'w' key to upload orange nutrition information
img = cv2.imread(File_dir + orange_str)
newx,newy = img.shape[1]/2,img.shape[0]/2 #new size (w,h)
img = cv2.resize(img,(newx,newy))
cv2.destroyAllWindows()
cv2.imshow("Orange Nutritional Information", img)
elif k == ord('a'): # wait for 'w' key to upload apple nutrition information
img = cv2.imread(File_dir + apple_str)
newx,newy = img.shape[1]/2,img.shape[0]/2 #new size (w,h)
img = cv2.resize(img,(newx,newy))
cv2.destroyAllWindows()
cv2.imshow("Apple Nutritional Information", img)
elif k == ord('s'): # wait for 'w' key to upload apple nutrition information
img = cv2.imread(File_dir + banana_str)
newx,newy = img.shape[1]/2,img.shape[0]/2 #new size (w,h)
img = cv2.resize(img,(newx,newy))
cv2.destroyAllWindows()
cv2.imshow("Banana Nutritional Information", img)
elif k == 32:
break
cv2.destroyAllWindows()
else:
img = cv2.imread(File_dir + doughnuts_str)
cv2.destroyAllWindows()
cv2.imshow("Bad, Have good eating habits CHUMP", img)
continue
main()
I am destroying the window for every image show, this way, the consistency that every key strokes corresponds to fresh image upload gets maintained

Categories