Extracting keyframes | Python | Opencv - python

I am currently working on keyframe extraction from videos.
Code :
while success:
success, currentFrame = vidcap.read()
isDuplicate = False
limit = count if count <= 10 else (count - 10)
for img in xrange(limit, count):
previusFrame = cv2.imread("%sframe-%d.png" % (outputDir, img))
try:
difference = cv2.subtract(currentFrame, previusFrame)
except:
pass
This gives me huge amounts of frames.
Expected ouput: Calculate pixel difference between frames and then compare it with a threshold value and store unique keyframes.
Working on videos for the first time. please guide on how to proceed to achieve the expected output

Here is a script to extract I-frames with ffprobe and OpenCV:
import os
import cv2
import subprocess
filename = '/home/andriy/Downloads/video.mp4'
def get_frame_types(video_fn):
command = 'ffprobe -v error -show_entries frame=pict_type -of default=noprint_wrappers=1'.split()
out = subprocess.check_output(command + [video_fn]).decode()
frame_types = out.replace('pict_type=','').split()
return zip(range(len(frame_types)), frame_types)
def save_i_keyframes(video_fn):
frame_types = get_frame_types(video_fn)
i_frames = [x[0] for x in frame_types if x[1]=='I']
if i_frames:
basename = os.path.splitext(os.path.basename(video_fn))[0]
cap = cv2.VideoCapture(video_fn)
for frame_no in i_frames:
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
ret, frame = cap.read()
outname = basename+'_i_frame_'+str(frame_no)+'.jpg'
cv2.imwrite(outname, frame)
print ('Saved: '+outname)
cap.release()
else:
print ('No I-frames in '+video_fn)
if __name__ == '__main__':
save_i_keyframes(filename)
You can change 'I' to 'P' if you need to extract P-frames.

Related

how do i extract every #nth frame from a video with a python script?

how do i extract every #nth frame from a video with a python script?
the one where I don't extract every int(args.framegap) doesn't hang so could anyone show me what i'm doing wrong?
this is my script but it hangs on extracting frames, anyone who knows why or how to fix this?
i also want them named properly, for example if i extract every 5 frames they should be named 001.png , 006.png , 011.png etc..
thanks
import argparse
import cv2
import time
import os
import shutil
parser = argparse.ArgumentParser(description='arguments')
parser.add_argument('--videofile', type=str, help='path to your video file, for example --videofile C:\file\video\extract\video.mp4')
parser.add_argument('--projectname', type=str, help='name of the project to create the directories')
parser.add_argument('--framegap', type=int, help='name of the project to create the directories')
args = parser.parse_args()
doc_path = os.path.expanduser('~\Documents')
data_path = os.path.expanduser('~\Documents\\visionsofchaos\\fewshot\\data')
train_filtered = data_path+str(args.projectname)+'_train'+'\\'+'input_filtered'
#take every #nth frame
def video_to_frames(input_loc, output_loc):
"""Function to extract frames from input video file
and save them as separate frames in an output directory.
Args:
input_loc: Input video file.
output_loc: Output directory to save the frames.
Returns:
None
"""
try:
os.mkdir(output_loc)
except OSError:
pass
# Log the time
time_start = time.time()
# Start capturing the feed
cap = cv2.VideoCapture(input_loc)
# Find the number of frames
video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
print ("Number of frames: ", video_length)
count = 0
print ("Converting video..\n")
# Start converting the video
while cap.isOpened():
# Extract the frame
ret, frame = cap.read()
if not ret:
continue
# Write the results back to output location.
if count % int(args.framegap) == 0:
cv2.imwrite(train_filtered + "/%#03d.png" % (count+1), frame)
count += 1
# If there are no more frames left
if (count > (video_length-1)):
# Log the time again
time_end = time.time()
# Release the feed
cap.release()
# Print stats
print ("Done extracting frames.\n%d frames extracted" % count)
print ("It took %d seconds forconversion." % (time_end-time_start))
break
if __name__=="__main__":
input_loc = args.videofile
output_loc = data_path + '\\' + args.projectname + '_gen\\input_filtered'
video_to_frames(input_loc, output_loc)
All problem makes wrong indentations
You have to run count += 1 outside if count % int(args.framegap) == 0:
You run it inside if so after first frame it changes to count = 1 and later if doesn't allow to run anoter count += 1 so it is always count = 1 and this blocks all code.
count = -1
# loop
count += 1
# --- the same indentations ---
if count % int(args.framegap) == 0:
#.... code ...
or
count = 0
# loop
if count % int(args.framegap) == 0:
#.... code ...
# --- the same indentations ---
count += 1
I think you should also change indentations with another if (count > (video_length-1)) because it is also inside if count % int(args.framegap) == 0: and it can make problems
count = 0
# loop
if count % int(args.framegap) == 0:
#.... code ...
# --- the same indentations ---
count += 1
# --- the same indentations ---
if count > (video_length-1):
#.... code ...

OpenCV: lossless capture?

I wrote a simple python program that uses OpenCV and scantailor-cli to scan documents with my document camera and store them as high-contrast, low filesize PDFs.
Approach:
capture the document in full color with OpenCV at 1920x1080 in JPG format
convert to high-contrast black-and-white PDF with scantailor-cli
This generally works, however I have discovered an issue with my usage of OpenCV that is limiting the quality of my scans. I'm hoping someone here can give me the nudge I need to improve this.
When I use the UI-based capture software that came with my IPEVO document camera (Visualizer), I get a nice, clearn capture that makes a great conversion into high-contrast. When I use OpenCV, I get unwanted compression artifacts and conversion to high-contrast yields unsatisfactory results.
Full program listing is here: https://github.com/jonashw/Document-Scanner/blob/master/scan.py
Relevant excerpt below
def do_camera(device_id):
cam = cv2.VideoCapture(device_id)#,cv2.CAP_DSHOW)
w,h = 1920,1080
cam.set(cv2.CAP_PROP_FRAME_WIDTH, w)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
cam.set(cv2.CAP_PROP_FORMAT, 0)
w = cam.get(cv2.CAP_PROP_FRAME_WIDTH)
h = cam.get(cv2.CAP_PROP_FRAME_HEIGHT)
grab_mode = cam.get(cv2.CAP_PROP_MODE)
format = cam.get(cv2.CAP_PROP_FORMAT)
print("capturing with w={}, h={}, grab_mode={}, format={}".format(w,h,grab_mode, format))
#ok = False
#i = -100
#while (not ok) and i < 10:
#if i != 0:
#print("setting grab_mode {}".format(grab_mode + i))
#ok = cam.set(cv2.CAP_PROP_MODE, grab_mode + i)
#i += 1
#if ok:
#gm = cam.get(cv2.CAP_PROP_MODE)
#printf("Grab mode = {}", format(gm))
cv2.namedWindow("test", cv2.WINDOW_NORMAL)
img_counter = 0
img_paths = []
while True:
ret, frame = cam.read()
if not ret:
print("failed to grab frame")
break
cv2.imshow("test", frame)
cv2.resizeWindow("test",1280,720)
cv2.moveWindow("test",1920,0)
k = cv2.waitKey(1)
if k%256 == 27:
# ESC pressed
print("Escape hit, closing...")
break
elif k%256 == 32:
# SPACE pressed
img_name = "capture_{}.png".format(img_counter)
img_path = os.path.join(capture_path, img_name)
img_paths.append(img_path)
os.makedirs(capture_path, exist_ok=True)
cv2.imwrite(img_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
print("{} written!".format(img_name))
img_counter += 1
cam.release()
cv2.destroyAllWindows()
return img_paths
I suspect the exact line I need to alter is this one.
cv2.imwrite(img_path, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])
As you can see, I have opted for PNG format, which should not result in any JPEG-like compression artifacts. Despite this selection, I get artifacts.
I have also tried embracing JPEG and setting the quality to max. This does not improve matters:
cv2.imwrite(img_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 100])
What can I do? Please and thank you

re.findall with negative numbers, my code only works on positive numbers

I am trying to get a number from a program on my screen using OpenCV.
I have the exact spot on the screen that the value is at and I am able to use image to text recognition to turn the live updating image into text.
the text I get is a string as follows (profit: 12.34, whatever it is at the time)
I am for some reason unable to just get the number so I am using re.findall to get the number in the string. it works like a hot damn as long as the value is >= 0 . I get a returned value as a float. works perfect. but the second the number goes negative I get this error message
File "C:/Users/austi/.spyder-py3/OpenCV_Files/Closest_workingCV.py", line 55, in <module>
price = re.findall("\d+", text)[0]
IndexError: list index out of range
here is my code thus far
import numpy as np
from PIL import ImageGrab
import cv2
import time
import pytesseract
import re
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
while True:
def process_img(original_image):
processed_img = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
return processed_img
def process_img1(original_image1):
processed_img1 = cv2.cvtColor(original_image1, cv2.COLOR_BGR2GRAY)
processed_img1 = cv2.Canny(processed_img1, threshold1=200, threshold2=300)
return processed_img1
coor1 = (20, 150, 950, 950)
coor2 = (60, 550, 250, 590)
# coor3 = (20, 150, 950, 950)
#last_time = time.time()
for i in range(2):
if i == 0:
x = coor1
screen = np.array(ImageGrab.grab(bbox=(x)))
new_screen = process_img(screen)
#screen('Loop took {} seconds'.format(time.time()-last_time))
#last_time = time.time()
cv2.imshow('window', new_screen)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
elif i == 1:
x = coor2
screen1 = np.array(ImageGrab.grab(bbox=(x)))
new_screen1 = process_img(screen1)
cv2.imshow('window1', new_screen1)
text = pytesseract.image_to_string(new_screen1)
#price = text.split(":")[1]
price = re.findall("\d+", text)[0]
#rint(repr(text))
#price = re.findall("\d+","Foo -111 Bar 55", text)
price = float(price)
#text = [float(s) for s in re.findall(r'-?\d+\.?\d*', text)]
#print(text)
print(price)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
like I said it updates that number multiple times a second flawlessly so long as its not a negative number.
anyone have any suggestions on how to fix this? or any better ways to get to my end goal.
I've searched and found similar issues but when I implement any proposed solutions that I have found it either flat out does not work for any number or I have the same issue of only numbers 0 or greater. not sure what to do please help. i included the snippet of my screen that the code is looking at
Thanks
I don't think the problem is with the regex itself, except that it can be expanded to catch the negative sign. I've reworked your code to deal with error cases:
screen1 = np.array(ImageGrab.grab(bbox=(x)))
new_screen1 = process_img(screen1)
cv2.imshow('window1', new_screen1)
text = pytesseract.image_to_string(new_screen1)
if text:
# test code to show the text to scan
print("text to scan:", repr(text))
try:
price = re.findall("-?\d+", text)[0]
print(price)
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
else:
pass
except IndexError:
print("No price found")
else:
print("No text to scan")

Python: delete item from list when difference with previous appended item is small

I'm creating an image popularity algorithm that cuts a video% .mp4 into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result of this is expressed in 'score'.
This works but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.
In the end result, a list is generated with [score, frame number]. I want, for example, if 3 items in the list are almost identical frame numbers and therefore (almost) identical scores, I only keep the frame number in the list with the highest score in order to remove duplicates.
It has something to do with this line: result.append((predict(pil_image, model), name))
Here is the code:
import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2
def prepare_image(image):
if image.mode != 'RGB':
image = image.convert("RGB")
Transform = transforms.Compose([
transforms.Resize([224, 224]),
transforms.ToTensor(),
])
image = Transform(image)
image = image.unsqueeze(0)
return image
def predict(image, model):
image = prepare_image(image)
with torch.no_grad():
preds = model(image)
score = preds.detach().numpy().item()
print("Picture score: " + str(round(score, 2)) + " | frames left: " +str(framesToDo))
return str(round(score, 2))
if __name__ == '__main__':
model = torchvision.models.resnet50()
model.fc = torch.nn.Linear(in_features=2048, out_features=1)
model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
model.eval()
result = []
# In de folder videos are videos saved with the name of 1 until 23
for i in range(1, 23):
vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
succes, vidcap_image = vidcap.read()
count = 0
framestep = 500 #for Stackoverflow example
framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
# while succes and count < max_frames
while succes and count < int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)): #maximum amount of frames in video
name = str(i) + '_' + str(count)
cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image) # save frame as jpg image
count += framestep # 500 frames further
framesToDo = framesToDo - framestep
cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(cv2_image)
result.append((predict(pil_image, model), name))
succes, vidcap_image = vidcap.read()
result.sort(reverse=False)
print(result)
with open('result.json', 'w') as filehandle:
filehandle.write(json.dumps(result))````
Since there is no reproducible example, you can adapt this to solve your problem, this analyses each frame data and skips unnecessary ones, updates the best values and append new values.
MAX_FRAME_NUMBER_DIFF = 60
MAX_SCORE_DIFF = 0.5
current_frame = count
current_score = predict(pil_image, model)
data = (current_score, current_frame)
if not results:
results.append(data)
else:
last_score, last_frame = results[-1]
is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
is_score_better = current_score > last_score
is_score_way_better = current_score - last_score <= MAX_SCORE_DIFF
if is_similar_frame:
if is_score_better:
if is_score_way_better: # if diff between current score and previous score bigger than MAX_SCORE_DIFF
results.append(data)
else: # current score better than previous but not so better
results[-1] = data # update last value
else: # current score not better than previous
continue # skip this one
else: # if not similar frames
results.append(data)

Average the color of remaining frames

I've been seeing alot of images where it contains average color of each frame so I tried it myself.
This code will average the color of x amount of frames depending on the video's FPS.
import cv2
import numpy as np
cap = cv2.VideoCapture("video.mp4")
fcount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
duration = fcount/fps
image = np.zeros((500, round(fcount/fps), 3), np.uint8)
print("FPS: %s, COUNT %s, DURATION %s" %(fps,fcount,duration))
frames = []
framespos = 0
framesprocessed = 0
framesremain = fcount % fps
while True:
flag,frame = cap.read()
if flag:
avg_color_per_row = np.average(frame, axis=0)
avg_colors = np.average(avg_color_per_row, axis=0)
avg_color_int = np.array(avg_colors, dtype=np.uint8)
frames.append(avg_color_int)
if len(frames) == fps:
image[:,[framespos]] = np.average(frames, axis=0)
framespos += 1
framesprocessed += fps
print("%s/%s" %(framesprocessed,fcount))
frames.clear()
else:
cv2.imwrite('averages.png',image)
print("TOTAL FRAMES: %s, PROCESSED FRAMES %s, UNPROCESSED FRAMES %s" %(fcount,framesprocessed,fcount%fps))
cap.release()
break
This will write something like this:
This code works fine but it will not process all the frames. If fcount % fps > 0, those last frames wont get processed. How can I process those frames?
I just added this code at else that checks if there's a remainder frames and also check if the frames array has contents then append the contents to image array.
if framesremain > 0 and len(frames) > 0:
image[:,[framespos]] = np.average(frames, axis=0)
framesprocessed += framesremain
print("%s/%s" %(framesprocessed,fcount))

Categories