I've been seeing alot of images where it contains average color of each frame so I tried it myself.
This code will average the color of x amount of frames depending on the video's FPS.
import cv2
import numpy as np
cap = cv2.VideoCapture("video.mp4")
fcount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
duration = fcount/fps
image = np.zeros((500, round(fcount/fps), 3), np.uint8)
print("FPS: %s, COUNT %s, DURATION %s" %(fps,fcount,duration))
frames = []
framespos = 0
framesprocessed = 0
framesremain = fcount % fps
while True:
flag,frame = cap.read()
if flag:
avg_color_per_row = np.average(frame, axis=0)
avg_colors = np.average(avg_color_per_row, axis=0)
avg_color_int = np.array(avg_colors, dtype=np.uint8)
frames.append(avg_color_int)
if len(frames) == fps:
image[:,[framespos]] = np.average(frames, axis=0)
framespos += 1
framesprocessed += fps
print("%s/%s" %(framesprocessed,fcount))
frames.clear()
else:
cv2.imwrite('averages.png',image)
print("TOTAL FRAMES: %s, PROCESSED FRAMES %s, UNPROCESSED FRAMES %s" %(fcount,framesprocessed,fcount%fps))
cap.release()
break
This will write something like this:
This code works fine but it will not process all the frames. If fcount % fps > 0, those last frames wont get processed. How can I process those frames?
I just added this code at else that checks if there's a remainder frames and also check if the frames array has contents then append the contents to image array.
if framesremain > 0 and len(frames) > 0:
image[:,[framespos]] = np.average(frames, axis=0)
framesprocessed += framesremain
print("%s/%s" %(framesprocessed,fcount))
Related
how do i extract every #nth frame from a video with a python script?
the one where I don't extract every int(args.framegap) doesn't hang so could anyone show me what i'm doing wrong?
this is my script but it hangs on extracting frames, anyone who knows why or how to fix this?
i also want them named properly, for example if i extract every 5 frames they should be named 001.png , 006.png , 011.png etc..
thanks
import argparse
import cv2
import time
import os
import shutil
parser = argparse.ArgumentParser(description='arguments')
parser.add_argument('--videofile', type=str, help='path to your video file, for example --videofile C:\file\video\extract\video.mp4')
parser.add_argument('--projectname', type=str, help='name of the project to create the directories')
parser.add_argument('--framegap', type=int, help='name of the project to create the directories')
args = parser.parse_args()
doc_path = os.path.expanduser('~\Documents')
data_path = os.path.expanduser('~\Documents\\visionsofchaos\\fewshot\\data')
train_filtered = data_path+str(args.projectname)+'_train'+'\\'+'input_filtered'
#take every #nth frame
def video_to_frames(input_loc, output_loc):
"""Function to extract frames from input video file
and save them as separate frames in an output directory.
Args:
input_loc: Input video file.
output_loc: Output directory to save the frames.
Returns:
None
"""
try:
os.mkdir(output_loc)
except OSError:
pass
# Log the time
time_start = time.time()
# Start capturing the feed
cap = cv2.VideoCapture(input_loc)
# Find the number of frames
video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
print ("Number of frames: ", video_length)
count = 0
print ("Converting video..\n")
# Start converting the video
while cap.isOpened():
# Extract the frame
ret, frame = cap.read()
if not ret:
continue
# Write the results back to output location.
if count % int(args.framegap) == 0:
cv2.imwrite(train_filtered + "/%#03d.png" % (count+1), frame)
count += 1
# If there are no more frames left
if (count > (video_length-1)):
# Log the time again
time_end = time.time()
# Release the feed
cap.release()
# Print stats
print ("Done extracting frames.\n%d frames extracted" % count)
print ("It took %d seconds forconversion." % (time_end-time_start))
break
if __name__=="__main__":
input_loc = args.videofile
output_loc = data_path + '\\' + args.projectname + '_gen\\input_filtered'
video_to_frames(input_loc, output_loc)
All problem makes wrong indentations
You have to run count += 1 outside if count % int(args.framegap) == 0:
You run it inside if so after first frame it changes to count = 1 and later if doesn't allow to run anoter count += 1 so it is always count = 1 and this blocks all code.
count = -1
# loop
count += 1
# --- the same indentations ---
if count % int(args.framegap) == 0:
#.... code ...
or
count = 0
# loop
if count % int(args.framegap) == 0:
#.... code ...
# --- the same indentations ---
count += 1
I think you should also change indentations with another if (count > (video_length-1)) because it is also inside if count % int(args.framegap) == 0: and it can make problems
count = 0
# loop
if count % int(args.framegap) == 0:
#.... code ...
# --- the same indentations ---
count += 1
# --- the same indentations ---
if count > (video_length-1):
#.... code ...
I'm creating an image popularity algorithm that cuts a video% .mp4 into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result of this is expressed in 'score'.
This works but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.
In the end result, a list is generated with [score, frame number]. I want, for example, if 3 items in the list are almost identical frame numbers and therefore (almost) identical scores, I only keep the frame number in the list with the highest score in order to remove duplicates.
It has something to do with this line: result.append((predict(pil_image, model), name))
Here is the code:
import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2
def prepare_image(image):
if image.mode != 'RGB':
image = image.convert("RGB")
Transform = transforms.Compose([
transforms.Resize([224, 224]),
transforms.ToTensor(),
])
image = Transform(image)
image = image.unsqueeze(0)
return image
def predict(image, model):
image = prepare_image(image)
with torch.no_grad():
preds = model(image)
score = preds.detach().numpy().item()
print("Picture score: " + str(round(score, 2)) + " | frames left: " +str(framesToDo))
return str(round(score, 2))
if __name__ == '__main__':
model = torchvision.models.resnet50()
model.fc = torch.nn.Linear(in_features=2048, out_features=1)
model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
model.eval()
result = []
# In de folder videos are videos saved with the name of 1 until 23
for i in range(1, 23):
vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
succes, vidcap_image = vidcap.read()
count = 0
framestep = 500 #for Stackoverflow example
framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
# while succes and count < max_frames
while succes and count < int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)): #maximum amount of frames in video
name = str(i) + '_' + str(count)
cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image) # save frame as jpg image
count += framestep # 500 frames further
framesToDo = framesToDo - framestep
cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(cv2_image)
result.append((predict(pil_image, model), name))
succes, vidcap_image = vidcap.read()
result.sort(reverse=False)
print(result)
with open('result.json', 'w') as filehandle:
filehandle.write(json.dumps(result))````
Since there is no reproducible example, you can adapt this to solve your problem, this analyses each frame data and skips unnecessary ones, updates the best values and append new values.
MAX_FRAME_NUMBER_DIFF = 60
MAX_SCORE_DIFF = 0.5
current_frame = count
current_score = predict(pil_image, model)
data = (current_score, current_frame)
if not results:
results.append(data)
else:
last_score, last_frame = results[-1]
is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
is_score_better = current_score > last_score
is_score_way_better = current_score - last_score <= MAX_SCORE_DIFF
if is_similar_frame:
if is_score_better:
if is_score_way_better: # if diff between current score and previous score bigger than MAX_SCORE_DIFF
results.append(data)
else: # current score better than previous but not so better
results[-1] = data # update last value
else: # current score not better than previous
continue # skip this one
else: # if not similar frames
results.append(data)
currently my code gets all the frames in a video but i only need a few(about 20) from each video file.
import cv2 as cv
d = 1
count = 0
for i in range(5):
s = '/home/mrrobot/Downloads/clipsnew/' + str(d) +'.mp4'
vidcap = cv.VideoCapture(s)
success,image = vidcap.read()
success = True
while success:
vidcap.set(30,50) #Does this even work?
cv.imwrite("/home/mrrobot/Downloads/image2/frame%d.jpg" % count, image)
success,image = vidcap.read()
print ('Read a new frame: ', success)
count += 1
d += 1
simply add condition that count should be less than 50 and skip first 30 frames OR set CV_CAP_PROP_POS_FRAMES property to 30 and read 20 frames
import cv2 as cv
d = 1
for i in range(5):
s = '/home/mrrobot/Downloads/clipsnew/' + str(d) +'.mp4'
vidcap = cv.VideoCapture(s)
success,image = vidcap.read()
success = True
count = 0
while success and count < 50:
if count >= 30:
cv.imwrite("/home/mrrobot/Downloads/image2/frame%d.jpg" % count, image)
success,image = vidcap.read()
print ('Read a new frame: ', success)
count += 1
d += 1
I am currently working on keyframe extraction from videos.
Code :
while success:
success, currentFrame = vidcap.read()
isDuplicate = False
limit = count if count <= 10 else (count - 10)
for img in xrange(limit, count):
previusFrame = cv2.imread("%sframe-%d.png" % (outputDir, img))
try:
difference = cv2.subtract(currentFrame, previusFrame)
except:
pass
This gives me huge amounts of frames.
Expected ouput: Calculate pixel difference between frames and then compare it with a threshold value and store unique keyframes.
Working on videos for the first time. please guide on how to proceed to achieve the expected output
Here is a script to extract I-frames with ffprobe and OpenCV:
import os
import cv2
import subprocess
filename = '/home/andriy/Downloads/video.mp4'
def get_frame_types(video_fn):
command = 'ffprobe -v error -show_entries frame=pict_type -of default=noprint_wrappers=1'.split()
out = subprocess.check_output(command + [video_fn]).decode()
frame_types = out.replace('pict_type=','').split()
return zip(range(len(frame_types)), frame_types)
def save_i_keyframes(video_fn):
frame_types = get_frame_types(video_fn)
i_frames = [x[0] for x in frame_types if x[1]=='I']
if i_frames:
basename = os.path.splitext(os.path.basename(video_fn))[0]
cap = cv2.VideoCapture(video_fn)
for frame_no in i_frames:
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
ret, frame = cap.read()
outname = basename+'_i_frame_'+str(frame_no)+'.jpg'
cv2.imwrite(outname, frame)
print ('Saved: '+outname)
cap.release()
else:
print ('No I-frames in '+video_fn)
if __name__ == '__main__':
save_i_keyframes(filename)
You can change 'I' to 'P' if you need to extract P-frames.
I'm writing a Python script to go through a bunch of videos (.mp4) and take 5 random 64 x 64 pixel images every 100th frame per video. Here's my code to do that:
import cv2
import random
IMAGE_SIZE = 64
IMAGES_PER_FRAME = 5
def get_images():
categories = ['anemone', 'smoke', 'water']
# for each category
for category in categories:
for i in range(len(categories)):
# get video name (e.g. water2.mp4)
video_name = category + str(i) + '.mp4'
print('Processing: ' + video_name)
process_video(video_name)
def process_video(video_name):
# load video using cv2
video_cap = cv2.VideoCapture(video_name)
if video_cap.isOpened():
ret, frame = video_cap.read()
else:
ret = False
# while there's another frame
i = 0
j = 0
while ret:
ret, frame = video_cap.read()
if i % 100 == 0:
for k in range(IMAGES_PER_FRAME):
# save several images from frame to local directory
random_image = get_random_image(frame)
file_name = video_name.replace('.mp4', '') + '_' + str(j) + '.png'
cv2.imwrite(file_name, random_image)
j += 1
i += 1
video_cap.release()
def get_random_image(frame):
frame_height, frame_width, _ = frame.shape
left_x = random.randrange(0, frame_width - IMAGE_SIZE)
top_y = random.randrange(0, frame_height - IMAGE_SIZE)
# get random 64 x 64 x 3 chunk from frame
return frame[top_y:top_y + IMAGE_SIZE, left_x:left_x + IMAGE_SIZE, :]
get_images()
My script gets through all the 'anemone' videos but then gives the following error for 'smoke1.mp4':
Traceback (most recent call last):
File "getImages.py", line 49, in <module>
process_video('smoke1.mp4')
File "getImages.py", line 33, in process_video
random_image = get_random_image(frame)
File "getImages.py", line 42, in get_random_image
frame_height, frame_width, _ = frame.shape
AttributeError: 'NoneType' object has no attribute 'shape'
So somehow, I'm getting frame = None when I try to process 'smoke1.mp4'. I'm pretty unsure how this is possible though, especially given that the anemone videos work fine.
Any ideas?
Thanks!