I'm writing a Python script to go through a bunch of videos (.mp4) and take 5 random 64 x 64 pixel images every 100th frame per video. Here's my code to do that:
import cv2
import random
IMAGE_SIZE = 64
IMAGES_PER_FRAME = 5
def get_images():
categories = ['anemone', 'smoke', 'water']
# for each category
for category in categories:
for i in range(len(categories)):
# get video name (e.g. water2.mp4)
video_name = category + str(i) + '.mp4'
print('Processing: ' + video_name)
process_video(video_name)
def process_video(video_name):
# load video using cv2
video_cap = cv2.VideoCapture(video_name)
if video_cap.isOpened():
ret, frame = video_cap.read()
else:
ret = False
# while there's another frame
i = 0
j = 0
while ret:
ret, frame = video_cap.read()
if i % 100 == 0:
for k in range(IMAGES_PER_FRAME):
# save several images from frame to local directory
random_image = get_random_image(frame)
file_name = video_name.replace('.mp4', '') + '_' + str(j) + '.png'
cv2.imwrite(file_name, random_image)
j += 1
i += 1
video_cap.release()
def get_random_image(frame):
frame_height, frame_width, _ = frame.shape
left_x = random.randrange(0, frame_width - IMAGE_SIZE)
top_y = random.randrange(0, frame_height - IMAGE_SIZE)
# get random 64 x 64 x 3 chunk from frame
return frame[top_y:top_y + IMAGE_SIZE, left_x:left_x + IMAGE_SIZE, :]
get_images()
My script gets through all the 'anemone' videos but then gives the following error for 'smoke1.mp4':
Traceback (most recent call last):
File "getImages.py", line 49, in <module>
process_video('smoke1.mp4')
File "getImages.py", line 33, in process_video
random_image = get_random_image(frame)
File "getImages.py", line 42, in get_random_image
frame_height, frame_width, _ = frame.shape
AttributeError: 'NoneType' object has no attribute 'shape'
So somehow, I'm getting frame = None when I try to process 'smoke1.mp4'. I'm pretty unsure how this is possible though, especially given that the anemone videos work fine.
Any ideas?
Thanks!
Related
I've been trying to merge images together into one long image. The images are all in one folder and have the same file name format group1.X.jpg with X being a number, starting at 0. I've tried using img.paste to merge the images together.
This is basically what I've been trying:
img1 = Image.open(directory + filePrefix + str(fileFirst) + fileExtension)
w, h = img1.size
h = h * fileCount
img = Image.new("RGB", (w, h))
tmpImg = img.load()
console.log("Setup Complete")
number = 0
for number in track(range(fileCount - 1)):
imgPaste = Image.open(dir + prefix + str(number) + extension)
if not number >= fileCount:
Image.Image.paste(tmpImg, imgPaste, (0, h * (number + 1)))
number += 1
img.save(file)
stitch(directory, filePrefix, fileExtension)
The above code, when ran, outputs the following:
Working... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0% -:--:--
Traceback (most recent call last):
File "c:\Users\marcus\Desktop\Stuff\Files\code\webtoonstitcher.py", line 36, in <module>
stitch(directory, filePrefix, fileExtension)
File "c:\Users\marcus\Desktop\Stuff\Files\code\webtoonstitcher.py", line 32, in stitch
Image.Image.paste(tmpImg, imgPaste, (0, h * (number + 1)))
File "C:\Users\marcus\AppData\Roaming\Python\Python310\site-packages\PIL\Image.py", line 1619, in paste
if self.mode != im.mode:
AttributeError: 'PixelAccess' object has no attribute 'mode'```
You can get list of all images using glob and then just iterate through that list:
import glob
from PIL import Image
def stitch(directory, file_prefix, file_extension):
files = glob.glob(directory + f'{file_prefix}*.{file_extension}')
images = [Image.open(file) for file in files]
background_width = max([image.width for image in images])
background_height = sum([image.height for image in images])
background = Image.new('RGBA', (background_width, background_height), (255, 255, 255, 255))
y = 0
for image in images:
background.paste(image, (0, y))
y += image.height
background.save('image.png')
stitch('', 'group1', 'png')
Sample output:
I think the issue is here:
h = h * fileCount
img = Image.new("RGB", (w, h))
You should delete the first line and change the second line to:
img = Image.new("RGB", (w, h * fileCount))
Otherwise your h will be far too big when you use it later to calculate the offset for paste().
I have this set of images from which I want to create a set of sub images with a stride with sub image size of 128*128, original image must be greater than this size (row and column both), I have created the following functions :
def sliding_window(image, stride, imgSize):
height, width, _ = image.shape
img = []
a1 = list(range(0, height-imgSize+stride, stride))
a2 = list(range(0, width-imgSize+stride, stride))
if (a1[-1]+imgSize != height):
a1[-1] = height-imgSize
if (a2[-1]+imgSize != width):
a2[-1] = width-imgSize
for y in a1:
for x in a2:
im1 = image[y:y+imgSize, x:x+imgSize, :]
img.append(np.array(im1))
return img
and the main code snippet from where I call this definition :
im_counter = 0
image_data = []
image_label = []
for cl in file_images:
for img_file in data[cl]:
path = img_path + cl + "/" + img_file
im = image.load_img(path)
im = image.img_to_array(im)
im_counter += 1
if(im_counter % 500 == 0):
print("{} images processed...".format(im_counter))
if (im.shape[0] >= SIZE and im.shape[1] >= SIZE):
img = sliding_window(im, STRIDE, SIZE)
for i in range(len(img)):
if(img[i].shape[2] >=3):
temp_img = img[i]
temp_img = preprocess_input(temp_img)
image_data.append(temp_img)
del temp_img
gc.collect()
image.append(class_dictionary[cl])
Now, the above code snippet takes forever to run on only 3000 images (takes at least 25 hours with utilizing only 1 CPU core), I want to make this faster, I have server access, the CPU has many cores, so can you please suggest a parallelized version of it so that it runs faster ?
NOTE : The sequence of subimages in which it is returned from the original image matters very much, No arbitrary sequence of image is allowed.
Here is a rough outline of something you can try.
def main():
# Create a list of tuples consisting of the file path, and the class
# dictionary info for each of the cl arguments
args = []
for cl in file_images:
for img_file in data[cl]:
path = img_path + cl + "/" + img_file
args.append((path, class_dictionary[cl]))
with multiprocessing.Pool(processes=30) as pool: # or however many processes
image_counter = 0
# Use multiprocessing to call handle_on_image(pathname, info)
# and return the results in order
for images, info in pool.starmap(handle_one_image, args):
# Images is a list of returned images. info is the class_dictionary info that we passed
for image in images:
image_counter += 1
image_data.append(image)
image_label.append(info)
def handle_one_image(path, info):
image_data = []
im = image.load_img(path)
im = image.img_to_array(im)
if (im.shape[0] >= SIZE and im.shape[1] >= SIZE):
img = sliding_window(im, STRIDE, SIZE)
for i in range(len(img)):
if(img[i].shape[2] >=3):
temp_img = img[i]
temp_img = preprocess_input(temp_img)
image_data.append(temp_img)
return image_data, info
else:
# indicate that no images are available
return [], info
My code is about to create an H5 file for Each and Every video in the folder, Extracting the feature from the video and stored into the H5 file.
in Below shown code extraction feature from multi videos and all the features are stored in the single H5 file
H5 file order:
video1:
- feature
video2:
- feature
issues:
How to create an H5 file for every video after a process is done
Code: Create_data.py
import argparse
from utils.generate_dataset import Generate_Dataset
parser = argparse.ArgumentParser(""Welcome you to fraction)
# Dataset options
parser.add_argument('--input', '--split', type=str, help="input video")
parser.add_argument('--output', type=str, default='', help="out data")
args = parser.parse_args()
if __name__ == "__main__":
gen = Generate_Dataset(args.input, args.output)
gen.generate_dataset()
gen.h5_file.close()
Code: Generate_Dataset.py :
import os
from networks.CNN import ResNet
from utils.KTS.cpd_auto import cpd_auto
from tqdm import tqdm
import math
import cv2
import numpy as np
import h5py
import numpy as np
class Generate_Dataset:
def __init__(self, video_path, save_path):
self.resnet = ResNet()
self.dataset = {}
self.video_list = []
self.video_path = ''
self.h5_file = h5py.File(save_path, 'w')
self._set_video_list(video_path)
def _set_video_list(self, video_path):
# import pdb;pdb.set_trace()
if os.path.isdir(video_path):
self.video_path = video_path
fileExt = r".mp4",".avi"
self.video_list = [_ for _ in os.listdir(video_path) if _.endswith(fileExt)]
self.video_list.sort()
else:
self.video_path = ''
self.video_list.append(video_path)
for idx, file_name in enumerate(self.video_list):
self.dataset['video_{}'.format(idx+1)] = {}
self.h5_file.create_group('video_{}'.format(idx+1))
def _extract_feature(self, frame):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (224, 224))
res_pool5 = self.resnet(frame)
frame_feat = res_pool5.cpu().data.numpy().flatten()
return frame_feat
def _get_change_points(self, video_feat, n_frame, fps):
n = n_frame / fps
m = int(math.ceil(n/2.0))
K = np.dot(video_feat, video_feat.T)
change_points, _ = cpd_auto(K, m, 1)
change_points = np.concatenate(([0], change_points, [n_frame-1]))
temp_change_points = []
for idx in range(len(change_points)-1):
segment = [change_points[idx], change_points[idx+1]-1]
if idx == len(change_points)-2:
segment = [change_points[idx], change_points[idx+1]]
temp_change_points.append(segment)
change_points = np.array(list(temp_change_points))
# temp_n_frame_per_seg = []
# for change_points_idx in range(len(change_points)):
# n_frame = change_points[change_points_idx][1] - change_points[change_points_idx][0]
# temp_n_frame_per_seg.append(n_frame)
# n_frame_per_seg = np.array(list(temp_n_frame_per_seg))
# print(change_points)
arr = change_points
list1 = arr.tolist()
list2 = list1[-1].pop(1) #pop [-1]value
print(list2)
print(list1)
print("****************") # [-1][-1] value find and divided by 15
cps_m = math.floor(arr[-1][1]/15)
list1[-1].append(cps_m) #append to list
print(list1)
print("****************") #list to nd array convertion
arr = np.asarray(list1)
print(arr)
arrmul = arr * 15
print(arrmul)
print("****************")
# print(type(change_points))
# print(n_frame_per_seg)
# print(type(n_frame_per_seg))
median_frame = []
for x in arrmul:
print(x)
med = np.mean(x)
print(med)
int_array = med.astype(int)
median_frame.append(int_array)
print(median_frame)
# print(type(int_array))
return arrmul
# TODO : save dataset
def _save_dataset(self):
pass
def generate_dataset(self):
print('[INFO] CNN processing')
for video_idx, video_filename in enumerate(self.video_list):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul
Expected results :
Folder: video
video_1:
video1.mp4
video2.mp4
Files are in this structure, now read video files and create separate H5 files after the process is over.
For more Code reference
You need to :
remove self.h5_file = h5py.File(save_path, 'w') from __init__()
remove self.h5_file.create_group('video_{}'.format(idx+1)) from _set_video_list()
remove gen.h5_file.close() from main()
change last block of generate_dataset() into something like:
.
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
h5_dir = os.path.dirname(video_path)
h5_full_path = os.path.join(h5_dir, 'video_{}'.format(video_idx+1))
with h5py.File(h5_full_path, 'w') as h5_file:
h5_file['features'] = list(video_feat_for_train)
h5_file['picks'] = np.array(list(picks))
h5_file['n_frames'] = n_frames
h5_file['fps'] = fps
h5_file['video_name'] = video_filename.split('.')[0]
h5_file['change_points'] = arrmul
Please note that your inner video file indices and actual video file name numbers may not match. So I suggest to change
h5_dir = os.path.dirname(video_path)
h5_full_path = os.path.join(h5_dir, 'video_{}'.format(video_idx+1))
from above into
h5_full_path = video_path.split('.')[0] + '.h5'
This will create features file with the name matched to the video file.
align = openface.AlignDlib(dlibFacePredictor) subject=input("Enter Subject:") a=['BI','SIC','PGIS','SQA','ITSM'] path = 'C:/Users/ACER/Desktop/PROJECT ALL RESOURCE/Implementation/PYTHON FILES/images/' if subject in a: print("Success") wbook = load_workbook(filename = "C:/Users/ACER/Desktop/PROJECT ALL RESOURCE/Implementation/PYTHON FILES/Attendance/"+subject+".xlsx") sheet = wbook.get_sheet_by_name('TYBSCIT'+subject) else: print("Invalid")
def getDateColumns(): for i in range(1, len(sheet.rows[0]) + 1): cols = get_column_letter(i) if sheet.cell('%s%s'% (col,'1')).value == currentDate: return cols def getProfileId(Ids): connect = sqlite3.connect("C:/Users/ACER/Desktop/PROJECT ALL RESOURCE/Implementation/PYTHON FILES/sqlite3/Studentdb.db") cmd = "SELECT * FROM Students WHERE ID=" + str(Ids) cursor = connect.execute(cmd) profile = None for row in cursor: profile = row connect.close() return profile
attend = [0 for i in range(60)] rec = cv2.face.LBPHFaceRecognizer_create() # Local Binary Patterns Histograms rec.read('C:/Users/ACER/Desktop/PROJECT ALL RESOURCE/Implementation/PYTHON FILES/Training/trainingData.yml') # loading the trained data picNumber = 2 image= cv2.imread('C:/Users/ACER/Desktop/PROJECT ALL RESOURCE/Implementation/PYTHON FILES/images/'+subject+currentDate+'.jpg') font = cv2.FONT_HERSHEY_SIMPLEX # the font of text on face recognition gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # conveting the camera input into GrayScale dets = detector(image, 1) totalConf = 0.0 faceRec = 0 for i, d in enumerate(dets): image2 = image[d.top():d.bottom(), d.left():d.right()] rgbImg = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB) bb = align.getLargestFaceBoundingBox(rgbImg) alignedFace = align.align(96, rgbImg, bb=None, landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE) alignedFace= cv2.cvtColor(alignedFace, cv2.COLOR_BGR2GRAY) # conveting the camera input into GrayScale Ids, conf = rec.predict(alignFace) # Comparing from the trained data
Traceback (most recent call last): File "C:\Users\ACER\Desktop\PROJECT ALL RESOURCE\Implementation\PYTHON FILES\facerecognition.py", line 60, in <module> Ids, conf = rec.predict(alignFace) # Comparing from the trained data NameError: name 'alignFace' is not defined
As the TraceBack states, alignFace is not defined. You have made a typo of your previously-defined alignedFace - use this instead.
I'm creating an image popularity algorithm that cuts a video% .mp4 into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result of this is expressed in 'score'.
This works but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.
In the end result, a list is generated with [score, frame number]. I want, for example, if 3 items in the list are almost identical frame numbers and therefore (almost) identical scores, I only keep the frame number in the list with the highest score in order to remove duplicates.
It has something to do with this line: result.append((predict(pil_image, model), name))
Here is the code:
import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2
def prepare_image(image):
if image.mode != 'RGB':
image = image.convert("RGB")
Transform = transforms.Compose([
transforms.Resize([224, 224]),
transforms.ToTensor(),
])
image = Transform(image)
image = image.unsqueeze(0)
return image
def predict(image, model):
image = prepare_image(image)
with torch.no_grad():
preds = model(image)
score = preds.detach().numpy().item()
print("Picture score: " + str(round(score, 2)) + " | frames left: " +str(framesToDo))
return str(round(score, 2))
if __name__ == '__main__':
model = torchvision.models.resnet50()
model.fc = torch.nn.Linear(in_features=2048, out_features=1)
model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
model.eval()
result = []
# In de folder videos are videos saved with the name of 1 until 23
for i in range(1, 23):
vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
succes, vidcap_image = vidcap.read()
count = 0
framestep = 500 #for Stackoverflow example
framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
# while succes and count < max_frames
while succes and count < int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)): #maximum amount of frames in video
name = str(i) + '_' + str(count)
cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image) # save frame as jpg image
count += framestep # 500 frames further
framesToDo = framesToDo - framestep
cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(cv2_image)
result.append((predict(pil_image, model), name))
succes, vidcap_image = vidcap.read()
result.sort(reverse=False)
print(result)
with open('result.json', 'w') as filehandle:
filehandle.write(json.dumps(result))````
Since there is no reproducible example, you can adapt this to solve your problem, this analyses each frame data and skips unnecessary ones, updates the best values and append new values.
MAX_FRAME_NUMBER_DIFF = 60
MAX_SCORE_DIFF = 0.5
current_frame = count
current_score = predict(pil_image, model)
data = (current_score, current_frame)
if not results:
results.append(data)
else:
last_score, last_frame = results[-1]
is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
is_score_better = current_score > last_score
is_score_way_better = current_score - last_score <= MAX_SCORE_DIFF
if is_similar_frame:
if is_score_better:
if is_score_way_better: # if diff between current score and previous score bigger than MAX_SCORE_DIFF
results.append(data)
else: # current score better than previous but not so better
results[-1] = data # update last value
else: # current score not better than previous
continue # skip this one
else: # if not similar frames
results.append(data)