I want my code to create an H5 file for each and every video in the folder, extracting the features from the video and storing them in that file.
In the code shown below, features are extracted from multiple videos, but all of them are stored in a single H5 file.
Current H5 file layout:
video1:
- feature
video2:
- feature
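(For reference, this flat layout can be inspected with h5py; a minimal sketch, where the file name dataset.h5 is an assumption:)
import h5py
with h5py.File('dataset.h5', 'r') as f:  # hypothetical output of the code below
    for video_key in f.keys():  # 'video_1', 'video_2', ...
        print(video_key, list(f[video_key].keys()))  # datasets stored per video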
Issue:
How do I create a separate H5 file for every video once it has been processed?
Code: Create_data.py
import argparse
from utils.generate_dataset import Generate_Dataset
parser = argparse.ArgumentParser(description="Welcome to feature extraction")
# Dataset options
parser.add_argument('--input', '--split', type=str, help="input video")
parser.add_argument('--output', type=str, default='', help="out data")
args = parser.parse_args()
if __name__ == "__main__":
gen = Generate_Dataset(args.input, args.output)
gen.generate_dataset()
gen.h5_file.close()
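(For context, the script would be run like this; the folder and file names are hypothetical:)
python Create_data.py --input videos/ --output dataset.h5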
Code: Generate_Dataset.py:
import os
from networks.CNN import ResNet
from utils.KTS.cpd_auto import cpd_auto
from tqdm import tqdm
import math
import cv2
import numpy as np
import h5py
class Generate_Dataset:
def __init__(self, video_path, save_path):
self.resnet = ResNet()
self.dataset = {}
self.video_list = []
self.video_path = ''
self.h5_file = h5py.File(save_path, 'w')
self._set_video_list(video_path)
def _set_video_list(self, video_path):
# import pdb;pdb.set_trace()
if os.path.isdir(video_path):
self.video_path = video_path
fileExt = ('.mp4', '.avi')
self.video_list = [f for f in os.listdir(video_path) if f.endswith(fileExt)]
self.video_list.sort()
else:
self.video_path = ''
self.video_list.append(video_path)
for idx, file_name in enumerate(self.video_list):
self.dataset['video_{}'.format(idx+1)] = {}
self.h5_file.create_group('video_{}'.format(idx+1))
def _extract_feature(self, frame):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (224, 224))
res_pool5 = self.resnet(frame)
frame_feat = res_pool5.cpu().data.numpy().flatten()
return frame_feat
def _get_change_points(self, video_feat, n_frame, fps):
n = n_frame / fps
m = int(math.ceil(n/2.0))
K = np.dot(video_feat, video_feat.T)
change_points, _ = cpd_auto(K, m, 1)
change_points = np.concatenate(([0], change_points, [n_frame-1]))
temp_change_points = []
for idx in range(len(change_points)-1):
segment = [change_points[idx], change_points[idx+1]-1]
if idx == len(change_points)-2:
segment = [change_points[idx], change_points[idx+1]]
temp_change_points.append(segment)
change_points = np.array(list(temp_change_points))
# temp_n_frame_per_seg = []
# for change_points_idx in range(len(change_points)):
# n_frame = change_points[change_points_idx][1] - change_points[change_points_idx][0]
# temp_n_frame_per_seg.append(n_frame)
# n_frame_per_seg = np.array(list(temp_n_frame_per_seg))
# print(change_points)
arr = change_points
list1 = arr.tolist()
list2 = list1[-1].pop(1)  # pop the end value of the last segment
print(list2)
print(list1)
print("****************")  # take the last segment's end frame and divide it by 15
cps_m = math.floor(arr[-1][1]/15)
list1[-1].append(cps_m)  # append it back to the last segment
print(list1)
print("****************")  # convert the list back to an ndarray
arr = np.asarray(list1)
print(arr)
arrmul = arr * 15
print(arrmul)
print("****************")
# print(type(change_points))
# print(n_frame_per_seg)
# print(type(n_frame_per_seg))
median_frame = []
for x in arrmul:
    print(x)
    med = np.mean(x)
    print(med)
    int_array = med.astype(int)
    median_frame.append(int_array)
print(median_frame)
# print(type(int_array))
return arrmul
# TODO : save dataset
def _save_dataset(self):
pass
def generate_dataset(self):
print('[INFO] CNN processing')
for video_idx, video_filename in enumerate(self.video_list):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul
Expected results:
Folder: video
video_1:
video1.mp4
video2.mp4
The files are in this structure; now read the video files and create a separate H5 file for each one after processing is done.
You need to :
remove self.h5_file = h5py.File(save_path, 'w') from __init__()
remove self.h5_file.create_group('video_{}'.format(idx+1)) from _set_video_list()
remove gen.h5_file.close() from main()
change the last block of generate_dataset() into something like:
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
h5_dir = os.path.dirname(video_path)
h5_full_path = os.path.join(h5_dir, 'video_{}'.format(video_idx+1))
with h5py.File(h5_full_path, 'w') as h5_file:
h5_file['features'] = list(video_feat_for_train)
h5_file['picks'] = np.array(list(picks))
h5_file['n_frames'] = n_frames
h5_file['fps'] = fps
h5_file['video_name'] = video_filename.split('.')[0]
h5_file['change_points'] = arrmul
Please note that the inner video indices may not match the numbers in the actual video file names. So I suggest changing
h5_dir = os.path.dirname(video_path)
h5_full_path = os.path.join(h5_dir, 'video_{}'.format(video_idx+1))
from the code above into
h5_full_path = video_path.split('.')[0] + '.h5'
This will create a features file whose name matches the video file.
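Putting it all together, the end of generate_dataset() would look something like this minimal sketch (same datasets as before; using os.path.splitext instead of split('.') is my own substitution so that paths containing other dots still work):
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
# one H5 file per video, named after the video file itself
h5_full_path = os.path.splitext(video_path)[0] + '.h5'
with h5py.File(h5_full_path, 'w') as h5_file:
    h5_file['features'] = video_feat_for_train
    h5_file['picks'] = np.array(picks)
    h5_file['n_frames'] = n_frames
    h5_file['fps'] = fps
    h5_file['video_name'] = os.path.basename(video_path).split('.')[0]
    h5_file['change_points'] = arrmul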
Related
Hi, I have a problem: when my code is turned into an executable file, an error message appears on the screen. Does anyone know how to fix this? This is for some body-tracking software; the real cause is cv2, but I need help making this into an executable so no error messages appear when it is opened. Many thanks, Jacob.
Screenshot of the error: https://i.stack.imgur.com/LKJ5r.png
import cv2
import mediapipe as mp
import pandas as pd
from ast import literal_eval
from tkinter import *
from tkinter import filedialog
import numpy
root = Tk()
# create label widget
ScreenLabel = Label(root, text="Body tracking")
# put it on the screen
root.geometry("500x450+700+200")
#def open():
#global track
#track = filedialog.askopenfilename(initialdir="/dowloads", title="select A File", filetypes=([("all files","*.*")]))
e = Entry(root, width=100)
e.pack()
def tracking():
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
video_file_path = e.get()
cap = cv2.VideoCapture(video_file_path)
coordinates_list = []
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
size = (frame_width, frame_height)
fps = 30
v_split = video_file_path.split(".")
v_split[-2] = v_split[-2] + "_processed"
prep_vid_file_path = ".".join(v_split)
video_saver = cv2.VideoWriter(prep_vid_file_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
keypoints_list = ['CENTER_HEAD',
'LEFT_SHOULDER',
'RIGHT_SHOULDER',
'LEFT_ELBOW',
'RIGHT_ELBOW',
'LEFT_WRIST',
'RIGHT_WRIST',
'LEFT_PINKY',
'RIGHT_PINKY',
'LEFT_INDEX',
'RIGHT_INDEX',
'LEFT_THUMB',
'RIGHT_THUMB',
'LEFT_HIP',
'RIGHT_HIP',
'LEFT_KNEE',
'RIGHT_KNEE',
'LEFT_ANKLE',
'RIGHT_ANKLE',
'LEFT_HEEL',
'RIGHT_HEEL',
'LEFT_FOOT_INDEX',
'RIGHT_FOOT_INDEX']
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
while True:
ret, frame = cap.read()
if ret == False:
cap.release()
video_saver.release()
cv2.destroyAllWindows()
break
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image.flags.writeable = False
results = pose.process(image)
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
timestamp_name = str(cap.get(cv2.CAP_PROP_POS_MSEC))
try:
landmarks = results.pose_landmarks.landmark
CENTER_HEAD = (landmarks[mp_pose.PoseLandmark.NOSE.value].x, landmarks[mp_pose.PoseLandmark.NOSE.value].y, landmarks[mp_pose.PoseLandmark.NOSE.value].z)
LEFT_SHOULDER = (landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].y, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value].z)
RIGHT_SHOULDER = (landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value].z)
LEFT_ELBOW = (landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].x, landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].y, landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value].z)
RIGHT_ELBOW = (landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value].z)
LEFT_WRIST = (landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].x, landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].y, landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value].z)
RIGHT_WRIST = (landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value].z)
LEFT_PINKY = (landmarks[mp_pose.PoseLandmark.LEFT_PINKY.value].x, landmarks[mp_pose.PoseLandmark.LEFT_PINKY.value].y, landmarks[mp_pose.PoseLandmark.LEFT_PINKY.value].z)
RIGHT_PINKY = (landmarks[mp_pose.PoseLandmark.RIGHT_PINKY.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_PINKY.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_PINKY.value].z)
LEFT_INDEX = (landmarks[mp_pose.PoseLandmark.LEFT_INDEX.value].x, landmarks[mp_pose.PoseLandmark.LEFT_INDEX.value].y, landmarks[mp_pose.PoseLandmark.LEFT_INDEX.value].z)
RIGHT_INDEX = (landmarks[mp_pose.PoseLandmark.RIGHT_INDEX.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_INDEX.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_INDEX.value].z)
LEFT_THUMB = (landmarks[mp_pose.PoseLandmark.LEFT_THUMB.value].x, landmarks[mp_pose.PoseLandmark.LEFT_THUMB.value].y, landmarks[mp_pose.PoseLandmark.LEFT_THUMB.value].z)
RIGHT_THUMB = (landmarks[mp_pose.PoseLandmark.RIGHT_THUMB.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_THUMB.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_THUMB.value].z)
LEFT_HIP = (landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].x, landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].y, landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].z)
RIGHT_HIP = (landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value].z)
LEFT_KNEE = (landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].x, landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].y, landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].z)
RIGHT_KNEE = (landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value].z)
LEFT_ANKLE = (landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].x, landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].y, landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].z)
RIGHT_ANKLE = (landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE.value].z)
LEFT_HEEL = (landmarks[mp_pose.PoseLandmark.LEFT_HEEL.value].x, landmarks[mp_pose.PoseLandmark.LEFT_HEEL.value].y, landmarks[mp_pose.PoseLandmark.LEFT_HEEL.value].z)
RIGHT_HEEL = (landmarks[mp_pose.PoseLandmark.RIGHT_HEEL.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_HEEL.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_HEEL.value].z)
LEFT_FOOT_INDEX = (landmarks[mp_pose.PoseLandmark.LEFT_FOOT_INDEX.value].x, landmarks[mp_pose.PoseLandmark.LEFT_FOOT_INDEX.value].y, landmarks[mp_pose.PoseLandmark.LEFT_FOOT_INDEX.value].z)
RIGHT_FOOT_INDEX = (landmarks[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX.value].x, landmarks[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX.value].y, landmarks[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX.value].z)
coordinates_list.append(str(timestamp_name) + " : " + str({'CENTER_HEAD':CENTER_HEAD,
'LEFT_SHOULDER':LEFT_SHOULDER,
'RIGHT_SHOULDER':RIGHT_SHOULDER,
'LEFT_ELBOW':LEFT_ELBOW,
'RIGHT_ELBOW':RIGHT_ELBOW,
'LEFT_WRIST':LEFT_WRIST,
'RIGHT_WRIST':RIGHT_WRIST,
'LEFT_PINKY':LEFT_PINKY,
'RIGHT_PINKY':RIGHT_PINKY,
'LEFT_INDEX':LEFT_INDEX,
'RIGHT_INDEX':RIGHT_INDEX,
'LEFT_THUMB':LEFT_THUMB,
'RIGHT_THUMB':RIGHT_THUMB,
'LEFT_HIP':LEFT_HIP,
'RIGHT_HIP':RIGHT_HIP,
'LEFT_KNEE':LEFT_KNEE,
'RIGHT_KNEE':RIGHT_KNEE,
'LEFT_ANKLE':LEFT_ANKLE,
'RIGHT_ANKLE':RIGHT_ANKLE,
'LEFT_HEEL':LEFT_HEEL,
'RIGHT_HEEL':RIGHT_HEEL,
'LEFT_FOOT_INDEX':LEFT_FOOT_INDEX,
'RIGHT_FOOT_INDEX':RIGHT_FOOT_INDEX}))
except:
pass
mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
video_saver.write(image)
cv2.imshow("Video Feed", image)
cv2.waitKey(1)
v_split = video_file_path.split(".")
v_split[-1] = "txt"
v_split[-2] = v_split[-2] + "_coordinates"
coordinates_file_path = ".".join(v_split)
with open(coordinates_file_path, "w") as f:
f.write("\n".join(coordinates_list))
v_split = video_file_path.split(".")
v_split[-1] = "xlsx"
v_split[-2] = v_split[-2] + "_excel"
coordinates_excel_path = ".".join(v_split)
def get_vals(x):
vals = []
for i in x:
for j in i:
vals.append(j)
return vals
df = pd.DataFrame(coordinates_list)
df["frame"] = df[0].apply(lambda x: x.split(" : ")[0])
df[0] = df[0].apply(lambda x: get_vals(literal_eval(x.split(" : ")[1]).values()))
df_columns = []
mapper_dict = {0:"x", 1:"y", 2:"z"}
for keypoint in keypoints_list:
for i in range(0,3):
df_columns.append(keypoint+"_"+mapper_dict.get(i))
df = pd.concat([df["frame"], pd.DataFrame(df[0].to_list(), columns=df_columns)], axis=1)
df.to_excel(coordinates_excel_path, index=False)
#open_btn = Button( root, text="open File", command=open, borderwidth=0).pack()
ExportButton = Button(root, text="Track", command=tracking ,borderwidth=0 ,fg="blue")
ExportButton.pack()
root.mainloop()
Hi all, I have 70k images saved into an .h5 file, and with this script I want to read from that file and annotate the text instances into a .json file. When I run the script it takes a very long time to annotate one image (about 2 hours).
When I do this with 15 images the script works fine and annotates all 15 images in a few seconds.
With 70k images the .h5 file is 51 GB.
I don't know whether the problem is in the code or the h5 file is too big. The code works fine with a small number of images, but I'm working on a project where I need 70k or 700k images.
from __future__ import division
import os
import os.path as osp
from re import U
import numpy as np
import matplotlib.pyplot as plt
import h5py
from common import *
import json
import cv2
import numpy as np
from itertools import cycle
import js2py
#from gen import brojac
#from synthgen import imnames
global x
global y
def write_json(data, filename='annotation.json'):
with open(filename,'w') as file:
json.dump(data,file,indent=4)
DATA_PATH = 'results'
DB_FNAME = osp.join(DATA_PATH,'SynthText.h5')
def get_data():
return h5py.File(DB_FNAME,'r')
def viz_textbb(text_im, imageName, charBB_list, wordBB, textToList, alpha=1.0):
"""
text_im : image containing text
charBB_list : list of 2x4xn_i bounding-box matrices
wordBB : 2x4xm matrix of word coordinates
"""
#print("k",z, type(z))
plt.close(1)
plt.figure(1)
plt.imshow(text_im)
H,W = text_im.shape[:2]
global imnames
#print("MOLIIIM",wordBB)
#DODANO IZ MAIN-a
#**********************************************
db = h5py.File('results/SynthText.h5', 'r')
dsets = sorted(db['data'].keys())
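# NOTE: get_data() inside the loop below reopens the whole H5 file and
# re-sorts all keys for every dataset entry, on every call to viz_textbb;
# with 70k keys this is likely a big part of the ~2 h per image.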
for k in dsets:
db = get_data()
imnames = sorted(db['data'].keys())
start = 0
count = 0
coordinate = []
coordinate1 = []
name = []
name1 = []
final = []
upperList = []
downList = []
counter = 0
FinalFinal = []
imageData = { }
dictList = []
for eachWord in textToList:
length = len(eachWord)
for i in range(0,4):
for j in range(start,length+start):
coordinate.append([charBB_list[0][0][i][j], charBB_list[0][1][i][j]])
coordinate1.append((charBB_list[0][0][i][j], charBB_list[0][1][i][j]))
name.append(coordinate)
name1.append(coordinate1)
coordinate = []
for j in range(0, length):
for i in range(len(name)) :
#print(i,j, name[i][j]) ## coordinates, to help me find my way around; needed
final.append(name[i][j])
#print(name)
# SOMETHING FOR DRAWING, NOT IMPORTANT
if(i == 0 or i == 1):
upperList.append(name[i][j])
if(i == 2):
downList.append(name[i+1][j])
if(i == 3):
downList.append(name[i-1][j])
down = reversed(downList)
joinList = [*upperList,*down,upperList[0]]
FinalFinal.append(joinList)
imageData['transcription']=eachWord
imageData['language']="Latin"
imageData['illegibility']=False
imageData['points']=final
dictionary_copy = imageData.copy()
dictList.append(dictionary_copy)
del(dictionary_copy)
finalToList = np.array(final)
name=[]
final = []
upperList = []
downList = []
start = len(eachWord) + start
#del(dictList[0])
finalDict = {f'gt_{imageName}':dictList}
#print(type(finalDict)) --> dict
#print(imageName,finalDict)
#print(finalDict)
#print(len(textToList))
#print(textToList)
with open("annotation.json") as json_file:
data=json.load(json_file)
temp=data["annotations"]
#temp.append(finalDict)
temp.update(finalDict)
#temp['annotations'] = finalDict
write_json(data)
json_file.close()
for list in FinalFinal:
x,y = zip(*list)
plt.plot(x,y)
#print(x,y)
# points = tuple(zip(x,y))
# # boundaries of the bounding box
# left, right = min(points, key=lambda p: p[0]), max(points, key=lambda p: p[0])
# bottom, top = min(points, key=lambda p: p[1]), max(points, key=lambda p: p[1])
# # area
# base = right[0] - left[0]
# height = top[1] - bottom[1]
# A = base * height
#print(A)
for i in range(len(charBB_list)):
# #print(charBB_list) # prints the x values of one vertex for all character instances, then the 2nd, 3rd and 4th vertices, then the y values separately
bbs = charBB_list[i]
ni = bbs.shape[-1]
for j in range(ni):
bb = bbs[:,:,j]
bb = np.c_[bb,bb[:,0]] # if ,bb[:,0] is added -> prints the same as the top-left one
#plt.plot(bb[0,:], bb[1,:], 'r', alpha=alpha)
# plot the word-BB:
for i in range(wordBB.shape[-1]):
bb = wordBB[:,:,i] # wordBB coordinates
bb = np.c_[bb,bb[:,0]] # joins back to the leftmost point, NEEDED
#plt.plot(bb[0,:], bb[1,:], 'g', alpha=alpha)
# visualize the indiv vertices:
vcol = ['r','g','b','k']
#for j in range(4):
#plt.scatter(bb[0,j],bb[1,j],color=vcol[j])
#print(bb) # ----> wordBB COORDINATES
#print(bb[1,j])
plt.gca().set_xlim([0,W-1])
plt.gca().set_ylim([H-1,0])
plt.show(block=False)
def main(db_fname):
db = h5py.File(db_fname, 'r')
dsets = sorted(db['data'].keys())
print ("total number of images : ", colorize(Color.RED, len(dsets), highlight=True))
for k in dsets:
rgb = db['data'][k][...]
charBB = db['data'][k].attrs['charBB']
wordBB = db['data'][k].attrs['wordBB']
txt = db['data'][k].attrs['txt']
textToList = (db['data'][k].attrs['txt']).tolist()
#print(textToList)
viz_textbb(rgb, k,[charBB], wordBB, textToList)
print ("image name : ", colorize(Color.RED, k, bold=True))
print (" ** no. of chars : ", colorize(Color.YELLOW, charBB.shape[-1]))
print (" ** no. of words : ", colorize(Color.YELLOW, wordBB.shape[-1]))
print (" ** text : ", colorize(Color.GREEN, txt))
#print("To know", z[1], type(z[1]))
# UNCOMMENT THE LINES BELOW TO STEP THROUGH BY PRESSING ENTER
# if 'q' in input("next? ('q' to exit) : "):
# break
db.close()
if __name__=='__main__':
main('results/SynthText.h5')
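A second pattern worth noting: viz_textbb reloads and rewrites the whole annotation.json once per image, so the JSON work grows quadratically with the image count. A minimal sketch of accumulating annotations in memory and writing once at the end (the finalDict layout is the one built above; wiring it in this exact way is an assumption):
all_annotations = {}  # filled once per image instead of rewriting the file each time
def collect_annotation(imageName, dictList):
    all_annotations['gt_{}'.format(imageName)] = dictList
# then, once, after the loop over all images in main():
with open('annotation.json', 'w') as f:  # json is already imported above
    json.dump({'annotations': all_annotations}, f, indent=4)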
I have code that I learned from this link: https://www.youtube.com/watch?v=nnH55-zD38I&t=1047s
but I got an error that says:
line 62, in <module>
cv2.putText(imgOriginal,classNames[id], (50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1)
IndexError: list index out of range
[ WARN:0] global C:\Users\runneradmin\AppData\Local\Temp\pip-req-build-m8us58q4\opencv\modules\videoio\src\cap_msmf.cpp
(438) `anonymous-namespace'::SourceReaderCB::~SourceReaderCB terminating async callback
Here is the code:
import cv2
import numpy as np
import os
path = r'#2 Image descriptor\Assets\Query'
orb = cv2.ORB_create(nfeatures=1000)
#IMPORT IMAGES
images = []
classNames = []
myList = os.listdir(path)
print('Number of classes = ', len(myList))
#print(myList)
for cl in myList:
imgCur = cv2.imread(f'{path}/{cl}', 0)
images.append(imgCur)
classNames.append(os.path.splitext(cl)[0])
print (classNames)
def findDes(images) :
desList = []
for img in images :
kp, des = orb.detectAndCompute(img, None)
desList.append(des)
return desList
def findID(img, desList, thres=15):
kp2, des2 = orb.detectAndCompute(img, None)
bf = cv2.BFMatcher()
matchList = []
finalVal = -1
try :
for des in desList:
matches = bf.knnMatch(des, des2, k=2)
good = []
for m, n in matches:
if m.distance < 0.7*n.distance:
good.append([m])
matchList.append(len(good))
except :
pass
#print(matchList)
if len(matchList) != 0:
if max(matchList) > thres:
finalVal = matchList.index(max(matchList))
return finalVal
desList = findDes(images)
print(len(desList))
cap = cv2.VideoCapture(0)
#cap = cv2.imread('#2 Image descriptor\Assets\Train\RE8.jpg')
while True:
success, img2 = cap.read()
imgOriginal = img2.copy()
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
id = findID(img2, desList)
if id != -1:
cv2.putText(imgOriginal,classNames[id], (50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1)
cv2.imshow('img2', imgOriginal)
if cv2.waitKey(1) == ord('q'):
break
cap.release() # release the camera so it can be opened again later
cv2.destroyAllWindows()
I think the error is at:
imgCur = cv2.imread(f'{path}/{cl}', 0)
The problem is in this part of your code:
if id != -1:
cv2.putText(imgOriginal,classNames[id], (50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1)
The error is telling you that you are indexing the classNames list at a position that doesn't exist. For example, if the classNames list is empty, classNames[0] will raise this error. You can debug it like so:
if id != -1:
if id < len(classNames):
cv2.putText(imgOriginal,classNames[id], (50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),1)
else:
print(classNames)
print(f"Error: Attempted to index {id} from list of length {len(classNames)}.")
I am trying to debug a data preprocessor for training a CNN (it loads data alongside training). It gives a "too many open files" error with a dataset of more than 400 images (fewer than roughly 400 works). Here's the code. It looks like a memory leak is going on; maybe too many files are being queued? Or I am not deleting something (PIL? But it closes files on its own).
def buffered_gen_mp(pil_img_gen, buffer_size=2):
"""
Generator that runs a slow source generator in a separate process.
buffer_size: the maximal number of items to pre-generate (length of the buffer)
"""
buffer = mp.Queue(maxsize=buffer_size-1)
def _buffered_generation_process(pil_img_gen_, buffer):
for (img_fname,img),(limg_fname,limg) in pil_img_gen_:
sio = StringIO.StringIO()
img.save(sio, 'PNG')
img_enc = sio.getvalue()
sio.close()
sio = StringIO.StringIO()
limg.save(sio, 'PNG')
limg_enc = sio.getvalue()
sio.close()
buffer.put(((img_fname,img_enc),(limg_fname,limg_enc)), block=True)
buffer.put(None) # sentinel: signal the end of the iterator
buffer.close()
process = mp.Process(target=_buffered_generation_process, args=(pil_img_gen, buffer))
process.start()
for data in iter(buffer.get, None):
(img_fname,img_enc),(limg_fname,limg_enc) = data
img = Image.open(StringIO.StringIO(img_enc))
limg = Image.open(StringIO.StringIO(limg_enc))
yield ((img_fname,img),(limg_fname,limg))
def ImageFnameGen(data_dir, img=True, label=True, depth=False, disp=False):
for inst_dir in sorted(data_dir.dirs()):
out = []
img_fname = inst_dir/'image.jpg'
limg_fname = inst_dir/'labels.png'
if img:
out.append(img_fname)
if label:
out.append(limg_fname)
yield out
def PilImageGen(img_fname_g):
for fnames in img_fname_g:
out = []
for fname in fnames:
out.append((fname,Image.open(str(fname))))
yield out
def ScaledImageGen(cfg, data_dir=None, randomize=True, loop=True):
img_fname_gen = ImageFnameGen(data_dir)
pil_img_gen = PilImageGen(img_fname_gen)
out = []
for (img_fname,img),(limg_fname,limg) in pil_img_gen:
# resize img and limg
out.append(((img_fname,img),(limg_fname,limg)))
while True:
if randomize:
random.shuffle(out)
for item in out:
yield item
if not loop:
break
def GeomJitImageGen(cfg, scaled_img_gen):
#stuff
while True:
for (img_fname, img), (limg_fname, limg) in scaled_img_gen:
# do some stuff
img = np.asarray(img).astype(np.double)
limg = np.asarray(limg).astype(np.double)
wimg = warper_x(img).clip(0,255).astype(np.uint8)
wlimg = warper_y(limg).clip(0,255).astype(np.uint8)
yield (img_fname, Image.fromarray(wimg)), (limg_fname, Image.fromarray(wlimg))
def PhotoJitImageGen(img_gen):
# define weights, algorithm
while True:
for (img_fname, img), (limg_fname, limg) in img_gen:
alg = np.random.choice(algorithms,p=weights)
jimg = alg(img)
yield (img_fname, jimg), (limg_fname, limg)
class Loader(object):
def __init__(self, args, expt):
# define self.cfg
input_gen = buffered_gen_mp(PhotoJitImageGen(GeomJitImageGen(self.cfg, ScaledImageGen(self.cfg))), buffer_size=32*8)
self.input_gen = input_gen
# stuff
def __call__(self, x_shared, y_shared):
assert(len(x_shared)==len(y_shared))
n = len(x_shared)
for ix in xrange(n):
(img_fname, pil_img), (limg_fname, pil_limg) = self.input_gen.next()
img = np.asarray(pil_img)
limg = np.asarray(pil_limg)
# stuff
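One detail worth checking here: PIL's Image.open is lazy and keeps the underlying file handle open until the pixel data is actually read, so PilImageGen can hold many descriptors at once. A minimal sketch of an eager variant (same interface; assumes loading each image fully up front is acceptable, and reuses the Image import above):
def PilImageGen(img_fname_g):
    for fnames in img_fname_g:
        out = []
        for fname in fnames:
            img = Image.open(str(fname))
            img.load()  # force the read so PIL can release the file handle now
            out.append((fname, img))
        yield out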
I am trying to compare one image with all images in another folder, get the difference percentage, and print the file name with the least difference percentage. If I try to append the output differences to a list, I get an error saying "float values cannot be iterated". This is what I have done so far:
from itertools import izip
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt
from PIL import Image
import math
res = 0
def take_and_save_picture(im_save):
'''Take a picture and save it
Args:
im_save: filepath where the image should be stored
'''
camera_port = 0
ramp_frames = 30
cap = cv2.VideoCapture(camera_port)
def get_image():
retval, im = cap.read()
return im
for i in xrange(ramp_frames):
temp = get_image()
print("Taking image...")
# Take the actual image we want to keep
camera_capture = get_image()
#im_save_tmp = im_save + '.jpg'
im_save_tmp = im_save
# A nice feature of the imwrite method is that it will automatically choose the
# correct format based on the file extension you provide. Convenient!
cv2.imwrite(im_save_tmp, camera_capture)
# You'll want to release the camera, otherwise you won't be able to create a new
# capture object until your script exits
# del(cap)
img1 = cv2.imread(im_save_tmp, 0)
edges = cv2.Canny(img1, 100, 200)
cv2.imwrite(im_save, edges)
cv2.waitKey(0)
cv2.destroyAllWindows()
def re(path1,path2):
#path1 = raw_input("Enter the path1:")
#path2 = raw_input("Enter the path2:")
i2= Image.open(path2)
listing = os.listdir(path1)
for file in listing:
i1 = Image.open(path1 + file)
assert i1.mode == i2.mode, "Different kinds of images."
assert i1.size == i2.size, "Different sizes."
pairs = izip(i1.getdata(), i2.getdata())
if len(i1.getbands()) == 1:
# for gray-scale jpegs
dif = sum(abs(p1-p2) for p1,p2 in pairs)
else:
dif = sum(abs(c1-c2) for p1,p2 in pairs for c1,c2 in zip(p1,p2))
ncomponents = i1.size[0] * i1.size[1] * 3
res = (dif / 255.0 * 100) / ncomponents
print "Difference (percentage):", res
def main():
capture_img = "/Users/Me/Documents/python programs/New/pro.png"
#img_to_compare = "/Users/Me/Documents/python programs/compare/img2.jpg"
take_and_save_picture(capture_img)
path1 = "/Users/Me/Documents/python programs/New/numbers1/"
path2 = "/Users/Me/Documents/python programs/New/pro.png"
re(path1,path2)
if __name__ == '__main__':
main()
the output is the difference
Difference (percentage): 2.52484809028
Difference (percentage): 2.64822048611
Difference (percentage): 2.64822048611
Difference (percentage): 3.55436197917
The values that I get in "res" have to be stored in a list, and the minimum value should be found and printed. Please give me some code; I'm totally new to Python. Thank you.
Your code should be like this:
#######
list_dif = []
def re(path1,path2):
#path1 = raw_input("Enter the path1:")
#path2 = raw_input("Enter the path2:")
i2= Image.open(path2)
listing = os.listdir(path1)
for file in listing:
i1 = Image.open(path1 + file)
assert i1.mode == i2.mode, "Different kinds of images."
assert i1.size == i2.size, "Different sizes."
pairs = izip(i1.getdata(), i2.getdata())
if len(i1.getbands()) == 1:
# for gray-scale jpegs
dif = sum(abs(p1-p2) for p1,p2 in pairs)
else:
dif = sum(abs(c1-c2) for p1,p2 in pairs for c1,c2 in zip(p1,p2))
ncomponents = i1.size[0] * i1.size[1] * 3
#######
res = (dif / 255.0 * 100) / ncomponents
list_dif.append(res)
print "Difference (percentage):", list_dif
Something like this?
def re(path1,path2):
#path1 = raw_input("Enter the path1:")
#path2 = raw_input("Enter the path2:")
i2= Image.open(path2)
listing = os.listdir(path1)
res = []
for file in listing:
i1 = Image.open(path1 + file)
assert i1.mode == i2.mode, "Different kinds of images."
assert i1.size == i2.size, "Different sizes."
pairs = izip(i1.getdata(), i2.getdata())
if len(i1.getbands()) == 1:
# for gray-scale jpegs
dif = sum(abs(p1-p2) for p1,p2 in pairs)
else:
dif = sum(abs(c1-c2) for p1,p2 in pairs for c1,c2 in zip(p1,p2))
ncomponents = i1.size[0] * i1.size[1] * 3
res.append((dif / 255.0 * 100) / ncomponents)
print "Difference (percentage):", res
minimum = min(res) # Find minimum value in res
print(minimum)
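If the file name with the least difference is also needed (that was the original goal), a small variant of the loop above stores (value, file) pairs instead (same imports as above: izip, Image, os):
res = []
for file in listing:
    i1 = Image.open(path1 + file)
    pairs = izip(i1.getdata(), i2.getdata())
    if len(i1.getbands()) == 1:
        dif = sum(abs(p1 - p2) for p1, p2 in pairs)
    else:
        dif = sum(abs(c1 - c2) for p1, p2 in pairs for c1, c2 in zip(p1, p2))
    ncomponents = i1.size[0] * i1.size[1] * 3
    res.append(((dif / 255.0 * 100) / ncomponents, file))
best_value, best_file = min(res)  # tuples compare on the percentage first
print "Least difference:", best_value, "from file:", best_file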