I am doing a CNN project to estimate pitch from a spectrogram. The project is already finished and waiting to be presented to my institution, but I would like to improve one small detail in my work.
The CNN I have built must be tested using the test (val) data. I store the data in Google Drive (I built my CNN in Google Colab), and before testing with the do_test method, I have to load the data. The paths are given in the code snippet below.
I am able to load the data and test it, but my problem is that I do not know which files I have loaded. The test results are exported to a pandas DataFrame; an example of the output is attached here.
I want to take the filename of each loaded file and put it in the output DataFrame, so I can match each result to a specific file. Right now I can only see the overall data for one song without knowing which file is which (see the DataFrame). What should I add to my code to get the filenames? Should I modify the image_data code in the function get_image_and_label?
Some (I hope) useful items:
Folder containing images. The folder link is given here
Folder containing labels can be accessed here
DataFrame output screenshot
Method do_test. This method is used to execute testing.
model = Sequential()

song_titles = "014_twinkle_twinkle 017_old_mcdonald 020_happy_birthday 022_brother_john 030_london_bridge".split()

image_data_train_global_path = "./drive/MyDrive/1117002_Code Skripsi/Dataset/Dataset TA_1_spectrograms/spectrogram_B_2/train/"
image_data_test_global_path = "./drive/MyDrive/1117002_Code Skripsi/Dataset/Dataset TA_1_spectrograms/spectrogram_B_2/val/"
label_global_path = "./drive/MyDrive/1117002_Code Skripsi/Dataset/Dataset TA_2_label_floor/"

def do_test():
    for title in song_titles:
        print("Song title: ", title)
        label_and_image_test = load_image_and_label(title, process='test')  # this is where I load my data
        images_test, labels_test = populate_test_data(label_and_image_test)
        X_test, y_test = label_processing(images_test, labels_test)
        y_pred = model.predict(X_test)
        song_number = 0
        unique_pitch_result = []
        pitch_class_result = []
        distance_result = []
        while song_number <= (len(y_pred) - 1):
            label_sequence = []
            for i in y_pred[song_number]:
                ind = np.argmax(i)  # index of the most probable pitch class
                label_sequence.append(ind)
            result_pitch_sequence = [k for element in label_sequence for k, v in unique_pitch_dictionary.items() if element == v]
            unique_pitch = np.unique(result_pitch_sequence)
            unique_pitch = unique_pitch.tolist()
            if title == "014_twinkle_twinkle":
                s1 = s1_014_twinkle
            elif title == "017_old_mcdonald":
                s1 = s1_017_mcd
            elif title == "020_happy_birthday":
                s1 = s1_020_hbd
            elif title == "022_brother_john":
                s1 = s1_022_john
            elif title == "030_london_bridge":
                s1 = s1_030_london
            cost_matrix = DTW.dtw_cost_matrix(s1, result_pitch_sequence)
            distance = DTW.dtw_distance(cost_matrix)
            pitch_class = [v for element in unique_pitch for k, v in pitch_class_dictionary.items() if element == k]
            unique_pitch_result.append(unique_pitch)
            pitch_class_result.append(pitch_class)
            distance_result.append(distance)
            song_number += 1
        print("+" * 100)
        df = DTW.test_result_dataframe(unique_pitch_result, pitch_class_result, distance_result, song_number)
        print(df)
        filename = title + " " + str(epochs) + " " + str(learning_rate) + " dataframe.xlsx"
        df.to_excel(filename)
        print("*" * 120)
Function load_image_and_label. This builds the data paths, then loads the files from the directory.
def load_image_and_label(title, process):
    print("Title: ", title)
    label_specific_path = label_global_path + title
    if process == "train-test":
        image_data_train_specific_path = image_data_train_global_path + title
        image_training_data = get_image_and_label(image_data_train_specific_path, label_specific_path)
    image_data_test_specific_path = image_data_test_global_path + title
    image_testing_data = get_image_and_label(image_data_test_specific_path, label_specific_path)
    if process == "train-test":
        return image_training_data, image_testing_data
    else:
        return image_testing_data
Function get_image_and_label. This is where I try to get my files. I think I have to modify something here to output the filename.
def get_image_and_label(image_path, label_path):
    print("Getting image data from: ", image_path)
    image_data = []  # contains arrays of loaded images and their labels. Maybe I should append the filename here?
    tm = time.time()
    for img_item in os.listdir(image_path):  # for every image in path
        try:
            img_array = cv2.imread(os.path.join(image_path, img_item))
            # preprocess image
            spectrogram_preprocessing = resize_recolor_spectrogram(img_array)
            # imread to array
            spectrogram_preprocessing = np.array(spectrogram_preprocessing)
            # image shape - transpose
            spectrogram_preprocessing = np.transpose(spectrogram_preprocessing)
            # extract image labels
            label = extract_pitch_label(os.path.join(label_path, img_item))
            image_data.append([spectrogram_preprocessing, label])  # add image data and its label to image_data. What code should I add here to get the filename?
        except Exception as e:
            raise e
    print("Got " + str(len(image_data)) + " images from " + image_path + " in " + str(time.time() - tm) + " seconds")
    print("-" * 110)
    return image_data
This is the output if I have succeeded in loading my data.
Song title: 014_twinkle_twinkle
Title: 014_twinkle_twinkle
Getting image data from: ./drive/MyDrive/1117002_Code Skripsi/Dataset/Dataset TA_1_spectrograms/spectrogram_B_2/val/014_twinkle_twinkle
Got 32 images from ./drive/MyDrive/1117002_Code Skripsi/Dataset/Dataset TA_1_spectrograms/spectrogram_B_2/val/014_twinkle_twinkle in 0.29237914085388184 seconds
---------------------------------------------------------------------------------------------------------
I have found the solution. It turns out that in this loop declaration:
for img_item in os.listdir(image_path):
img_item is already the filename, so I do not need to add any more code to retrieve it.
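For anyone who also wants the filenames in the final DataFrame, a minimal sketch (hypothetical: populate_test_data and DTW.test_result_dataframe would need to pass the extra field through, and filenames_test is an illustrative name for a list aligned with y_pred):

# in get_image_and_label: keep the filename next to each sample
image_data.append([spectrogram_preprocessing, label, img_item])

# in do_test: after building the DataFrame, attach the collected names
df["filename"] = filenames_test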
With the following code I augment some images, and I would like to know how to make each generated image carry its corresponding label.
I tried save_prefix=" " + label, but it just writes the same name for all images.
I appreciate any help or comments.
def get_augmt(image, model):
    i = 0
    img_list = []
    for batch in model.flow(x=image,
                            batch_size=1,
                            save_to_dir='probando',
                            save_prefix=" " + label,
                            save_format='jpeg',
                            subset=None):
        i += 1
        img_list.append(batch)
        if i > 5:
            return img_list

pr["img_aug"] = pr["images"].apply(lambda x: get_augmt(x, datagen))
It prints the same name for all images because you're reading the label variable as a global and never changing it inside the function.
If in pr["images"] you are able to attach a label to each image, it will be easier to access it from the function get_augmt.
Otherwise you can use i instead, and the code would be:
def get_augmt(image, model):
    i = 0
    img_list = []
    for batch in model.flow(x=image,
                            batch_size=1,
                            save_to_dir='probando',
                            save_prefix=" " + str(i),
                            save_format='jpeg',
                            subset=None):
        i += 1
        img_list.append(batch)
        if i > 5:
            return img_list

pr["img_aug"] = pr["images"].apply(lambda x: get_augmt(x, datagen))
I'm creating an image popularity algorithm that cuts a video (.mp4) into frames. With the help of AI, the program examines which frames probably display the most beautiful images; the result is expressed as a 'score'.
This works, but I encounter a problem. Because certain frames in a video are very similar, I have many frames with (almost) the same score.
The end result is a list of [score, frame number] pairs. If, for example, 3 items in the list have almost identical frame numbers and therefore (almost) identical scores, I want to keep only the entry with the highest score, in order to remove the duplicates.
It has something to do with this line: result.append((predict(pil_image, model), name))
Here is the code:
import os
import torch
import torchvision.models
import torchvision.transforms as transforms
from PIL import Image
import json
import cv2

def prepare_image(image):
    if image.mode != 'RGB':
        image = image.convert("RGB")
    Transform = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
    ])
    image = Transform(image)
    image = image.unsqueeze(0)
    return image

def predict(image, model):
    image = prepare_image(image)
    with torch.no_grad():
        preds = model(image)
    score = preds.detach().numpy().item()
    print("Picture score: " + str(round(score, 2)) + " | frames left: " + str(framesToDo))
    return str(round(score, 2))

if __name__ == '__main__':
    model = torchvision.models.resnet50()
    model.fc = torch.nn.Linear(in_features=2048, out_features=1)
    model.load_state_dict(torch.load('model/model-resnet50.pth', map_location=torch.device('cpu')))
    model.eval()
    result = []
    # in the 'video' folder, the videos are saved with the names 1 to 23
    for i in range(1, 23):  # note: this processes videos 1 through 22
        vidcap = cv2.VideoCapture('./video/' + str(i) + '.mp4')
        succes, vidcap_image = vidcap.read()
        count = 0
        framestep = 500  # for Stack Overflow example
        framesToDo = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
        # while succes and count < max_frames
        while succes and count < int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)):  # maximum number of frames in video
            name = str(i) + '_' + str(count)
            cv2.imwrite("./frames_saved/" + 'vid' + '_' + name + ".jpg", vidcap_image)  # save frame as jpg image
            count += framestep  # 500 frames further
            framesToDo = framesToDo - framestep
            cv2_image = cv2.cvtColor(vidcap_image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(cv2_image)
            result.append((predict(pil_image, model), name))
            succes, vidcap_image = vidcap.read()
    result.sort(reverse=False)
    print(result)
    with open('result.json', 'w') as filehandle:
        filehandle.write(json.dumps(result))
Since there is no reproducible example, you can adapt this to solve your problem. It analyses each frame's data and skips unnecessary entries, updating the best values and appending new ones.
MAX_FRAME_NUMBER_DIFF = 60
MAX_SCORE_DIFF = 0.5

# inside the while loop, in place of the plain result.append(...):
current_frame = count
current_score = predict(pil_image, model)  # note: this assumes predict returns a float, not a string
data = (current_score, current_frame)

if not results:
    results.append(data)
else:
    last_score, last_frame = results[-1]
    is_similar_frame = current_frame - last_frame <= MAX_FRAME_NUMBER_DIFF
    is_score_better = current_score > last_score
    is_score_way_better = current_score - last_score > MAX_SCORE_DIFF
    if is_similar_frame:
        if is_score_better:
            if is_score_way_better:  # diff between current and previous score bigger than MAX_SCORE_DIFF
                results.append(data)
            else:  # current score better than previous, but not by much
                results[-1] = data  # update last value
        else:  # current score not better than previous
            continue  # skip this one
    else:  # not similar frames
        results.append(data)
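If you prefer to leave the main loop untouched, a similar pass can run over the finished list instead. A minimal sketch (hypothetical helper; it assumes each entry is (score, name) with score as a float and name formatted as 'videoIndex_frameNumber', as in the question's code):

def dedup_results(results, max_frame_diff=500):
    # sort by video index and frame number, then keep only the best-scoring
    # entry among neighbouring frames of the same video
    def key(entry):
        video, frame = entry[1].split('_')
        return int(video), int(frame)
    deduped = []
    for score, name in sorted(results, key=key):
        video, frame = name.split('_')
        if deduped:
            last_score, last_name = deduped[-1]
            last_video, last_frame = last_name.split('_')
            if video == last_video and int(frame) - int(last_frame) <= max_frame_diff:
                if score > last_score:
                    deduped[-1] = (score, name)  # similar frame, better score: replace
                continue  # similar frame, worse score: drop
        deduped.append((score, name))
    return deduped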
I have a folder with >100,000 images, which I would like to classify using TensorFlow. I wrote a for loop that iterates over each image, returns a confidence score, and stores the predictions to a csv file.
The problem is: The script starts very quickly (approx. 10 images per second for images 1-1000) and gradually slows down with each iteration (only about 1 image per second for images >1000).
For similar slow-down issues of for loops in Python, I read that pre-allocation might be a solution. However, I am writing directly to a csv and not to a list, so I am unsure how this would help.
Is there any way to ensure consistent speed during the entire loop?
Thank you in advance for any pointer!
Please find my code below, which is based on this tutorial (https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0):
filename = "predictions.csv"
f = open(filename, "w")
headers = "id;image_name;confidence\n"
f.write(headers)
start = 1
end = 20000
testdata = "C:/files/"
files = list(os.listdir(testdata))
for index in range(start, end+1):
filename = files[index]
if not filename.startswith('.'):
print(str(index) + " - " + str(filename))
image=testdata+filename
results = label_image(image, graph, session, input_height=299, input_width=299, input_layer="Mul")
f.write(str(index) + ";" + str(filename) + ";" + str(results[0]) + "\n")
print("\n")
f.close()
EDIT:
I am loading the graph just once before running the loop.
from scripts.label_image import load_graph, label_image, get_session
model_file = "retrained_graph.pb"
graph = load_graph(model_file)
session = get_session(graph)
EDIT 2:
This is the code of the label_image function.
def label_image(file_name, graph, session, label_file="retrained_labels.txt", input_height=224, input_width=224, input_mean=128, input_std=128, input_layer="input", output_layer="final_result"):
    t = read_tensor_from_image_file(file_name,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)
    input_name = "import/" + input_layer
    output_name = "import/" + output_layer
    input_operation = graph.get_operation_by_name(input_name)
    output_operation = graph.get_operation_by_name(output_name)
    start = time.time()
    results = session.run(output_operation.outputs[0],
                          {input_operation.outputs[0]: t})
    end = time.time()
    results = np.squeeze(results)
    top_k = results.argsort()[-5:][::-1]
    labels = load_labels(label_file)
    print('\nEvaluation time (1-image): {:.3f}s\n'.format(end - start))
    template = "{} (score={:0.5f})"
    for i in top_k:
        print(template.format(labels[i], results[i]))
    return results
EDIT 3:
This is the code of the read_tensor_from_image_file function.
def read_tensor_from_image_file(file_name, input_height=299, input_width=299,
                                input_mean=0, input_std=255):
    input_name = "file_reader"
    output_name = "normalized"
    file_reader = tf.read_file(file_name, input_name)
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                      name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    sess = tf.Session()
    result = sess.run(normalized)
    return result
EDIT 4:
This is my refactored code, which throws the following error:
AttributeError: 'Tensor' object has no attribute 'endswith'
def process_image(file_name):
    input_name = "file_reader"
    output_name = "normalized"
    file_reader = tf.read_file(file_name, input_name)
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                      name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    return normalized

filename_placeholder = tf.placeholder(tf.string)
processed = process_image(filename_placeholder)

def label_image(file_name, graph, session, label_file="tf_files/retrained_labels.txt", input_height=224, input_width=224, input_mean=128, input_std=128, input_layer="input", output_layer="final_result"):
    result = sess.run(processed, feed_dict={filename_placeholder: file_name})
    input_name = "import/" + input_layer
    output_name = "import/" + output_layer
    input_operation = graph.get_operation_by_name(input_name)
    output_operation = graph.get_operation_by_name(output_name)
    start = time.time()
    results = session.run(output_operation.outputs[0],
                          {input_operation.outputs[0]: t})
    end = time.time()
    results = np.squeeze(results)
    top_k = results.argsort()[-5:][::-1]
    labels = load_labels(label_file)
    print('\nEvaluation time (1-image): {:.3f}s\n'.format(end - start))
    template = "{} (score={:0.5f})"
    for i in top_k:
        print(template.format(labels[i], results[i]))
    return results
The problem lies within the read_tensor_from_image_file function. This function is called in each iteration of the loop. Within the function you are creating Tensorflow ops. As a rule of thumb, tf.anything calls are responsible for building the computational graph. They should only ever be called once and then ran repeatedly using a tf.Session. As it is, you are constantly growing the size of your computational graph with "clones" of the same image processing ops, which slows down execution gradually as your graph grows larger.
You should refactor your code such that the op definitions in read_tensor_from_image_file are only executed once, and only do the sess.run(normalized) part within the loop. You can use a tf.placeholder for the input (file name). Also, you shouldn't create a new session each time the function is called -- instead pass through the session from label_image.
Here is a reduced example of how to refactor code like this. Let's say we have a function to create the image processing ops:
def process_image(file_name):
    file_reader = tf.read_file(file_name, input_name)
    ...
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    return normalized
This is basically your read_tensor_from_image_file function except for the last part involving a session. What you currently do is basically
def label_image(file_name, ...):
    processed = process_image(file_name)
    sess = tf.Session()
    result = sess.run(processed)
    ....

for file_name in files:
    label_image(file_name, ...)
Instead, what you should do is
filename_placeholder = tf.placeholder(tf.string)
processed = process_image(filename_placeholder)

def label_image(file_name, ...):
    result = sess.run(processed, feed_dict={filename_placeholder: file_name})
    ....

for file_name in files:
    label_image(file_name, ...)
The important difference is that we moved the process_image call out of the loop and only run the resulting tensor inside it. We also do not create a new session on every call. The global variables are a bit icky, but you should get the idea.
The only thing I'm not sure about is whether you can use the session you got from get_session(graph) to run the processed tensor. If this doesn't work (i.e. crashes) then you will need to create a second session to run this stuff, however you should only do this once after you call process_image, not repeatedly inside the loop.
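Regarding the AttributeError in EDIT 4: file_name is now a tf.string placeholder at graph-construction time, so the Python-side .endswith() checks cannot inspect it. A minimal sketch of one workaround, assuming TF 1.x (the GIF branch is omitted for brevity): build one decode path per format exactly once, then pick the right tensor in Python at feed time, when the file name is an ordinary string:

filename_placeholder = tf.placeholder(tf.string)
file_reader = tf.read_file(filename_placeholder)

# one decoder op per format, each built exactly once
decoded = {
    ".png": tf.image.decode_png(file_reader, channels=3, name="png_reader"),
    ".bmp": tf.image.decode_bmp(file_reader, name="bmp_reader"),
    ".jpg": tf.image.decode_jpeg(file_reader, channels=3, name="jpeg_reader"),
}

def normalize(image, input_height=299, input_width=299, input_mean=0, input_std=255):
    float_caster = tf.cast(image, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    return tf.divide(tf.subtract(resized, [input_mean]), [input_std])

processed = {ext: normalize(img) for ext, img in decoded.items()}

def preprocess_file(sess, file_name):
    # the extension check happens in Python, on the actual string
    ext = ".png" if file_name.endswith(".png") else ".bmp" if file_name.endswith(".bmp") else ".jpg"
    return sess.run(processed[ext], feed_dict={filename_placeholder: file_name})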
Pre-allocation is still possible. Read the size of the full file list first, then allocate memory for whatever object you need.
Where do you save the files? If performance takes a dive during a loop, it's usually indicative of accessing an ever-growing container.
Try writing all the information to a pre-allocated array and then writing all data entries to the *.csv at once, instead of touching the ever-growing *.csv file on every iteration. This should fix the issue in this case. I'm assuming that results = label_image(image, graph, session, input_height=299, input_width=299, input_layer="Mul") does not open any container but works only on its input arguments.
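A minimal sketch of that buffering pattern, reusing the names from the question's code (the csv module is just one way to do the single write):

import csv

rows = []  # collect all predictions in memory first
for index in range(start, end + 1):
    filename = files[index]
    if not filename.startswith('.'):
        results = label_image(testdata + filename, graph, session,
                              input_height=299, input_width=299, input_layer="Mul")
        rows.append((index, filename, results[0]))

# write the whole result set in one go instead of once per iteration
with open("predictions.csv", "w") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerow(["id", "image_name", "confidence"])
    writer.writerows(rows)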
I am currently writing a Bag of Words analyser using OpenCV and Python. I have been able to extract the information from the images and learn the vocabulary, but the problem is the training part. As a baseline, I am using this and translating it into Python, but when it gets to training the classifier, it fails, saying that it has an unsupported response type (I assume labels is not in the correct format). I have tried a bit of everything, but I cannot get it to work. Any ideas?
import cv2
import os
import numpy as np

dictSize = 1000
retries = 1
flags = cv2.KMEANS_PP_CENTERS
tc = (cv2.TERM_CRITERIA_MAX_ITER, 10, 0.001)

matcher = cv2.DescriptorMatcher_create("FlannBased")
extractor = cv2.DescriptorExtractor_create("SURF")
detector = cv2.FeatureDetector_create("SURF")

bowTrainer = cv2.BOWKMeansTrainer(dictSize, tc, retries, flags)
bowDE = cv2.BOWImgDescriptorExtractor(extractor, matcher)

def extractTrainingVocabulary(path):
    global bowTrainer
    global extractor
    lst = os.listdir(path)
    for i in range(0, len(lst)):
        if lst[i][0] != ".":
            fullPath = path + lst[i]
            print "Processing Image " + fullPath
            img = cv2.imread(fullPath)
            if not (len(img) == 0):
                keypoints = detector.detect(img)
                if (len(keypoints) == 0):
                    print "Warning! Could not find any keypoints in image " + fullPath
                else:
                    # returns 2 vars; the underscore discards the first one
                    _, features = extractor.compute(img, keypoints)
                    bowTrainer.add(features)
            else:
                print "Could not read image " + fullPath

def extractBOWDescriptor(path, descriptors, labels):
    global bowTrainer
    global extractor
    lst = os.listdir(path)
    for i in range(0, len(lst)):
        if lst[i][0] != ".":
            fullPath = path + lst[i]
            print "Processing Image " + fullPath
            img = cv2.imread(fullPath)
            if not (len(img) == 0):
                keypoints = detector.detect(img)
                if (len(keypoints) == 0):
                    print "Warning! Could not find any keypoints in image " + fullPath
                else:
                    bowDescriptor = bowDE.compute(img, keypoints)
                    # descriptors.append(bowDescriptor)
                    # np.vstack((descriptors, bowDescriptor))
                    descriptors = np.vstack((descriptors, bowDescriptor))
                    # labels.append(lst[i][:-4])
                    labels = np.vstack((labels, float(lst[i][:-4])))
            else:
                print "Could not read image " + fullPath
    return labels, descriptors

def main():
    global bowDE
    # LEARN
    print "Creating Dict..."
    extractTrainingVocabulary("./testImages/")
    descriptors = bowTrainer.getDescriptors()
    print "Clustering " + str(len(descriptors)) + " features. This might take a while..."
    dictionary = bowTrainer.cluster()
    print "Done clustering"
    # EXTRACT
    size1 = 0, dictSize
    trainingData = np.zeros(size1, dtype=np.float32)
    size2 = 0, 1
    labels = np.zeros(size2, dtype=np.float32)
    bowDE.setVocabulary(dictionary)
    labels, trainingData = extractBOWDescriptor("./evalImages/", trainingData, labels)
    print(trainingData)
    print(labels)
    print "Training classifier"
    size3 = len(trainingData), len(trainingData[0])
    responseData = np.zeros(size3, dtype=np.float32)
    classifier = cv2.NormalBayesClassifier()
    classifier.train(trainingData, labels)

main()
EDIT
As per @berak's suggestion, I changed the following:
labels = np.vstack((labels,float(lst[i][:-4]))) -> labels = np.vstack((labels,int(lst[i][:-4])))
labels = np.zeros(size2,dtype=np.float32) -> labels = np.zeros(size2,dtype=np.int32)
Unfortunately, it still fails. Now I get the following:
error: (-5) There is only a single class in function cvPreprocessCategoricalResponses
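For reference, that error means the response vector now contains only one distinct value: OpenCV's cvPreprocessCategoricalResponses needs at least two classes to train on, and if every filename stem converts to the same integer, all labels collapse into a single class. A hedged sketch, assuming filenames such as cat_001.jpg where a class token precedes an underscore:

# hypothetical: derive an integer class id from a class token in the filename,
# so that labels contains at least two distinct values
names = [f for f in os.listdir("./evalImages/") if not f.startswith(".")]
class_ids = {c: i for i, c in enumerate(sorted(set(n.split("_")[0] for n in names)))}
labels = np.array([[class_ids[n.split("_")[0]]] for n in names], dtype=np.int32)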
I am working on a logo classifier/recognizer using Python 2.7.5 and OpenCV 2.4.8.
I have several images of the same logo, but in different forms and presentations. I would like to train the classifier with that information and, at the end, recover the name of that logo regardless of its form or presentation.
I would like to know how to train a KNN classifier using that information. I have the code that extracts the keypoints and descriptors using SURF, and I am storing that data directly on the hard disk.
def FeatureDetector(cvImage=None, filename=None):
    template = dict()
    hessian_threshold = 5000
    if(filename is not None):
        inputImage = cv.imread(filename)
    if(cvImage is not None):
        inputImage = cvImage
    imageGray = cv.cvtColor(inputImage, cv.COLOR_BGR2GRAY)
    detector = cv.SURF(hessian_threshold)
    keypoints, descriptors = detector.detectAndCompute(imageGray, None, useProvidedKeypoints=False)
    template["image"] = inputImage
    template["array"] = imageGray
    template["keypoints"] = keypoints
    template["descriptors"] = descriptors
    return template

def saveKeypoints(filename, keypoints):
    kArray = []
    for point in keypoints:
        keypoint = (point.pt, point.size, point.angle, point.response, point.octave, point.class_id)
        kArray.append(keypoint)
    with open(filename, "wb") as outputFile:
        pickle.dump(kArray, outputFile)
    return

def detection(logoName, extension, show=False):
    imagePath = PATHS["logos"] + logoName + "/"
    if(os.path.exists(imagePath)):
        count = 1
        while(True):
            filename = imagePath + str(count) + "." + extension
            if(not os.path.exists(filename)):
                print "[!] File '%s' not found, the end of sequence was reached"%(filename)
                break
            temp = FeatureDetector(filename=filename)
            saveKeypoints(PATHS["keypoints"] + logoName + "/" + str(count) + ".kp", temp["keypoints"])
            np.save(PATHS["descriptors"] + logoName + "/" + str(count) + ".npy", temp["descriptors"])
            np.save(PATHS["arrays"] + logoName + "/" + str(count) + ".npy", temp["array"])
            if(show):
                showFeatures(filename, temp)
            print "[O] Processed '%s'"%(filename)
            count += 1
    else:
        print "[X] Logo not found\n"
    return
Then I have another script that loads the data and trains the KNN, but only with one form of a logo. I would like to train the classifier with all forms of the logo, using all the keypoints and descriptors that I have, and recover only one result.
def loadKeypoints(path):
    keypoints = []
    try:
        with open(PATHS["keypoints"] + path + ".kp", "rb") as inputFile:
            kArray = pickle.load(inputFile)
            for point in kArray:
                feature = cv.KeyPoint(
                    x=point[0][0],
                    y=point[0][1],
                    _size=point[1],
                    _angle=point[2],
                    _response=point[3],
                    _octave=point[4],
                    _class_id=point[5]
                )
                keypoints.append(feature)
    except:
        return False
    return keypoints

def loadSURF():
    global TEMPLATES, LOGOS
    for logo in LOGOS:
        TEMPLATES[logo] = list()
        count = 1
        while(True):
            path = "%s/%d"%(logo, count)
            keypoints = loadKeypoints(path)
            if(not keypoints):
                print "[!] Template for '%s' not found, the end of sequence was reached"%(path)
                break
            descriptors = np.load(PATHS["descriptors"] + path + ".npy")
            array = np.load(PATHS["arrays"] + path + ".npy")
            template = {
                "keypoints": keypoints,
                "descriptors": descriptors,
                "array": array
            }
            print "[O] Template loaded from %s"%(path)
            TEMPLATES[logo].append(template)
            count += 1
    return

def SURFCompare(temp, image):
    samples = temp["descriptors"]
    responses = np.arange(len(temp["keypoints"]), dtype=np.float32)
    knn = cv.KNearest()
    knn.train(samples, responses)
    for template in TEMPLATES:
        pattern = TEMPLATES[template]
        for t in pattern:
            for h, des in enumerate(t["descriptors"]):
                des = np.array(des, np.float32).reshape((1, 128))
                retval, results, neigh_resp, dists = knn.find_nearest(des, 1)
                res, dist = int(results[0][0]), dists[0][0]
                if dist < 0.1:  # draw matched keypoints in red color
                    color = (0, 0, 255)
                    print template
                else:  # draw unmatched in blue color
                    color = (255, 0, 0)
                # draw matched key points on original image
                x, y = temp["keypoints"][res].pt
                center = (int(x), int(y))
                cv.circle(image, center, 2, color, -1)
    return True
Is that possible?
Is the KNN classifier the best approach, or are there better options? I am also thinking of using a FLANN matcher.
I don't know whether these are the best options, because at the moment I only recognize one logo in one form, but I want to be able to recognize more than one logo, each in several forms.
Thanks in advance.
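For illustration, a minimal sketch of training a single KNN over every form of every logo, with one integer label per logo rather than one per keypoint (assumptions: the OpenCV 2.4 KNearest API used above, and TEMPLATES already populated by loadSURF):

# build one training set from all templates of all logos
logo_ids = dict((logo, idx) for idx, logo in enumerate(LOGOS))
samples_list = []
responses_list = []

for logo in LOGOS:
    for template in TEMPLATES[logo]:
        descriptors = np.array(template["descriptors"], dtype=np.float32)
        samples_list.append(descriptors)
        # every descriptor of this template votes for the same logo id
        responses_list.append(np.full((len(descriptors), 1), logo_ids[logo], dtype=np.float32))

samples = np.vstack(samples_list)
responses = np.vstack(responses_list)

knn = cv.KNearest()
knn.train(samples, responses)

# at query time, each descriptor of the query image votes for a logo id via
# knn.find_nearest; a majority vote over the descriptors names the logo,
# regardless of which form it matched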