I have the following code, which uses a pre-trained TensorFlow Hub model to find the semantic similarity between sentences.
My code:
import numpy as np
import seaborn as sns
import tensorflow_hub as hub

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(module_url)
print("module %s loaded" % module_url)
doc = ["#banjosh i barely did survive. you have no idea. i should have definitely worn stretchy pants. but, anyhow, we missed you ",
"#BanAndCustMov Thanks, I got complete series four of Doctor Who X","#bananchips Thanks, might need it anyway ",
"#azandiaMJBB Doing well, thanks. One of the jobs I interviewed for got filled. No feedback on the 2nd interview. ",
"#azandiaMJBB You are very welcome, Mary - it is a lovely quote. ",
"#AZBlueEyes He is... but they don't make donuts. Bread, cookies, brownies... etc. Donuts, use different equipment & fryers. Pizza... YAY!"]
query = ["make donuts"]
bank_vec = model(doc)
query_vec = model(query)
correlation = np.transpose(np.inner(query_vec,bank_vec))
print("Closest match found to '",query[0],"' is '",doc[np.argmax(correlation, axis=0)[0]],"'")
print("Correlation matrix shape: ",correlation.shape)
sns.set(font_scale=1)
g = sns.heatmap(
    correlation,
    xticklabels=query,
    yticklabels=doc,
    vmin=0,
    vmax=1,
    cmap="YlOrRd")
Here doc contains the sentences and query contains the text I want to find matches for. Instead of the hard-coded list, I want to pass a Twitter corpus of 50k tweets in CSV or TXT format, and for the query I want the user to input a sentence after I run the code. How can I do that?
Mentioning the solution in this (Answer) section, even though it is present in the Comments section, for the benefit of the community.
Complete working code for an example is shown below:
!pip install apache_beam
!pip install sklearn
!pip install annoy
import os
import sys
import pickle
from collections import namedtuple
from datetime import datetime
import numpy as np
import apache_beam as beam
from apache_beam.transforms import util
import tensorflow as tf
import tensorflow_hub as hub
import annoy
from sklearn.random_projection import gaussian_random_matrix
print('TF version: {}'.format(tf.__version__))
print('TF-Hub version: {}'.format(hub.__version__))
print('Apache Beam version: {}'.format(beam.__version__))
!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv
!wc -l raw.tsv
!head raw.tsv
!rm -r corpus
!mkdir corpus
with open('corpus/text.txt', 'w') as out_file:
    with open('raw.tsv', 'r') as in_file:
        for line in in_file:
            headline = line.split('\t')[1].strip().strip('"')
            out_file.write(headline+"\n")
!tail corpus/text.txt
embed_fn = None

def generate_embeddings(text, module_url, random_projection_matrix=None):
    # Beam will run this function in different processes that need to
    # import hub and load embed_fn (if not previously loaded)
    global embed_fn
    if embed_fn is None:
        embed_fn = hub.load(module_url)
    embedding = embed_fn(text).numpy()
    if random_projection_matrix is not None:
        embedding = embedding.dot(random_projection_matrix)
    return text, embedding
def to_tf_example(entries):
    examples = []
    text_list, embedding_list = entries
    for i in range(len(text_list)):
        text = text_list[i]
        embedding = embedding_list[i]
        features = {
            'text': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[text.encode('utf-8')])),
            'embedding': tf.train.Feature(
                float_list=tf.train.FloatList(value=embedding.tolist()))
        }
        example = tf.train.Example(
            features=tf.train.Features(
                feature=features)).SerializeToString(deterministic=True)
        examples.append(example)
    return examples
# Beam pipeline
def run_hub2emb(args):
    '''Runs the embedding generation pipeline'''
    options = beam.options.pipeline_options.PipelineOptions(**args)
    args = namedtuple("options", args.keys())(*args.values())
    with beam.Pipeline(args.runner, options=options) as pipeline:
        (
            pipeline
            | 'Read sentences from files' >> beam.io.ReadFromText(
                file_pattern=args.data_dir)
            | 'Batch elements' >> util.BatchElements(
                min_batch_size=args.batch_size, max_batch_size=args.batch_size)
            | 'Generate embeddings' >> beam.Map(
                generate_embeddings, args.module_url, args.random_projection_matrix)
            | 'Encode to tf example' >> beam.FlatMap(to_tf_example)
            | 'Write to TFRecords files' >> beam.io.WriteToTFRecord(
                file_path_prefix='{}/emb'.format(args.output_dir),
                file_name_suffix='.tfrecords')
        )
def generate_random_projection_weights(original_dim, projected_dim):
    random_projection_matrix = gaussian_random_matrix(
        n_components=projected_dim, n_features=original_dim).T
    print("A Gaussian random weight matrix was created with shape {}".format(random_projection_matrix.shape))
    print('Storing random projection matrix to disk...')
    with open('random_projection_matrix', 'wb') as handle:
        pickle.dump(random_projection_matrix,
                    handle, protocol=pickle.HIGHEST_PROTOCOL)
    return random_projection_matrix
# Set parameters
module_url = 'https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1' #@param {type:"string"}
projected_dim = 64 #@param {type:"number"}
# Run pipeline
import tempfile
output_dir = tempfile.mkdtemp()
original_dim = hub.load(module_url)(['']).shape[1]
random_projection_matrix = None
if projected_dim:
    random_projection_matrix = generate_random_projection_weights(
        original_dim, projected_dim)
args = {
    'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),
    'runner': 'DirectRunner',
    'batch_size': 1024,
    'data_dir': 'corpus/*.txt',
    'output_dir': output_dir,
    'module_url': module_url,
    'random_projection_matrix': random_projection_matrix,
}
print("Pipeline args are set.")
args
print("Running pipeline...")
%time run_hub2emb(args)
print("Pipeline is done.")
!ls {output_dir}
# Read some of the generated embeddings...
embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')
sample = 5
# Create a description of the features.
feature_description = {
    'text': tf.io.FixedLenFeature([], tf.string),
    'embedding': tf.io.FixedLenFeature([projected_dim], tf.float32)
}

def _parse_example(example):
    # Parse the input `tf.Example` proto using the dictionary above.
    return tf.io.parse_single_example(example, feature_description)

dataset = tf.data.TFRecordDataset(embed_file)
for record in dataset.take(sample).map(_parse_example):
    print("{}: {}".format(record['text'].numpy().decode('utf-8'), record['embedding'].numpy()[:10]))
# 3. Build the ANN Index for the Embeddings
def build_index(embedding_files_pattern, index_filename, vector_length,
                metric='angular', num_trees=100):
    '''Builds an ANNOY index'''
    annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)
    # Mapping between the item and its identifier in the index
    mapping = {}

    embed_files = tf.io.gfile.glob(embedding_files_pattern)
    num_files = len(embed_files)
    print('Found {} embedding file(s).'.format(num_files))

    item_counter = 0
    for i, embed_file in enumerate(embed_files):
        print('Loading embeddings in file {} of {}...'.format(i+1, num_files))
        dataset = tf.data.TFRecordDataset(embed_file)
        for record in dataset.map(_parse_example):
            text = record['text'].numpy().decode("utf-8")
            embedding = record['embedding'].numpy()
            mapping[item_counter] = text
            annoy_index.add_item(item_counter, embedding)
            item_counter += 1
            if item_counter % 100000 == 0:
                print('{} items loaded to the index'.format(item_counter))

    print('A total of {} items added to the index'.format(item_counter))
    print('Building the index with {} trees...'.format(num_trees))
    annoy_index.build(n_trees=num_trees)
    print('Index is successfully built.')

    print('Saving index to disk...')
    annoy_index.save(index_filename)
    print('Index is saved to disk.')
    print("Index file size: {} GB".format(
        round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))
    annoy_index.unload()

    print('Saving mapping to disk...')
    with open(index_filename + '.mapping', 'wb') as handle:
        pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('Mapping is saved to disk.')
    print("Mapping file size: {} MB".format(
        round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))
embedding_files = "{}/emb-*.tfrecords".format(output_dir)
embedding_dimension = projected_dim
index_filename = "index"
!rm {index_filename}
!rm {index_filename}.mapping
%time build_index(embedding_files, index_filename, embedding_dimension)
!ls
# 4. Use the Index for Similarity Matching
# Load the index and the mapping files
index = annoy.AnnoyIndex(embedding_dimension)
index.load(index_filename, prefault=True)
print('Annoy index is loaded.')
with open(index_filename + '.mapping', 'rb') as handle:
    mapping = pickle.load(handle)
print('Mapping file is loaded.')
# Similarity matching method
def find_similar_items(embedding, num_matches=5):
    '''Finds similar items to a given embedding in the ANN index'''
    ids = index.get_nns_by_vector(
        embedding, num_matches, search_k=-1, include_distances=False)
    items = [mapping[i] for i in ids]
    return items
# Extract embedding from a given query
# Load the TF-Hub module
print("Loading the TF-Hub module...")
%time embed_fn = hub.load(module_url)
print("TF-Hub module is loaded.")
random_projection_matrix = None
if os.path.exists('random_projection_matrix'):
    print("Loading random projection matrix...")
    with open('random_projection_matrix', 'rb') as handle:
        random_projection_matrix = pickle.load(handle)
    print('random projection matrix is loaded.')
def extract_embeddings(query):
    '''Generates the embedding for the query'''
    query_embedding = embed_fn([query])[0].numpy()
    if random_projection_matrix is not None:
        query_embedding = query_embedding.dot(random_projection_matrix)
    return query_embedding
extract_embeddings("Hello Machine Learning!")[:10]
# Enter a query to find the most similar items
#@title { run: "auto" }
query = "confronting global challenges" #@param {type:"string"}
print("Generating embedding for the query...")
%time query_embedding = extract_embeddings(query)
print("")
print("Finding relevant items in the index...")
%time items = find_similar_items(query_embedding, 10)
print("")
print("Results:")
print("=========")
for item in items:
    print(item)
For more details and for an explanation of the code, please refer to the Google Colab example at https://colab.sandbox.google.com/github/tensorflow/hub/blob/master/examples/colab/tf2_semantic_approximate_nearest_neighbors.ipynb#scrollTo=9qOVy-_vmuUP
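If you only need the simpler flow from the original snippet (no Beam pipeline or ANN index), here is a minimal sketch of loading a 50k-tweet corpus from a CSV and taking the query from the user at runtime; the file name tweets.csv and its text column are assumptions, adjust them to your data:

import numpy as np
import pandas as pd
import tensorflow_hub as hub

model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

# Load the tweet corpus; 'tweets.csv' and the 'text' column are placeholders.
doc = pd.read_csv("tweets.csv")["text"].astype(str).tolist()
bank_vec = model(doc)  # for 50k tweets, consider embedding in chunks to limit memory

# Take the query from the user after the corpus is embedded.
query = input("Enter a sentence to search for: ")
query_vec = model([query])

# Inner product of USE vectors, exactly as in the original snippet.
scores = np.inner(query_vec, bank_vec)[0]
print("Closest match:", doc[int(np.argmax(scores))])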
Related
I have an object detection dataset with RGB images and annotations in JSON. I use a custom DataLoader class to read the images and the labels. One issue I'm facing is that I would like to skip images during training if their labels don't contain certain objects.
For example, if an image doesn't contain any target labels belonging to the class 'Cars', I would like to skip it. When parsing my JSON annotations, I tried checking for labels that don't contain the class 'Cars' and returning None. Subsequently, I used a collate function to filter out the None entries, but unfortunately it is not working.
import torch
from torch.utils.data.dataset import Dataset
import json
import os
from PIL import Image
from torchvision import transforms as T  # used below as T.ToTensor(), etc.
#import cv2
import numpy as np
general_classes = {
    # Cars
    "Toyota Corolla": 0,
    "VW Golf": 0,
    "VW Beetle": 0,
    # Motor-cycles
    "Harley Davidson": 1,
    "Yamaha YZF-R6": 1,
}

car_classes = {
    "Toyota Corolla": 0,
    "VW Golf": 0,
    "VW Beetle": 0
}
def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def my_collate(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)
class FilteredDataset(Dataset):
    # The dataloader will skip the image and corresponding labels based on the dictionary 'car_classes'
    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        img_folder_list = os.listdir(self.data_dir)
        self.transforms = transforms
        imgs_list = []
        json_list = []
        self.filter_count = 0
        self.filtered_label_list = []
        for img_path in img_folder_list:
            #img_full_path = self.data_dir + img_path
            img_full_path = os.path.join(self.data_dir, img_path)
            json_file = os.path.join(img_full_path, 'annotations-of-my-images.json')
            img_file = os.path.join(img_full_path, 'Image-Name.png')
            json_list.append(json_file)
            imgs_list.append(img_file)
        self.imgs = imgs_list
        self.annotations = json_list
        total_count = 0
        for one_annotation in self.annotations:
            filtered_obj_id = []
            with open(one_annotation) as f:
                img_annotations = json.load(f)
            parts_list = img_annotations['regions']
            for part in parts_list:
                current_obj_id = part['tags'][0]  # bbox label
                check_obj_id = general_classes[current_obj_id]
                if check_obj_id == 0:
                    subclass_id = car_classes[current_obj_id]
                    filtered_obj_id.append(subclass_id)
                    total_count = total_count + 1
            if len(filtered_obj_id) > 0:
                self.filter_count = self.filter_count + 1
                self.filtered_label_list.append(one_annotation)
        print("The total number of the objects in all images: ", total_count)

    # get one image and the bboxes, img_id, labels of parts, etc. in the image as target.
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        image_id = torch.tensor([idx])
        with open(self.annotations[idx]) as f:
            img_annotations = json.load(f)
        parts_list = img_annotations['regions']
        obj_ids = []
        boxes = []
        for part in parts_list:
            obj_id = part['tags'][0]
            check_obj_id = general_classes[obj_id]
            if check_obj_id == 0:
                obj_id = car_classes[obj_id]
                obj_ids.append(obj_id)
        #print("---------------------------------------------------")
        if len(obj_ids) > 0:
            img = Image.open(img_path).convert("RGB")
            labels = torch.as_tensor(obj_ids, dtype=torch.int64)
            target = {}
            target['labels'] = labels
            if self.transforms is not None:
                img, target = self.transforms(img, target)
            return img, target
        else:
            return None

    def __len__(self):
        return len(self.filtered_label_list)
train_data_path = "path-to-my-annotation"
# Generators
train_dataset = FilteredDataset(train_data_path, get_transform(train=True))
print("Total files in the train_dataset: ", len(train_dataset))
#print("The first instance in the train dataset : ", train_dataset[0])
#training_generator = torch.utils.data.DataLoader(train_dataset)
training_generator = torch.utils.data.DataLoader(train_dataset, collate_fn=my_collate)
print("\n\n Iterator in action! ")
print("---------------------------------------------------------")
count = 0
for img, target in training_generator:
    #print("The img name : ", img[0])
    count = count + 1
    print("target name : ", target)
    print("count : ", count)
    print("**************************************************")
However, I get the following error,
Could anyone please suggest a way to skip the images that do not contain a particular categorical label?
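One way to make the skipping work without returning None at all is a sketch like the following, under the assumption that a sample should be kept only if its annotation contains at least one car class: build the filtered image and annotation lists together in __init__ and index only those. Note that the posted class also has a length/index mismatch: __len__ returns len(self.filtered_label_list) while __getitem__ indexes the unfiltered self.imgs, so the collate workaround never lines up. The class below reuses names from the question (Dataset, car_classes, the JSON 'regions'/'tags' layout); CarOnlyDataset itself is a hypothetical name.

class CarOnlyDataset(Dataset):
    # Sketch: keeps only samples whose annotations contain a car class.
    def __init__(self, data_dir, transforms):
        self.transforms = transforms
        self.imgs, self.annotations = [], []
        for img_dir in os.listdir(data_dir):
            full_path = os.path.join(data_dir, img_dir)
            json_file = os.path.join(full_path, 'annotations-of-my-images.json')
            with open(json_file) as f:
                regions = json.load(f)['regions']
            # keep the sample only if at least one tag maps to a car class
            if any(part['tags'][0] in car_classes for part in regions):
                self.imgs.append(os.path.join(full_path, 'Image-Name.png'))
                self.annotations.append(json_file)

    def __getitem__(self, idx):
        # both lists are pre-filtered, so nothing here can return None
        with open(self.annotations[idx]) as f:
            regions = json.load(f)['regions']
        obj_ids = [car_classes[p['tags'][0]] for p in regions
                   if p['tags'][0] in car_classes]
        img = Image.open(self.imgs[idx]).convert("RGB")
        target = {'labels': torch.as_tensor(obj_ids, dtype=torch.int64)}
        if self.transforms is not None:
            img = self.transforms(img)  # plain torchvision transforms take only the image
        return img, target

    def __len__(self):
        return len(self.imgs)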
I am trying to implement the NER example using BERT and PyTorch from the Hugging Face guide (https://huggingface.co/transformers/custom_datasets.html#ft-trainer). Reading in the data works fine, but when I start training, I get the error
Expected input batch_size (16) to match target batch_size (4000)
When I change the input batch_size, the target batch_size is always the input batch_size * 250, which appears to be the padded sequence length. I would greatly appreciate it if someone could look over the following code and spot my mistake.
from pathlib import Path
import re

def read_data(file_path):
    file_path = Path(file_path)
    raw_text = file_path.read_text().strip()
    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []
    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            token, tag = line.split('\t')
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)
    return token_docs, tag_docs
train_texts, train_tags = read_data('data/train.tsv')
val_texts, val_tags = read_data('data/test.tsv')
unique_tags = set(tag for doc in train_tags for tag in doc)
tag2id = {tag: id for id, tag in enumerate(unique_tags)}
id2tag = {id: tag for tag, id in tag2id.items()}
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-cased')
train_encodings = tokenizer(train_texts, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)
val_encodings = tokenizer(val_texts, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)
import numpy as np
def encode_tags(tags, encodings):
    labels = [[tag2id[tag] for tag in doc] for doc in tags]
    encoded_labels = []
    for doc_labels, doc_offset in zip(labels, encodings.offset_mapping):
        # create an empty array of -100
        doc_enc_labels = np.ones(len(doc_offset), dtype=int) * -100
        arr_offset = np.array(doc_offset)
        # set labels whose first offset position is 0 and the second is not 0
        doc_enc_labels[(arr_offset[:, 0] == 0) & (arr_offset[:, 1] != 0)] = doc_labels
        encoded_labels.append(doc_enc_labels.tolist())
    return encoded_labels
train_labels = encode_tags(train_tags, train_encodings)
val_labels = encode_tags(val_tags, val_encodings)
import torch
class TestDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
train_encodings.pop("offset_mapping") # we don't want to pass this to the model
val_encodings.pop("offset_mapping")
train_dataset = TestDataset(train_encodings, train_labels)
val_dataset = TestDataset(val_encodings, val_labels)
from transformers import DistilBertForTokenClassification
model = DistilBertForTokenClassification.from_pretrained('distilbert-base-cased', num_labels=len(unique_tags))
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
)
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-cased")
trainer = Trainer(
    model=model,                 # the instantiated 🤗 Transformers model to be trained
    args=training_args,          # training arguments, defined above
    train_dataset=train_dataset, # training dataset
    eval_dataset=val_dataset     # evaluation dataset
)
trainer.train()
Instead of the DistilBertForSequenceClassification model, you want to use DistilBertForTokenClassification in the last cell. Sequence classification expects one label per sequence, so your (16, 250) token-label tensor gets flattened into 4000 targets against only 16 logits, which is exactly the batch-size mismatch in the error.
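A minimal sketch of the corrected last cells, reusing the names already defined in your code:

from transformers import DistilBertForTokenClassification, Trainer

# Token classification produces one logit vector per token, so the
# (batch_size, seq_len) label tensor from encode_tags lines up.
model = DistilBertForTokenClassification.from_pretrained(
    'distilbert-base-cased', num_labels=len(unique_tags))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)
trainer.train()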
I am attempting to apply an existing function to a TensorFlow Dataset, but I am running into some issues with the proper way to reference a feature column. If there is just one input, the function works as expected.
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
text = ["I played it a while but it was alright. The steam was a bit of trouble."
" The more they move these game to steam the more of a hard time I have"
" activating and playing a game. But in spite of that it was fun, I "
"liked it. Now I am looking forward to anno 2205 I really want to "
"play my way to the moon.",
"This game is a bit hard to get the hang of, but when you do it's great."]
df = pd.DataFrame({"text": text})
dataset = (
    tf.data.Dataset.from_tensor_slices(
        tf.cast(df.text.values, tf.string)))
tokenizer = tfds.features.text.Tokenizer()
lowercase = True
vocabulary = Counter()
for text in dataset:
    if lowercase:
        text = tf.strings.lower(text)
    tokens = tokenizer.tokenize(text.numpy())
    vocabulary.update(tokens)
vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))
max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
                                              lowercase=True,
                                              tokenizer=tokenizer)
def encode(text):
    sent_list = []
    sents = tf.strings.split(text, sep=". ").numpy()
    if max_sent:
        sents = sents[:max_sent]
    for sent in sents:
        text_encoded = encoder.encode(sent.decode())
        if max_len:
            text_encoded = text_encoded[:max_len]
        sent_list.append(pad_sequences([text_encoded], max_len))
    if len(sent_list) < 5:
        sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
    return tf.concat(sent_list, axis=0)

def encode_pyfn(text):
    [text_encoded] = tf.py_function(encode, inp=[text], Tout=[tf.int32])
    return text_encoded
dataset = dataset.map(encode_pyfn).batch(batch_size=2)
next(iter(dataset))
But when I attempt to apply the same function to a single feature column resulting from make_csv_dataset:
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
text = ["I played it a while but it was alright. The steam was a bit of trouble."
" The more they move these game to steam the more of a hard time I have"
" activating and playing a game. But in spite of that it was fun, I "
"liked it. Now I am looking forward to anno 2205 I really want to "
"play my way to the moon.",
"This game is a bit hard to get the hang of, but when you do it's great."]
target = [0, 1]
gender = [1, 0]
age = [45, 35]
df = pd.DataFrame({"text": text,
                   "target": target,
                   "gender": gender,
                   "age": age})
df.to_csv('test.csv', index=False)
dataset = tf.data.experimental.make_csv_dataset(
    'test.csv',
    batch_size=2,
    label_name='target',
    num_epochs=1)
tokenizer = tfds.features.text.Tokenizer()
lowercase = True
vocabulary = Counter()
for features, _ in dataset:
    text = features['text']
    if lowercase:
        text = tf.strings.lower(text)
    for t in text:
        tokens = tokenizer.tokenize(t.numpy())
        vocabulary.update(tokens)
vocab_size = 5000
vocabulary, _ = zip(*vocabulary.most_common(vocab_size))
max_len = 15
max_sent = 5
encoder = tfds.features.text.TokenTextEncoder(vocabulary,
                                              lowercase=True,
                                              tokenizer=tokenizer)
def encode(text):
    sent_list = []
    sents = tf.strings.split(text, sep=". ").numpy()
    if max_sent:
        sents = sents[:max_sent]
    for sent in sents:
        text_encoded = encoder.encode(sent.decode())
        if max_len:
            text_encoded = text_encoded[:max_len]
        sent_list.append(pad_sequences([text_encoded], max_len, padding='post'))
    if len(sent_list) < 5:
        sent_list.append([tf.zeros(max_len) for _ in range(5 - len(sent_list))])
    return tf.concat(sent_list, axis=0)

def encode_pyfn(features, targets):
    features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
    return features, targets
dataset = dataset.map(encode_pyfn)
next(iter(dataset))
it raises the following:
TypeError: in user code:

    <ipython-input-9-30172a796c2e>:69 encode_pyfn  *
        features['text'] = tf.py_function(encode, inp=features[text], Tout=[tf.int32])
    /Users/username/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:823 __hash__
        raise TypeError("Tensor is unhashable. "

    TypeError: Tensor is unhashable. Instead, use tensor.ref() as the key.
What is the proper way to apply the function to a single feature?
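The traceback points at the dictionary lookup: features[text] uses the dangling text tensor left over from the vocabulary loop as the key, and tensors are not hashable. The key should be the string 'text', and inp= expects a list of tensors. A sketch of the corrected mapping function; note that make_csv_dataset yields batches, so encode must also be able to handle a batch of strings:

def encode_pyfn(features, targets):
    # look the column up by its string name and wrap the tensor in a list
    features['text'] = tf.py_function(encode, inp=[features['text']], Tout=tf.int32)
    return features, targets

dataset = dataset.map(encode_pyfn)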
In the TensorFlow Object Detection API, we are using the ssd_mobilenet_v1_coco_2017_11_17 model to detect 90 general object classes. I want to use this model for detection.
Next, I have trained the faster_rcnn_inception_v2_coco_2018_01_28 model to detect a custom object. I wish to use both in the same code, so that I can detect those 90 objects as well as my newly trained custom object. How can I achieve this in a single script?
I have achieved this with the following code in detect_object.py:
import numpy as np
import tensorflow as tf
import sys
from PIL import Image
import cv2
from utils import label_map_util
from utils import visualization_utils as vis_util
# ------------------ Knife Model Initialization ------------------------------ #
knife_label_map = label_map_util.load_labelmap('training/labelmap.pbtxt')
knife_categories = label_map_util.convert_label_map_to_categories(
    knife_label_map, max_num_classes=1, use_display_name=True)
knife_category_index = label_map_util.create_category_index(knife_categories)
knife_detection_graph = tf.Graph()
with knife_detection_graph.as_default():
    knife_od_graph_def = tf.GraphDef()
    with tf.gfile.GFile('inference_graph_3/frozen_inference_graph.pb', 'rb') as fid:
        knife_serialized_graph = fid.read()
        knife_od_graph_def.ParseFromString(knife_serialized_graph)
        tf.import_graph_def(knife_od_graph_def, name='')
knife_session = tf.Session(graph=knife_detection_graph)
knife_image_tensor = knife_detection_graph.get_tensor_by_name('image_tensor:0')
knife_detection_boxes = knife_detection_graph.get_tensor_by_name(
    'detection_boxes:0')
knife_detection_scores = knife_detection_graph.get_tensor_by_name(
    'detection_scores:0')
knife_detection_classes = knife_detection_graph.get_tensor_by_name(
    'detection_classes:0')
knife_num_detections = knife_detection_graph.get_tensor_by_name(
    'num_detections:0')
# ---------------------------------------------------------------------------- #
# ------------------ General Model Initialization ---------------------------- #
general_label_map = label_map_util.load_labelmap('data/mscoco_label_map.pbtxt')
general_categories = label_map_util.convert_label_map_to_categories(
    general_label_map, max_num_classes=90, use_display_name=True)
general_category_index = label_map_util.create_category_index(
    general_categories)

general_detection_graph = tf.Graph()
with general_detection_graph.as_default():
    general_od_graph_def = tf.GraphDef()
    with tf.gfile.GFile('ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb', 'rb') as fid:
        general_serialized_graph = fid.read()
        general_od_graph_def.ParseFromString(general_serialized_graph)
        tf.import_graph_def(general_od_graph_def, name='')
general_session = tf.Session(graph=general_detection_graph)
general_image_tensor = general_detection_graph.get_tensor_by_name(
    'image_tensor:0')
general_detection_boxes = general_detection_graph.get_tensor_by_name(
    'detection_boxes:0')
general_detection_scores = general_detection_graph.get_tensor_by_name(
    'detection_scores:0')
general_detection_classes = general_detection_graph.get_tensor_by_name(
    'detection_classes:0')
general_num_detections = general_detection_graph.get_tensor_by_name(
    'num_detections:0')
# ---------------------------------------------------------------------------- #
def knife(image_path):
    try:
        image = cv2.imread(image_path)
        image_expanded = np.expand_dims(image, axis=0)
        (boxes, scores, classes, num) = knife_session.run(
            [knife_detection_boxes, knife_detection_scores,
             knife_detection_classes, knife_num_detections],
            feed_dict={knife_image_tensor: image_expanded})
        classes = np.squeeze(classes).astype(np.int32)
        scores = np.squeeze(scores)
        boxes = np.squeeze(boxes)
        for c in range(0, len(classes)):
            class_name = knife_category_index[classes[c]]['name']
            if class_name == 'knife' and scores[c] > .80:
                confidence = scores[c] * 100
                break
            else:
                confidence = 0.00
    except:
        print("Error occurred in knife detection")
        confidence = 0.0  # Some error has occurred
    return confidence
def general(image_path):
    try:
        image = cv2.imread(image_path)
        image_expanded = np.expand_dims(image, axis=0)
        (boxes, scores, classes, num) = general_session.run(
            [general_detection_boxes, general_detection_scores,
             general_detection_classes, general_num_detections],
            feed_dict={general_image_tensor: image_expanded})
        classes = np.squeeze(classes).astype(np.int32)
        scores = np.squeeze(scores)
        boxes = np.squeeze(boxes)
        object_name = []
        object_score = []
        for c in range(0, len(classes)):
            class_name = general_category_index[classes[c]]['name']
            if scores[c] > .30:  # If confidence level is good enough
                object_name.append(class_name)
                object_score.append(str(scores[c] * 100)[:5])
    except:
        print("Error occurred in general detection")
        object_name = ['']
        object_score = ['']
    return object_name, object_score
if __name__ == '__main__':
    print(' in main')
I can do
import detect_object
detect_object.knife("image.jpg") # to detect whether knife is present in image(this is custom trained model)
detect_object.general("image.jpg") # to detect those 90 objects from TF API
I know there is a knife class in the COCO-trained model, but it is not accurate enough, so I retrained a model for knives only. So finally I have two models:
1. The first model detects only knives.
2. The second model detects the 90 general objects as usual.
You can't combine the two models. Have two sections of code, each of which loads one model at a time and identifies whatever it can see in the image.
The other option is to re-train a single model that can identify all the objects you are interested in.
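Following the first suggestion, here is a sketch that keeps the two functions from detect_object.py above and simply merges their per-image results; detect_all is a hypothetical helper name, and the 'knife' label is prepended whenever the custom model fires:

import detect_object

def detect_all(image_path):
    # run the image through both loaded models, one session at a time
    names, scores = detect_object.general(image_path)
    knife_confidence = detect_object.knife(image_path)
    if knife_confidence > 0:
        names = ['knife'] + list(names)
        scores = [str(knife_confidence)[:5]] + list(scores)
    return list(zip(names, scores))

print(detect_all("image.jpg"))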
I have successfully trained a Keras model on a 13" MacBook Pro with Theano, albeit slowly, but when I train the exact same model with the same data on a more powerful computer (32 GB RAM, 8 GB NVIDIA Quadro GPU, 8 CPU cores) with TensorFlow on Ubuntu, the following error occurs:
Here is the script that I use:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import Nadam
from keras.preprocessing.image import random_rotation, random_shift, random_shear, random_zoom
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from random import shuffle
from sklearn.utils import shuffle
K.set_image_dim_ordering("th")
import cv2
import h5py
import json
import os
import sys
import numpy as np
class OpticalSpeechRecognizer(object):
    def __init__(self, rows, columns, frames_per_sequence, samples_generated_per_sample, config_file, training_save_fn, osr_save_fn):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.samples_generated_per_sample = samples_generated_per_sample
        self.config_file = config_file
        self.training_save_fn = training_save_fn
        self.osr_save_fn = osr_save_fn
        self.osr = None
    def save_osr_model(self):
        """ Save the OSR model to an HDF5 file
        """
        # delete file if it already exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.osr_save_fn)
            os.remove(self.osr_save_fn)
        except OSError:
            pass
        print "Saving OSR model to \"{0}\"".format(self.osr_save_fn)
        self.osr.save(self.osr_save_fn)

    def load_osr_model(self):
        """ Load the OSR model from an HDF5 file
        """
        print "Loading OSR model from \"{0}\"".format(self.osr_save_fn)
        self.osr = load_model(self.osr_save_fn)
    def train_osr_model(self):
        """ Train the optical speech recognizer
        """
        print "\nTraining OSR"
        validation_ratio = 0.3
        batch_size = 25
        training_sequence_generator = self.generate_training_sequences(batch_size=batch_size)
        validation_sequence_generator = self.generate_training_sequences(batch_size=batch_size, validation_ratio=validation_ratio)
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            sample_count = training_save_file.attrs["sample_count"]
            pbi = ProgressDisplay()
            self.osr.fit_generator(generator=training_sequence_generator,
                                   validation_data=validation_sequence_generator,
                                   samples_per_epoch=sample_count,
                                   nb_val_samples=int(round(validation_ratio*sample_count)),
                                   nb_epoch=10,
                                   max_q_size=1,
                                   verbose=2,
                                   callbacks=[pbi],
                                   class_weight=None,
                                   nb_worker=1)
    def generate_training_sequences(self, batch_size, validation_ratio=0):
        """ Generates training sequences from HDF5 file on demand
        """
        while True:
            with h5py.File(self.training_save_fn, "r") as training_save_file:
                sample_count = int(training_save_file.attrs["sample_count"])
                sample_idxs = range(0, sample_count)
                shuffle(sample_idxs)
                training_sample_idxs = sample_idxs[0:int((1-validation_ratio)*sample_count)]
                validation_sample_idxs = sample_idxs[int((1-validation_ratio)*sample_count):]
                # generate sequences for validation
                if validation_ratio:
                    validation_sample_count = len(validation_sample_idxs)
                    batches = int(validation_sample_count/batch_size)
                    remainder_samples = validation_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send remainder samples as one batch, if there are any
                    if remainder_samples:
                        X = training_save_file["X"][validation_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][validation_sample_idxs[-remainder_samples:]]
                        yield (X, Y)
                # generate sequences for training
                else:
                    training_sample_count = len(training_sample_idxs)
                    batches = int(training_sample_count/batch_size)
                    remainder_samples = training_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send remainder samples as one batch, if there are any
                    if remainder_samples:
                        X = training_save_file["X"][training_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][training_sample_idxs[-remainder_samples:]]
                        yield (X, Y)
    def print_osr_summary(self):
        """ Prints a summary representation of the OSR model
        """
        print "\n*** MODEL SUMMARY ***"
        self.osr.summary()
    def generate_osr_model(self):
        """ Builds the optical speech recognizer model
        """
        print "".join(["\nGenerating OSR model\n",
                       "-"*40])
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            class_count = len(training_save_file.attrs["training_classes"].split(","))
        osr = Sequential()
        print " - Adding convolution layers"
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3,
                              kernel_dim2=3,
                              kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Dropout(0.2))
        osr.add(Flatten())
        print " - Adding fully connected layers"
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dense(output_dim=128,
                      init="normal",
                      activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=class_count,
                      init="normal",
                      activation="softmax"))
        print " - Compiling model"
        optimizer = Nadam(lr=0.002,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          schedule_decay=0.004)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=optimizer,
                    metrics=["categorical_accuracy"])
        self.osr = osr
        print " * OSR MODEL GENERATED * "
    def process_training_data(self):
        """ Preprocesses training data and saves them into an HDF5 file
        """
        # load training metadata from config file
        training_metadata = {}
        training_classes = []
        with open(self.config_file) as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))
            print "".join(["\n",
                           "Found {0} training classes!\n".format(len(training_classes)),
                           "-"*40])
            for class_label, training_class in enumerate(training_classes):
                print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
            print ""
        # count number of samples
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            # get training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # update sample count
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)
        print "".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
        print ""
        # initialize HDF5 save file, but clear older duplicate first if it exists
        try:
            print "Saved file \"{0}\" already exists! Overwriting previous saved file.\n".format(self.training_save_fn)
            os.remove(self.training_save_fn)
        except OSError:
            pass
        # process and save training data into HDF5 file
        print "Generating {0} samples from {1} samples via data augmentation\n".format(sample_count*self.samples_generated_per_sample,
                                                                                       sample_count)
        sample_count = sample_count*self.samples_generated_per_sample
        with h5py.File(self.training_save_fn, "w") as training_save_file:
            training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
            training_save_file.attrs["sample_count"] = sample_count
            x_training_dataset = training_save_file.create_dataset("X",
                                                                   shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                                   dtype="f")
            y_training_dataset = training_save_file.create_dataset("Y",
                                                                   shape=(sample_count, len(training_classes)),
                                                                   dtype="i")
            # iterate through each class data
            sample_idx = 0
            for class_label, training_class in enumerate(training_classes):
                # get training class sequence paths
                training_class_data_path = training_metadata[training_class]
                training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                                 for file_name in os.listdir(training_class_data_path)
                                                 if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                     and ".mov" in file_name)]
                # iterate through each sequence
                for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                    sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                     .format(training_class, idx+1, len(training_class_sequence_paths)))
                    sys.stdout.flush()
                    # accumulate samples and labels
                    samples_batch = self.process_frames(training_class_sequence_path)
                    label = [0]*len(training_classes)
                    label[class_label] = 1
                    for sample in samples_batch:
                        x_training_dataset[sample_idx] = sample
                        y_training_dataset[sample_idx] = label
                        # update sample index
                        sample_idx += 1
                print "\n"
            training_save_file.close()
            print "Training data processed and saved to {0}".format(self.training_save_fn)
    def process_frames(self, video_file_path):
        """ Preprocesses sequence frames
        """
        # haar cascades for localizing oral region
        face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        mouth_cascade = cv2.CascadeClassifier('haarcascade_mcs_mouth.xml')
        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()
        frames = []
        success = True
        # convert to grayscale, localize oral region, equalize frame dimensions, and accumulate valid frames
        while success:
            success, frame = video.read()
            if success:
                # convert to grayscale
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # localize single facial region
                faces_coords = face_cascade.detectMultiScale(frame, 1.3, 5)
                if len(faces_coords) == 1:
                    face_x, face_y, face_w, face_h = faces_coords[0]
                    frame = frame[face_y:face_y + face_h, face_x:face_x + face_w]
                    # localize oral region
                    mouth_coords = mouth_cascade.detectMultiScale(frame, 1.3, 5)
                    threshold = 0
                    for (mouth_x, mouth_y, mouth_w, mouth_h) in mouth_coords:
                        if (mouth_y > threshold):
                            threshold = mouth_y
                            valid_mouth_coords = (mouth_x, mouth_y, mouth_w, mouth_h)
                        else:
                            pass
                    mouth_x, mouth_y, mouth_w, mouth_h = valid_mouth_coords
                    frame = frame[mouth_y:mouth_y + mouth_h, mouth_x:mouth_x + mouth_w]
                    # equalize frame dimensions
                    frame = cv2.resize(frame, (self.columns, self.rows)).astype('float32')
                    # accumulate frames
                    frames.append(frame)
                # ignore multiple facial region detections
                else:
                    pass
        # equalize sequence lengths
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = np.asarray(frames[0:self.frames_per_sequence])
        # pixel normalizer
        pix_norm = lambda frame: frame / 255.0
        samples_batch = [[map(pix_norm, frames)]]
        # random transformations for data augmentation
        for _ in xrange(0, self.samples_generated_per_sample-1):
            rotated_frames = random_rotation(frames, rg=45)
            shifted_frames = random_shift(rotated_frames, wrg=0.25, hrg=0.25)
            sheared_frames = random_shear(shifted_frames, intensity=0.79)
            zoomed_frames = random_zoom(sheared_frames, zoom_range=(1.25, 1.25))
            samples_batch.append([map(pix_norm, zoomed_frames)])
        return samples_batch
class ProgressDisplay(Callback):
    """ Progress display callback
    """
    def on_batch_end(self, epoch, logs={}):
        print " Batch {0:<4d} => Accuracy: {1:>8.4f} | Loss: {2:>8.4f} | Size: {3:>4d}".format(int(logs["batch"])+1,
                                                                                               float(logs["categorical_accuracy"]),
                                                                                               float(logs["loss"]),
                                                                                               int(logs["size"]))
if __name__ == "__main__":
# Example usage
osr = OpticalSpeechRecognizer(rows=100,
columns=150,
frames_per_sequence=45,
samples_generated_per_sample=10,
config_file="training_config.json",
training_save_fn="training_data.h5",
osr_save_fn="osr_model.h5")
osr.process_training_data()
osr.generate_osr_model()
osr.print_osr_summary()
osr.train_osr_model()
osr.save_osr_model()
osr.load_osr_model()
As of this writing, the problem seems to be a TensorFlow issue. For me, the solution was to switch the backend to Theano. To switch the Keras backend, perform the following steps:
Find the Keras configuration file
~/.keras/keras.json
Change the value of the backend field and, optionally, the image_dim_ordering field:
{
    "image_dim_ordering": "th",
    "epsilon": 1e-07,
    "floatx": "float32",
    "backend": "theano"
}
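Alternatively, the backend can be overridden per process through the KERAS_BACKEND environment variable, which takes precedence over keras.json. A quick way to do this and verify which backend is active:

import os
os.environ["KERAS_BACKEND"] = "theano"  # must be set before keras is imported

from keras import backend as K
print(K.backend())  # should print 'theano'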