Infinite loop When I import sentence_transformers in FastAPI - python

I try to serve STS model by FastAPI framework, but when I import 'from sentence_transformers import SentenceTransformer', It infinitly loops. I want to get pred in content.py and post it to 'predicts/' in main.py.
# main.py
from fastapi import FastAPI
from fastapi import File
import torch
from pydantic import BaseModel
from content import predict_model
app = FastAPI()
class Item(BaseModel):
sentence_1: str
sentence_2: str
#app.post("/predicts")
async def predict(item:Item):
predict_model()
return {}
# content.py
import torch
from sentence_transformers import SentenceTransformer
def cosine_similarity_manual(x, y, small_number=1e-8):
result = torch.dot(x, y) / (torch.linalg.norm(x) * torch.linalg.norm(y) + small_number)
return result
def predict_model():
sent1 = '무엇보다도 호스트분들이 너무 친절하셨습니다.'
sent2 = '무엇보다도, 호스트들은 매우 친절했습니다.'
predict = 0
texts = [sent1, sent2]
model_path = "training_sts-Huffon-sentence-klue-roberta-base"
model = SentenceTransformer(model_path)
corpus_embeddings = model.encode(texts[0], convert_to_tensor=True)
query_embeddings = model.encode(texts[1], convert_to_tensor=True)
print(corpus_embeddings.shape)
print(query_embeddings.shape)
score = cosine_similarity_manual(corpus_embeddings,query_embeddings)
print(score)
if score >= 0.6:
pred = 1
else:
pred = 0
print(pred)```

Related

Udacity Self Driving Car Simulator

I am working on Udacity's self-driving car simulator. I am facing a problem in this when I run the drive.py file with my model as argument model.h5 nothing happens in the simulator.
The model has been trained completely without any errors but still, there is no response from the simulator.
Here is the drive.py python code and a link to the video to show what is actually happening
drive.py
import argparse
import base64
from datetime import datetime
import os
import shutil
import numpy as np
import socketio
import eventlet
import eventlet.wsgi
from PIL import Image
from flask import Flask
from io import BytesIO
from keras.models import load_model
import h5py
from keras import __version__ as keras_version
sio = socketio.Server()
app = Flask(__name__)
model = None
prev_image_array = None
class SimplePIController:
def __init__(self, Kp, Ki):
self.Kp = Kp
self.Ki = Ki
self.set_point = 0.
self.error = 0.
self.integral = 0.
def set_desired(self, desired):
self.set_point = desired
def update(self, measurement):
# proportional error
self.error = self.set_point - measurement
# integral error
self.integral += self.error
return self.Kp * self.error + self.Ki * self.integral
controller = SimplePIController(0.1, 0.002)
set_speed = 30
controller.set_desired(set_speed)
def crop_image(img, img_height=75, img_width=200):
height = img.shape[0]
width = img.shape[1]
y_start = 60
#x_start = int(width/2)-int(img_width/2)
return img[y_start:y_start+img_height, 0:width ]#x_start:x_start+img_width]
#sio.on('telemetry')
def telemetry(sid, data):
if data:
# The current steering angle of the car
steering_angle = data["steering_angle"]
# The current throttle of the car
throttle = data["throttle"]
# The current speed of the car
speed = data["speed"]
# The current image from the center camera of the car
imgString = data["image"]
image = Image.open(BytesIO(base64.b64decode(imgString)))
image_array = np.asarray(image)
image_array = crop_image(image_array)
steering_angle = float(model.predict(image_array[None, :, :, :], batch_size=1))
throttle = controller.update(float(speed))
print(steering_angle, throttle)
send_control(steering_angle, throttle)
# save frame
if args.image_folder != '':
timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
image_filename = os.path.join(args.image_folder, timestamp)
image.save('{}.jpg'.format(image_filename))
else:
# NOTE: DON'T EDIT THIS.
sio.emit('manual', data={}, skip_sid=True)
#sio.on('connect')
def connect(sid, environ):
print("connect ", sid)
send_control(0, 0)
def send_control(steering_angle, throttle):
sio.emit(
"steer",
data={
'steering_angle': steering_angle.__str__(),
'throttle': throttle.__str__()
},
skip_sid=True)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Remote Driving')
parser.add_argument(
'model',
type=str,
help='Path to model h5 file. Model should be on the same path.'
)
parser.add_argument(
'image_folder',
type=str,
nargs='?',
default='',
help='Path to image folder. This is where the images from the run will be saved.'
)
args = parser.parse_args()
# check that model Keras version is same as local Keras version
f = h5py.File(args.model, mode='r')
model_version = f.attrs.get('keras_version')
keras_version = str(keras_version).encode('utf8')
if model_version != keras_version:
print('You are using Keras version ', keras_version,
', but the model was built using ', model_version)
model = load_model(args.model)
if args.image_folder != '':
print("Creating image folder at {}".format(args.image_folder))
if not os.path.exists(args.image_folder):
os.makedirs(args.image_folder)
else:
shutil.rmtree(args.image_folder)
os.makedirs(args.image_folder)
print("RECORDING THIS RUN ...")
else:
print("NOT RECORDING THIS RUN ...")
# wrap Flask application with engineio's middleware
app = socketio.Middleware(sio, app)
# deploy as an eventlet WSGI server
eventlet.wsgi.server(eventlet.listen(('', 4567)), app)
problem video link
https://youtu.be/nP8WH8pM29Q
This is due to the socketio version. Use 4.2.1, that should fix your problem

locust ignoring extended sub classes and instantiates base classes

i want to write base http user class and base load test shape and then extend them in sub classes but locust doesn't under stands extended classes and instatiate bas classes
these are base classes
helpers.py:
from locust.contrib.fasthttp import FastHttpUser
import string
from locust import LoadTestShape, constant_pacing
from dotenv import load_dotenv
import os
load_dotenv()
# init parameters
host_address = "127.0.0.1"
class BaseHttpUser(FastHttpUser):
host = host_address
wait_time = constant_pacing(5)
chars = string.ascii_uppercase + string.ascii_lowercase + string.digits
start_time = 0
class BaseRps(LoadTestShape):
time_limit = 600
user_spawn = {1: (1500, 10)}
def tick(self):
step = len(self.user_spawn.keys())
run_time = self.get_run_time()
print(step, )
for idx in range(1, step+1):
print(run_time , idx , self.time_limit)
if run_time < idx * self.time_limit / step:
print("here", self.user_spawn.get(idx))
return self.user_spawn.get(idx)
return None
and this is the file that I run
minio.py
from locust import task
from helpers import BaseHttpUser, BaseRps
import os
host_address = "127.0.0.1"
test_name = "minio"
log_file_path = 'log.log'
base_url = os.getenv("MINIO_URL")
class HttpUser(BaseHttpUser):
host = host_address
base_url = base_url
#task
def download(self):
self.client.get(f'{self.base_url}/magnix-server-media/ads-images/ff.png', name='download')
class Rps(BaseRps):
user_spawn = {1: (10000, 100)}
Base User classes need an attribute abstract = True to not be instantiated. https://docs.locust.io/en/stable/api.html#locust.User.abstract
I dont think you can do the same with load shape classes, but you can use class attributes (which you can manipulate in your locustfile after importing it)
Like removing Rps class and instead just doing
BaseRps.user_spawn = {1: (10000, 100)}

how do i reduce the loading time of a pre-trained model?

While loading the weights of "Imagenet" using ResNet50 it nearly takes 10-11sec each time while loading the weights.
Is there any way to reduces the loading time ?
Code:
from flask import Flask, render_template, request
from werkzeug import secure_filename
from flask import request,Flask
import json
import os
import time
from keras.preprocessing import image as image_util
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
# from keras.applications import ResNet50
from keras.applications.inception_v3 import InceptionV3
import numpy as np
app = Flask(__name__)
#app.route('/object_rec', methods=['POST'])
def object_rec():
f = request.files['file']
file_path = ("./upload/"+secure_filename(f.filename))
f.save(file_path)
image = image_util.load_img(file_path,target_size=(299,299))
image = image_util.img_to_array(image)
image = np.expand_dims(image,axis=0) #(224,224,3) --> (1,224,224,3)
image = preprocess_input(image)
start_time = time.time()
model = InceptionV3(weights="imagenet")
pred = model.predict(image)
p = decode_predictions(pred)
ans = p[0][0]
acc = ans[2]
acc = str(acc)
if ans[1] == "Granny_Smith":
ans = ans[1]
ans = 'Apple'
else:
ans = ans[1]
print("THE PREDICTED IMAGE IS: "+ans)
print("THE ACCURACY IS: "+acc)
print("--- %s seconds ---" % (time.time() - start_time))
result = {
"status": True,
"object": ans,
"score":acc
}
result = json.dumps(result)
return result
if __name__ == '__main__':
app.run(host='0.0.0.0',port=6000,debug=True)
time taken would differ between 8-11 sec.
I would be good if it loads the model in 3-4sec and does classification.
Thanks in advance
The way you can do it, is to load the model in a specific session and then every time you want to use the model just set that specific session, then just call predict where you need it:
app = Flask(__name__)
sess = tf.Session(config=tf_config)
graph = tf.get_default_graph()
# IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
# Otherwise, their weights will be unavailable in the threads after the
session there has been set
set_session(sess)
model = InceptionV3(weights="imagenet")
#app.route('/object_rec', methods=['POST'])
def object_rec():
global sess
global graph
with graph.as_default():
set_session(sess)
model.predict(...)
if __name__ == '__main__':
app.run(host='0.0.0.0',port=6000,debug=True)

How to handle Multi Label DataSet from Directory for image captioning in PyTorch

I need a help in PyTorch,
Regarding Dataloader, and dataset
Can someone aid/guide me
Here is my query :
I am trying for Image Captioning using https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/03-advanced/image_captioning.
Here they have used Standard COCO Dataset.
I have dataset as images/ and captions/ directory .
Example
Directory Structure:
images/T001.jpg
images/T002.jpg
...
...
captions/T001.txt
captions/T002.txt
....
....
The above is the relation. Caption file has 'n' number of captions in each separate line.
I am able to create a custom Dataset class, in that the complete caption file content is being returned. But I want only one line alone gas to be returned.
Any guidance/suggestion on how to achieving this.
++++++++++++++++++++++++++++++++++++++++++++++++
Here is the class that i have designed:
from __future__ import print_function
import torch
from torchvision import datasets, models, transforms
from torchvision import transforms
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence
import torch.optim as optim
import torch.nn as nn
#from torch import np
import numpy as np
import utils_c
from data_loader_c import get_cust_data_loader
from models import CNN, RNN
from vocab_custom import Vocabulary, load_vocab
import os
class ImageCaptionDataSet(data.Dataset):
def __init__(self, path, json, vocab=None, transform=None):
self.vocab = vocab
self.transform = transform
self.img_dir_path = path
self.cap_dir_path = json
self.all_imgs_path = glob.glob(os.path.join(self.img_dir_path,'*.jpg'))
self.all_caps_path = glob.glob(os.path.join(self.cap_dir_path,'*.txt'))
pass
def __getitem__(self,index):
vocab = self.vocab
img_path = self.all_imgs_path[index]
img_base_name = os.path.basename(img_path)
cap_base_name = img_base_name.replace(".jpg",".txt")
cap_path = os.path.join(self.cap_dir_path,cap_base_name)
caption_all_for_a_image = open(cap_path).read().split("\n")
image = Image.open(img_path)
image = image.convert('RGB')
if self.transform != None:
# apply image preprocessing
image = self.transform(image)
#captions_combined = []
#max_len = 0
#for caption in caption_all_for_a_image:
# caption_str = str(caption).lower()
# tokens = nltk.tokenize.word_tokenize(caption_str)
# m = len(tokens) + 2
# if m>max_len:
# max_len = m
# caption = torch.Tensor([vocab(vocab.start_token())] +
# [vocab(token) for token in tokens] +
# [vocab(vocab.end_token())])
# captions_combined.append(caption)
# #yield image, caption
#return image,torch.Tensor(captions_combined)
caption_str = str(caption_all_for_a_image).lower()
tokens = nltk.tokenize.word_tokenize(caption_str)
caption = torch.Tensor([vocab(vocab.start_token())] +
[vocab(token) for token in tokens] +
[vocab(vocab.end_token())])
return image,caption
def __len__(self):
return len(self.all_imgs_path)
+++++++++++++++++++++++++++++++++
First, using str() to convert the list of captions into a single string (caption_str = str(caption_all_for_a_image)) is a bad idea:
cap = ['a sentence', 'bla bla bla']
str(cap)
Returns this sting:
"['a sentence', 'bla bla bla']"
Note that [', and ', ' are part of the resulting string!
You can pick one of the captions at random:
import random
...
cap_idx = random.randi(0, len(caption_all_for_a_image)-1) # pick one at random
caption_str = caption_all_for_a_image[cap_idx].lower() # actual selection

Swift Client with DecisionTreeRegressor

I am working with bluemix object storage container, What i want to do that i want to store my "RandomForestRegressor" into a pkl file with joblib. But when i run the code with the Swift client i receives the error.
TypeError: object of type 'DecisionTreeRegressor' has no len()
Here is my code please help.
import os
from flask import Flask,render_template, request,json
from flask.ext.cors import CORS
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os
from sklearn.externals import joblib
import pickle
import sys
import json
import csv
import swiftclient
app = Flask(__name__)
CORS(app)
cloudant_service = json.loads(os.environ['VCAP_SERVICES'])['Object-Storage'][0]
objectstorage_creds = cloudant_service['credentials']
if objectstorage_creds:
auth_url = objectstorage_creds['auth_url'] + '/v3' #authorization URL
password = objectstorage_creds['password'] #password
project_id = objectstorage_creds['projectId'] #project id
user_id = objectstorage_creds['userId'] #user id
region_name = objectstorage_creds['region'] #region name
def predict_joblib():
conn = swiftclient.Connection(key=password,
authurl=auth_url,
auth_version='3',
os_options={"project_id": project_id,
"user_id": user_id,
"region_name": region_name})
container_name = 'my-container'
# File name for testing
file_name = 'example_file.txt'
# Create a new container
conn.put_container(container_name)
print ("nContainer %s created successfully." % container_name)
# List your containers
print ("nContainer List:")
for container in conn.get_account()[1]:
print (container['name'])
# List objects in a container, and prints out each object name, the file size, and last modified date
print ("nObject List:")
for container in conn.get_account()[1]:
for data in conn.get_container(container['name'])[1]:
print ('object: {0}t size: {1}t date: {2}'.format(data['name'], data['bytes'], data['last_modified']))
print ("-----------LEARN-----------\n")
with open('training_set.json') as json_data:
df_train= pd.read_json(json_data)
train_X = df_train.drop('Price', 1)
train_y = df_train['Price']
print ("Training...")
rfreg = RandomForestRegressor(n_estimators=100, n_jobs=-1)
rfreg.fit(train_X, train_y)
print("\nPerformance on training set:")
print('R^2: %f' % rfreg.score(train_X, train_y))
# print('MSE: %f' % mean_squared_error(rfreg.predict(train_X), train_y))
# print('ABS: %f' % mean_absolute_error(rfreg.predict(train_X), train_y))
importances = rfreg.feature_importances_
std = np.std([tree.feature_importances_ for tree in rfreg.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
# Print the feature ranking
print("\nFeature ranking:")
for f in range(len(importances)):
print("%d. feature %d %s (%f)" % (f + 1, indices[f], df_train.columns[indices[f]], importances[indices[f]]))
# SERIALIZE MODEL USING joblib
print ("Serializing models using joblib...")
conn.put_object(container_name,'v3.pkl', contents= rfreg)
print ("Serializing vectorizers using joblib...")
for feature in ['Fluorescence', 'Culet']:
conn.put_object(container_name,feature+'_v3.pkl', contents= vectorizers[feature])
return rfreg, vectorizers
#app.route('/')
def hello():
predict_joblib()
return 'Welcome to Python Flask!'
#app.route('/signUp')
def signUp():
return 'signUp'
port = os.getenv('PORT', '5000')
if __name__ == "__main__":
app.debug = True
app.run(host='0.0.0.0', port=int(port))

Categories