Error when using a custom dataset with fastai - python

I am getting an error when trying to use my custom dataset with fastai.
The error:
Exception: Can't infer the type of your targets.
It's either because your data source is empty or because your labeling function raised an error.
The code:
from fastai import *
from fastai.vision import *
class URL:
    """Download locations for the custom datasets used in this script."""

    # Archive location handed to untar_data() for the hornet image dataset.
    MURDERHORNETS = "https://superdata.quinniboi10.repl.co/MurderHornetImages"
path = untar_data(URL.MURDERHORNETS)
'''
path = untar_data(URLs.PETS)
files = get_image_files(path)
import PIL
img = PIL.Image.open(files[0])
img
'''
fnames = get_image_files(path)
fnames[:5]
np.random.seed (2)
pat = r'/([^/]+)_\d+\.(png|jpg|jpeg)$'
data = ImageDataBunch.from_folder(path, train=path, test=None, valid_pct=0.2,
ds_tfms=get_transforms(),
size=160)
data.normalize (imagenet_stats)
data.show_batch(rows=3, figsize=(7,6))
print (data.classes)
len (data.classes),data.c
learn = cnn_learner(data, models.resnet50, metrics=error_rate)
learn.fit_one_cycle(5)
learn.save ('stage-1')
The dataset is here, don't comment on the name, I don't know why that is what I chose :/
Get the zip file of the dataset here

Related

How to use from_tensor_slices properly on MRI images?

I'm working with MRI images and I'd like to use from_tensor_slices to preprocess the paths but I don't know how to use that properly. Below are my code, the problem message and link for the dataset.
First I rearrange my data. 484 images and 484 labels
image_data_path = './drive/MyDrive/Brain Tumour/Task01_BrainTumour/imagesTr/'
label_data_path = './drive/MyDrive/Brain Tumour/Task01_BrainTumour/labelsTr/'
image_paths = [image_data_path + name
for name in os.listdir(image_data_path)
if not name.startswith(".")]
label_paths = [label_data_path + name
for name in os.listdir(label_data_path)
if not name.startswith(".")]
image_paths = sorted(image_paths)
label_paths = sorted(label_paths)
Then, the function to load 1 example (I use nibabel to load nii files)
def load_one_sample(image_path, label_path):
    """Load one image/label pair with nibabel and return both as tensors.

    Each file is read with ``nib.load(...).get_fdata()``. The image volume is
    converted to a float32 tensor and the label volume to a uint8 tensor.

    Returns:
        An ``(image, label)`` tuple of tensors.
    """
    image_volume = nib.load(image_path).get_fdata()
    image_tensor = tf.convert_to_tensor(image_volume, dtype='float32')
    label_volume = nib.load(label_path).get_fdata()
    label_tensor = tf.convert_to_tensor(label_volume, dtype='uint8')
    return image_tensor, label_tensor
Next, I tried using from_tensor_slices
image_filenames = tf.constant(image_paths)
label_filenames = tf.constant(label_paths)
dataset = tf.data.Dataset.from_tensor_slices((image_filenames, label_filenames))
all_data = dataset.map(load_one_sample)
And the error comes: TypeError: stat: path should be string, bytes, os.PathLike or integer, not Tensor
What can be wrong and how can I fix it?
Datalink: https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2 (task 1 - Brain Tumour)
Please tell me if you need more information.
nib.load is not a TensorFlow function.
If you want to use anything in a tf.data pipeline that is not a TensorFlow function, then you have to wrap it using tf.py_function.
Code:
image_data_path = 'Task01_BrainTumour/imagesTr/'
label_data_path = 'Task01_BrainTumour/labelsTr/'
image_paths = [image_data_path + name
for name in os.listdir(image_data_path)
if not name.startswith(".")]
label_paths = [label_data_path + name
for name in os.listdir(label_data_path)
if not name.startswith(".")]
image_paths = sorted(image_paths)
label_paths = sorted(label_paths)
def load_one_sample(image_path, label_path):
    """Load one image/label pair from string-tensor paths and return tensors.

    Both arguments are tensors holding a path; ``.numpy().decode()`` turns
    each into a plain Python string before it is handed to ``nib.load``.
    The image volume is converted to a float32 tensor and the label volume
    to a uint8 tensor.

    Returns:
        An ``(image, label)`` tuple of tensors.
    """
    image_file = image_path.numpy().decode()
    image_tensor = tf.convert_to_tensor(
        nib.load(image_file).get_fdata(), dtype='float32')
    label_file = label_path.numpy().decode()
    label_tensor = tf.convert_to_tensor(
        nib.load(label_file).get_fdata(), dtype='uint8')
    return image_tensor, label_tensor
def wrapper_load(img_path, label_path):
    """Run ``load_one_sample`` through ``tf.py_function``.

    ``load_one_sample`` calls nibabel, which is not a TensorFlow op, so it is
    wrapped here with declared output dtypes of float32 (image) and uint8
    (label).

    Returns:
        An ``(img, label)`` tuple of tensors.
    """
    outputs = tf.py_function(
        func=load_one_sample,
        inp=[img_path, label_path],
        Tout=[tf.float32, tf.uint8],
    )
    img, label = outputs
    return img, label
dataset = tf.data.Dataset.from_tensor_slices((image_paths, label_paths)).map(wrapper_load)
The error is not due to the from_tensor_slices function; it arises because nib.load expects a string but receives a tensor.
However, a better way would be to create tfrecords and use them to train the model.

Python: PyCUDA ERROR: The context stack was not empty upon module cleanup

I have created a Streamlit app as a demo of a project on multilingual text classification using mBERT in PyTorch. When I run the app with the command python app.py it works fine, but when I try to use Streamlit with the command streamlit run app.py it throws a PyCUDA error.
Following is the code present in app.py:
import torch
from typing import Text
import streamlit as st
import pandas as pd
from textblob import TextBlob
from inference.inference_onnx import run_onnx_inference
from inference.inference_tensorRT import run_trt_inference
from googletrans import Translator
st.title("LinClass: Multilingual Text Classifier")
input_text = st.text_input('Text:')
####################
# Google Translate API
####################
translator = Translator()
input_text = translator.translate(
input_text,
dest= "en"
)
input_text = input_text.text
####################
#Select Precision and Inference Method
####################
df = pd.DataFrame()
df["lang"] = ["en"]
precision = st.sidebar.selectbox("Select Precision:",
("16 Bit", "32 Bit")
)
inference = st.sidebar.selectbox("Inference Method:",
("ONNX", "TensorRT")
)
if st.button('Show Selected Configuration'):
st.subheader("Selected Configuration:")
st.write("Precision: ", precision)
st.write("Inference: ", inference)
st.subheader("Results")
def result(x):
    """
    Write "Toxic" or "Non Toxic" to the Streamlit page, depending on whether
    the given probability reaches the 0.4 threshold.
    params: x(float) - Probability of Toxicity
    """
    verdict = "Toxic" if x >= 0.4 else "Non Toxic"
    st.write(verdict)
####################
# Implement Selected Configuration
####################
if precision=="16 Bit":
if inference=="ONNX":
df["comment_text"] = [input_text]
predictions = run_onnx_inference(
onnx_model_path = "/workspace/data/multilingual-text-classifier/output models/mBERT_lightning_fp16_2GPU.onnx",
stage="inference",
df_test = df
)
predictions = torch.sigmoid(torch.tensor(predictions))
st.write(input_text)
st.write(predictions)
result(predictions)
if inference=="TensorRT":
df["content"] = [input_text]
predictions = run_trt_inference(
trt_model_path = "/workspace/data/multilingual-text-classifier/output models/mBERT_lightning_fp16_bs16.engine",
stage="inference",
df_test = df
)
predictions = predictions.astype("float32")
predictions = torch.sigmoid(torch.tensor(predictions))
st.write(input_text)
st.write(predictions)
result(predictions)
if precision=="32 Bit":
if inference=="ONNX":
df["comment_text"] = [input_text]
predictions = run_onnx_inference(
onnx_model_path = "/workspace/data/multilingual-text-classifier/output models/mBERT_fp32.onnx",
stage="inference",
df_test = df
)
predictions = torch.sigmoid(torch.tensor(predictions))
st.write(input_text)
st.write(predictions)
result(predictions)
if inference=="TensorRT":
df["content"] = [input_text]
predictions = run_trt_inference(
trt_model_path = "/workspace/data/multilingual-text-classifier/output models/mBERT_fp32.engine",
stage="inference",
df_test = df
)
predictions = predictions.astype("float32")
predictions = torch.sigmoid(torch.tensor(predictions))
st.write(input_text)
st.write(predictions)
result(predictions)
####################
# Take Feedback
####################
st.subheader("Feedback:")
feedback = st.radio(
"Are you satisfied with the results?",
('Yes', 'No'))
st.write("Thanks for the Feedback!")
Error
-------------------------------------------------------------------
PyCUDA ERROR: The context stack was not empty upon module cleanup.
-------------------------------------------------------------------
A context was still active when the context stack was being
cleaned up. At this point in our execution, CUDA may already
have been deinitialized, so there is no way we can finish
cleanly. The program will be aborted now.
Use Context.pop() to avoid this problem.
-------------------------------------------------------------------
Aborted (core dumped)

Invalid pointer error while running python in C++ using pybind11 and pytorch

While running the following Python code in C++ using pybind11 and PyTorch 1.6.0, I get an "Invalid Pointer" error. In Python, the code runs successfully without any error. What's the reason? How can I solve this problem?
import torch
import torch.nn.functional as F
import numpy as np
import cv2
import torchvision
import eval_widerface
import torchvision_model
def resize(image, size):
    """Resize a single (unbatched) image tensor with nearest-neighbour interpolation.

    A leading batch dimension is added before calling ``F.interpolate`` and
    removed again afterwards, so the result has the same number of
    dimensions as the input.

    Args:
        image: tensor without a batch dimension (presumably C x H x W).
        size: target spatial size, passed unchanged to ``F.interpolate``.

    Returns:
        The resized tensor.
    """
    batched = image.unsqueeze(0)
    resized = F.interpolate(batched, size=size, mode="nearest")
    return resized.squeeze(0)
# define constants
model_path = '/path/to/model.pt'
image_path = '/path/to/image_pad.jpg'
scale = 1.0 #Image resize scale (2 for half size)
font = cv2.FONT_HERSHEY_SIMPLEX
MIN_SCORE = 0.9
image_bgr = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)#skimage.io.imread(args.image_path)
cv2.imshow("input image",image_bgr)
cv2.waitKey()
cv2.destroyAllWindows()
# load pre-trained model
return_layers = {'layer2':1,'layer3':2,'layer4':3}
RetinaFace = torchvision_model.create_retinaface(return_layers)
print('RetinaFace.state_dict().')
retina_dict = RetinaFace.state_dict()
The following function generates the error:
def create_retinaface(return_layers, backbone_name='resnet50', anchors_num=3, pretrained=True):
    """Build a RetinaFace model on top of a backbone taken from the ``resnet`` module.

    Args:
        return_layers: passed unchanged to ``RetinaFace``.
        backbone_name: key looked up in ``resnet.__dict__`` to find the
            backbone constructor.
        anchors_num: forwarded to ``RetinaFace`` as ``anchor_nums``.
        pretrained: forwarded to the backbone constructor.

    Returns:
        The constructed ``RetinaFace`` instance.

    Raises:
        KeyError: if ``backbone_name`` is not a name in ``resnet.__dict__``.
    """
    # The print calls are debugging traces used to locate the crash.
    print('In create_retinaface.')
    print(resnet.__dict__)
    backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
    print('backbone.')
    # Only the first convolution's weights are frozen.
    for name, parameter in backbone.named_parameters():
        print('freeze layer 1.')
        # if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
        #     parameter.requires_grad_(False)
        if name == 'conv1.weight':
            # print('freeze first conv layer...')
            parameter.requires_grad_(False)
    # Honour the caller's anchors_num instead of a hard-coded 3 (the default
    # is still 3, so existing calls behave the same).
    model = RetinaFace(backbone, return_layers, anchor_nums=anchors_num)
    return model
The statement backbone = resnet.__dict__[backbone_name](pretrained=pretrained) generates an error that looks like this:
*** Error in `./p': munmap_chunk(): invalid pointer: 0x00007f4461866db0 ***
======= Backtrace: =========
/usr/lib64/libc.so.6(+0x7f3e4)[0x7f44736b43e4]
/usr/local/lib64/libopencv_gapi.so.4.1(_ZNSt10_HashtableISsSsSaISsENSt8__detail9_IdentityESt8equal_toISsESt4hashISsENS1_18_Mod_range_hashingENS1_20_Default_ranged_hashENS1_20_Prime_rehash_policyENS1_17_Hashtable_traitsILb1ELb1ELb1EEEE21_M_insert_unique_nodeEmmPNS1_10_Hash_nodeISsLb1EEE+0xc9)[0x7f4483dee1a9]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x4403b5)[0x7f4460bb73b5]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x44570a)[0x7f4460bbc70a]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x275b20)[0x7f44609ecb20]
/usr/lib64/libpython3.6m.so.1.0(_PyCFunction_FastCallDict+0x147)[0x7f4474307167]
/usr/lib64/libpython3.6m.so.1.0(+0x1507df)[0x7f44743727df]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x1505ca)[0x7f44743725ca]
/usr/lib64/libpython3.6m.so.1.0(+0x150903)[0x7f4474372903]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x14fb69)[0x7f4474371b69]
/usr/lib64/libpython3.6m.so.1.0(_PyFunction_FastCallDict+0x24f)[0x7f44743739ff]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_FastCallDict+0x10e)[0x7f44742ca1de]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_Call_Prepend+0x61)[0x7f44742ca2f1]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(+0xfa7e5)[0x7f447431c7e5]
/usr/lib64/libpython3.6m.so.1.0(+0xf71e2)[0x7f44743191e2]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x2067)[0x7f4474368db7]
/usr/lib64/libpython3.6m.so.1.0(PyEval_EvalCodeEx+0x24f)[0x7f4474372c9f]
This line is causing the error because it assumes __dict__ has a backbone_name element:
backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
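When that isn't the case, it basically tries to access invalid memory. Check __dict__ first with an if statement, or make sure that it has the backbone_name element before trying to use it. (Note: in plain Python a missing dictionary key raises a KeyError rather than corrupting memory, so if the key is present the crash is coming from native code; the backtrace above passes through libopencv_gapi.so and libtorch_python.so.)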

Gensim Doc2Vec Exception AttributeError: 'str' object has no attribute 'decode'

I am trying to make a model with the Gensim library. I am using python 3 and Spyder. I also want to incorporate the wiki corpus. The code is shown below:
import os
import sys
import bz2
import logging
import multiprocessing
import gensim
# Directory layout, resolved relative to the location of this script.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(SCRIPT_PATH, 'data/')
MODEL_PATH = os.path.join(SCRIPT_PATH, 'model/')
# Dictionary file and Wikipedia dump, both located inside DATA_PATH.
DICTIONARY_FILEPATH = os.path.join(DATA_PATH, 'wiki-english_wordids.txt.bz2')
# The file name must be a single literal; a line break inside it leaves the
# string unterminated.
WIKI_DUMP_FILEPATH = os.path.join(DATA_PATH, 'enwiki-latest-pages-articles.xml.bz2')
if __name__ == '__main__':
# Check if the required files have been downloaded
if not WIKI_DUMP_FILEPATH:
print('Wikipedia articles dump could not be found..')
print('Please see README.md for instructions!')
sys.exit()
# Get number of available cpus
cores = multiprocessing.cpu_count()
if not os.path.exists(MODEL_PATH):
os.makedirs(MODEL_PATH)
# Initialize logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
if not os.path.isfile(DICTIONARY_FILEPATH):
logging.info('Dictionary has not been created yet..')
logging.info('Creating dictionary (takes about 9h)..')
# Construct corpus
wiki = gensim.corpora.WikiCorpus(WIKI_DUMP_FILEPATH)
# Remove words occuring less than 20 times, and words occuring in more
# than 10% of the documents. (keep_n is the vocabulary size)
wiki.dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=100000)
# Save dictionary to file
wiki.dictionary.save_as_text(DICTIONARY_FILEPATH)
del wiki
# Load dictionary from file
dictionary = gensim.corpora.Dictionary.load_from_text(DICTIONARY_FILEPATH)
# Construct corpus using dictionary
wiki = gensim.corpora.WikiCorpus(WIKI_DUMP_FILEPATH, dictionary=dictionary)
class SentencesIterator:
def __init__(self, wiki):
self.wiki = wiki
def __iter__(self):
for sentence in self.wiki.get_texts():
yield list(map(lambda x: x.decode('utf-8'), sentence))
# Initialize simple sentence iterator required for the Word2Vec model
sentences = SentencesIterator(wiki)
logging.info('Training word2vec model..')
model = gensim.models.Word2Vec(sentences=sentences, size=300, min_count=1, window=5, workers=cores)
# Save model
logging.info('Saving model..')
model.save(os.path.join(MODEL_PATH, 'word2vec.model'))
logging.info('Done training word2vec model!')
But I am getting the following error:
File "C:/Users/elli/.spyder-py3/temp.py", line 60, in <lambda>
yield list(map(lambda x: x.decode('utf-8'), sentence))
AttributeError: 'str' object has no attribute 'decode'
This code was from github from this link:
https://github.com/LasseRegin/gensim-word2vec-model/blob/master/train.py.
I suspect this should be something simple to sort. Could you please advise?
It's a Unicode issue in your SentencesIterator class: your sample code was written for Python 2. In Python 3 you can remove the decode part, for example as follows:
class TaggedWikiDocument(object):
    """Iterate over a wiki corpus, yielding one ``TaggedDocument`` per article.

    Each yielded document carries the article's tokens and is tagged with
    the article title. No ``.decode`` call is applied to the tokens.
    """

    def __init__(self, wiki):
        """Store the corpus and switch its ``metadata`` flag on.

        Args:
            wiki: corpus object exposing ``get_texts()`` (a gensim
                ``WikiCorpus`` in the surrounding post).
        """
        self.wiki = wiki
        # __iter__ unpacks (content, (page_id, title)) from get_texts();
        # presumably the metadata flag is what makes the corpus yield that
        # form. Confirm against the gensim WikiCorpus documentation.
        self.wiki.metadata = True

    def __iter__(self):
        """Yield ``TaggedDocument(content, [title])`` for every article."""
        for content, (_page_id, title) in self.wiki.get_texts():
            yield TaggedDocument(content, [title])

How to classify images using Spark and Caffe

I am using Caffe to do image classification, and I am using Mac OS X and Python.
Right now I know how to classify a list of images using Caffe with plain Python, but to make it faster I want to use Spark.
Therefore, I tried to apply the image classification on each element of an RDD, the RDD created from a list of image_path. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
    """Classify one image with Caffe and describe the prediction as a string.

    The image at ``image_path`` is loaded, preprocessed with ``transformer``,
    written into the network's ``data`` blob and pushed through a forward
    pass. The returned string holds the image name (the part of
    ``image_path`` after ``images_folder_path``), the index of the most
    probable class, and that class's row from the synset words file.
    """
    loaded = caffe.io.load_image(image_path)
    net.blobs['data'].data[...] = transformer.preprocess('data', loaded)
    probabilities = net.forward()['prob'][0]
    class_index = probabilities.argmax()

    # The label table is re-read from disk on every call.
    synset_path = caffe_root + 'data/ilsvrc12/synset_words.txt'
    label = np.loadtxt(synset_path, str, delimiter='\t')[class_index]

    image_name = image_path.split(images_folder_path)[1]
    return 'image: ' + image_name + ' prediction: ' + str(class_index) + ' label: ' + label
This is the code that generates the Caffe parameters and applies the classify_image method to each element of the RDD:
def main():
    """Set up the CaffeNet reference model and map classify_image over an RDD.

    Builds the net and its input transformer, collects the ``*.jpg`` paths
    under ``images_folder_path`` into an RDD, broadcasts the transformer and
    the net, and maps ``classify_image`` over the paths. The resulting RDD
    object is printed; no action is run on it.
    """
    sys.path.insert(0, caffe_root + 'python')
    caffe.set_mode_cpu()

    model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
    model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
    net = caffe.Net(model_def, model_weights, caffe.TEST)

    mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
    mu = mu.mean(1).mean(1)

    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', mu)
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data', (2, 1, 0))
    net.blobs['data'].reshape(50, 3, 227, 227)

    image_list = list(glob.glob(images_folder_path + '*.jpg'))
    images_rdd = sc.parallelize(image_list)

    # NOTE: broadcasting the net is the step the post reports as failing with
    # 'Pickling of "caffe._caffe.Net" instances is not enabled'.
    transformer_bc = sc.broadcast(transformer)
    net_bc = sc.broadcast(net)
    image_predictions = images_rdd.map(
        lambda image_path: classify_image(image_path, transformer_bc, net_bc))
    print(image_predictions)
if __name__ == '__main__':
main()
As you can see, here I tried to broadcast the caffe parameters, transformer_bc = sc.broadcast(transformer), net_bc = sc.broadcast(net)
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I added the broadcast, the error was:
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know, is there any way I can classify images using Caffe and Spark but also take advantage of Spark?
When you work with complex, non-native objects, initialization has to be moved directly to the workers, for example with a singleton module:
net_builder.py:
import caffe
# Module-level singleton: holds the net once it has been built.
net = None


def build_net(*args, **kwargs):
    """Construct the network (placeholder).

    Replace the ``...`` below with code that creates the net and binds it to
    a local ``net``. As written, the function returns the module-level value.
    """
    ...  # Initialize net here
    return net


def get_net(*args, **kwargs):
    """Return the cached net, building it on first use.

    The arguments are forwarded to ``build_net`` only when no net has been
    cached yet; later calls return the cached object and ignore them.
    """
    global net
    if net is None:
        net = build_net(*args, **kwargs)
    return net
main.py:
import net_builder
sc.addPyFile("net_builder.py")
def classify_image(image_path, transformer, *args, **kwargs):
    """Fetch the net via ``net_builder.get_net`` instead of taking it as an argument.

    Extra positional and keyword arguments are forwarded to ``get_net``.
    Only the net lookup is shown in this snippet.
    """
    net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. It can be done either manually or using SparkFiles mechanism.
On a side note you should take a look at the SparkNet package.

Categories