Invalid Syntax while importing dataset - python

So I have a script that trains on video files, and so have a dataset.py file to work on it.
Whenever I am trying to run the script it throws an SyntaxError: invalid syntax error even though I have
1) set the path correctly
2) gave the whole path
My folder structure is /home/Videos/project_folder_name/data/train
and my script is
class Dataset:
def __init__(self,
folder:/data/train,
resize:(int, int),
batch_size:int,
timesteps:int,
windowsteps:int,
shift:int,
train:bool):
self.folder = folder
self.resize = resize
self.batch_size = batch_size
self.timesteps = timesteps
self.train = train
self.images = sorted(os.listdir(folder + 'images/'))
self.labels = open(folder + 'labels.txt').readlines()
self.data = self._sliding_window(self.images, shift, windowsteps)
This error has been troubling a lot.

After putting it into SublimeText3 (I highly recommend you use a text editor which will help you find syntax errors like this)
Most of your errors boil down to not properly indenting the lines. I was able to complete my build once it was formatted like this
class Dataset:
def __init__(self,
folder:data/train,
resize:(int, int),
batch_size:int,
timesteps:int,
windowsteps:int,
shift:int,
train:bool):
self.folder = folder
self.resize = resize
self.batch_size = batch_size
self.timesteps = timesteps
self.train = train
self.images = sorted(os.listdir(folder + 'images/'))
self.labels = open(folder + 'labels.txt').readlines()
self.data = self._sliding_window(self.images, shift, windowsteps)
EDIT:
I know formatting on the website can be odd so if this code didn't solve your problem let me know and I'll delete or edit.

Related

Pydrake: Creating a Trajectory Source for RigidTransformations

I've been trying to create a TrajectorySource for RigidTransforms to pass into a DifferentialInverseKinematicsIntegrator which only takes in RigidTransforms in its input port.
def createTraj(time, pose):
times = []
poses = []
for step in time:
times.append(time[step])
poses.append(pose[step])
return PiecewisePose.MakeLinear(times, poses)
Initially, I tried to directly pass in the output from createTraj above into TrajectorySource but ran into the issue of my trajectory having more than one columns: Failure at systems/primitives/trajectory_source.cc:21 in TrajectorySource(): condition 'trajectory.cols() == 1' failed.
import matplotlib.pyplot as plt, mpld3
class DexterTest():
# Output from createTraj is passed as parameter: traj into constructor
def __init__(self, traj):
builder = DiagramBuilder()
self.station = DexterPPStation(1e-4, "/opt/drake/share/drake/manipulation/models/final_dexter_description/urdf/dexter.urdf")
self.station.CreateBins("/opt/drake/share/drake/examples/manipulation_station/models/bin.sdf", RigidTransform(np.array([0.5,0,0])), RigidTransform(np.array([0,0.5,0])))
self.station.CreateRandomPickingObjects(3)
self.station.AddDexter()
builder.AddSystem(self.station)
self.station.Finalize()
self.diff_ik = DifferentialInverseKinematicsIntegrator(self.station.controller_plant, self.station.plant.GetFrameByName("link6", self.station.dexter["instance"]), self.station.time_step, DifferentialInverseKinematicsParameters(7,7))
builder.AddSystem(self.diff_ik)
#=========================================== Likely Source of Error ===========================================
pose = builder.AddSystem(PoseSystem())
p_G_source = builder.AddSystem(TrajectorySource(traj.get_position_trajectory()))
w_G_source = builder.AddSystem(TrajectorySource(traj.get_orientation_trajectory()))
builder.Connect(p_G_source.get_output_port(), pose.GetInputPort("p_G"))
builder.Connect(w_G_source.get_output_port(), pose.GetInputPort("r_G"))
builder.Connect(pose.get_output_port(), self.diff_ik.get_input_port())
#======================================================================================
MeshcatVisualizerCpp.AddToBuilder(builder, self.station.GetOutputPort("query_object"), meshcat)
self.diagram = builder.Build()
self.simulator = Simulator(self.diagram)
self.diagram_context = self.simulator.get_mutable_context()
self.station_context = self.station.GetMyMutableContextFromRoot(self.diagram_context)
self.plant_context = self.station.GetSubsystemContext(self.station.plant, self.station_context)
self.station.SetRandomPoses(self.plant_context)
builder.Connect(self.diff_ik.get_output_port(), self.station.GetInputPort("dexter_position"))
def run(self):
self.simulator.set_target_realtime_rate(2.0)
self.simulator.AdvanceTo(1)
class PoseSystem(LeafSystem):
def __init__(self):
LeafSystem.__init__(self)
self.p_G = self.DeclareVectorInputPort("p_G", BasicVector(3))
self.r_G = self.DeclareVectorInputPort("r_G", BasicVector(4))
self.DeclareAbstractOutputPort("X_G", Value[RigidTransform], self.CalcOutput)
def CalcOutput(self, context, output):
pose = RigidTransform(Quaternion(self.r_G.Eval(context)), self.p_G.Eval(context))
output.set_value(pose)
Instead, I tried to break up my trajectory into its orientation and position parts, add them to the input ports of a custom system, and then reconstruct them together in the output port. However, this gives me the following RuntimeError once the run method is called: RuntimeError: This multibody element does not belong to the supplied MultibodyTree.
Any help would be greatly appreciated!
I think you are very close. The PoseSystem looks like it should be a solution to the problem you've articulated in your post. (The error about MultibodyTree must be coming from the other part of your code.
You don't actually need to break the RigidTransform up into orientation / translation to create your PoseSystem, your CalcOutput could just call output.set_value(poses.Eval(t)) if poses is a PiecewisePose trajectory.
I have an example of doing this in the PickAndPlaceTrajectory class in this notebook: https://github.com/RussTedrake/manipulation/blob/008cec6343dd39063705287e6664a3fee71a43b8/pose.ipynb

How to use from_tensor_slices properly on MRI images?

I'm working with MRI images and I'd like to use from_tensor_slices to preprocess the paths but I don't know how to use that properly. Below are my code, the problem message and link for the dataset.
First I rearrange my data. 484 images and 484 labels
image_data_path = './drive/MyDrive/Brain Tumour/Task01_BrainTumour/imagesTr/'
label_data_path = './drive/MyDrive/Brain Tumour/Task01_BrainTumour/labelsTr/'
image_paths = [image_data_path + name
for name in os.listdir(image_data_path)
if not name.startswith(".")]
label_paths = [label_data_path + name
for name in os.listdir(label_data_path)
if not name.startswith(".")]
image_paths = sorted(image_paths)
label_paths = sorted(label_paths)
Then, the function to load 1 example (I use nibabel to load nii files)
def load_one_sample(image_path, label_path):
image = nib.load(image_path).get_fdata()
image = tf.convert_to_tensor(image, dtype = 'float32')
label = nib.load(label_path).get_fdata()
label = tf.convert_to_tensor(label, dtype = 'uint8')
return image, label
Next, I tried using from_tensor_slices
image_filenames = tf.constant(image_paths)
label_filenames = tf.constant(label_paths)
dataset = tf.data.Dataset.from_tensor_slices((image_filenames, label_filenames))
all_data = dataset.map(load_one_sample)
And the error comes: TypeError: stat: path should be string, bytes, os.PathLike or integer, not Tensor
What can be wrong and how can I fix it?
Datalink: https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2 (task 1 - Brain Tumour)
Please tell me if you need more information.
nib.load is not a TensorFlow function.
If you want to use anything in tf.data pipeline that is not a TensorFlow function then you have to wrap it using a tf.py_function.
Code:
image_data_path = 'Task01_BrainTumour/imagesTr/'
label_data_path = 'Task01_BrainTumour/labelsTr/'
image_paths = [image_data_path + name
for name in os.listdir(image_data_path)
if not name.startswith(".")]
label_paths = [label_data_path + name
for name in os.listdir(label_data_path)
if not name.startswith(".")]
image_paths = sorted(image_paths)
label_paths = sorted(label_paths)
def load_one_sample(image_path, label_path):
image = nib.load(image_path.numpy().decode()).get_fdata()
image = tf.convert_to_tensor(image, dtype = 'float32')
label = nib.load(label_path.numpy().decode()).get_fdata()
label = tf.convert_to_tensor(label, dtype = 'uint8')
return image, label
def wrapper_load(img_path, label_path):
img, label = tf.py_function(func = load_one_sample, inp = [img_path, label_path], Tout = [tf.float32, tf.uint8])
return img, label
dataset = tf.data.Dataset.from_tensor_slices((image_paths, label_paths)).map(wrapper_load)
The error is not due to the from_tensor_slices function but arises as nibs.load is expecting a string but gets a tensor.
However, a better way would be to create tfrecords and use them to train the model.

Keep getting StopIteration in jupyter notebook

I am using the following code of Pytorch to generate Dataset class:
class BlockSet(Dataset):
def __init__(self, imList, mkList): #, transform=None):
self.imList = imList
self.mkList = mkList
# self.transform = transform
def __len__(self):
return len(self.imList)
def __getitem__(self, index):
im = Image.open(self.imList[index]).convert('RGB')
mk = Image.open(self.mkList[index]).convert('L')
im = np.array(im)
mk = np.array(mk)
return im, mk
When I try to take a unit sample for test with:
unitSet = BlockSet(refFiles, mskFiles)
x,y = next(iter(unitSet))
I get StopIteration error in jupyter notebook. But when I run it in some .py script using pycharm it runs and give me expected results.
What's wrong with .ipynb files?

Got an error during UBM speaker-adaptation with sidekit

I've already trained a UBM model and now I'm trying to implement the speaker-adaptation when I got following error.
Exception: show enroll/something.wav is not in the HDF5 file
I got two files "enroll" and "test" under the file "feat" which contains respectively features(.h5) for training and test, and my enroll_idmap is generated with the audios(.wav) only for training. And, my wav files and feat files are separated. I think I got a problem of idmap. "enroll/something.wav" is the rightid of my enroll_idmap, but what does that "HDF5 file" refer to?
Could anyone tell me what this error means and how to fix it?
Here's the code of my enroll_idmap
def __init__(self):
BASE_DIR = "./Database/sidekit_data"
self.AUDIO_DIR = os.path.join(BASE_DIR, "audio")
self.FEATURE_DIR = os.path.join(BASE_DIR, "feat")
self.TASK_DIR = os.path.join(BASE_DIR, "task")
def create_idMap(self, group):
# Make enrollment (IdMap) file list
group_dir = os.path.join(self.AUDIO_DIR, group) # enrollment data directory
group_files = os.listdir(group_dir)
group_models = [files.split('_')[0] for files in group_files] # list of model IDs
group_segments = [group+"/"+f for f in group_files]
# Generate IdMap
group_idmap = sidekit.IdMap()
group_idmap.leftids = np.asarray(group_models)
group_idmap.rightids = np.asarray(group_segments)
group_idmap.start = np.empty(group_idmap.rightids.shape, '|O')
group_idmap.stop = np.empty(group_idmap.rightids.shape, '|O')
if group_idmap.validate():
group_idmap.write(os.path.join(self.TASK_DIR, group+'_idmap.h5'))
else:
raise RuntimeError('Problems with creating idMap file')
And after that I got enroll_idmap and test_idmap with :
create_idMap("enroll")
create_idMap("test")
And here's the code of speaker-adaptation, the error above comes out during the execution of enroll_stat.accumulate_stat(…):
BASE_DIR = "./Database/sidekit_data"
enroll_idmap = sidekit.IdMap.read(os.path.join(BASE_DIR, "task", "enroll_idmap.h5"))
ubm = sidekit.Mixture()
model_name = "ubm_{}.h5".format(NUM_GUASSIANS)
ubm.read(os.path.join(BASE_DIR, "ubm", model_name))
server_eval = sidekit.FeaturesServer(feature_filename_structure="./Database/sidekit_data/feat/{}.h5",
...
...)
print("Compute the sufficient statistics")
enroll_stat.accumulate_stat(ubm=ubm,
feature_server=server_eval,
seg_indices=range(enroll_stat.segset.shape[0]),
num_thread=nbThread
)
This seems not to be a big problem but it stops me for a few days, help please.
I finally got this problem fixed by changing the path of training and test features, making it outside the "BASEDIR"
server_eval = sidekit.FeaturesServer(feature_filename_structure="./enroll/{}.h5",
...)

How to classify images using Spark and Caffe

I am using Caffe to do image classification, can I am using MAC OS X, Pyhton.
Right now I know how to classify a list of images using Caffe with Spark python, but if I want to make it faster, I want to use Spark.
Therefore, I tried to apply the image classification on each element of an RDD, the RDD created from a list of image_path. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
image = caffe.io.load_image(image_path)
transformed_image = transformer.preprocess('data', image)
net.blobs['data'].data[...] = transformed_image
output = net.forward()
output_prob = output['prob'][0]
pred = output_prob.argmax()
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(labels_file, str, delimiter='\t')
lb = labels[pred]
image_name = image_path.split(images_folder_path)[1]
result_str = 'image: '+image_name+' prediction: '+str(pred)+' label: '+lb
return result_str
This this the code generates Caffe parameters and apply the classify_image method on each element of the RDD:
def main():
sys.path.insert(0, caffe_root + 'python')
caffe.set_mode_cpu()
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,
model_weights,
caffe.TEST)
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu)
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
net.blobs['data'].reshape(50,
3,
227, 227)
image_list= []
for image_path in glob.glob(images_folder_path+'*.jpg'):
image_list.append(image_path)
images_rdd = sc.parallelize(image_list)
transformer_bc = sc.broadcast(transformer)
net_bc = sc.broadcast(net)
image_predictions = images_rdd.map(lambda image_path: classify_image(image_path, transformer_bc, net_bc))
print image_predictions
if __name__ == '__main__':
main()
As you can see, here I tried to broadcast the caffe parameters, transformer_bc = sc.broadcast(transformer), net_bc = sc.broadcast(net)
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I am doing the broadcast, the error was :
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know, is there any way I can classify images using Caffe and Spark but also take advantage of Spark?
When you work with complex, non-native objects initialization has to moved directly to the workers for example with singleton module:
net_builder.py:
import cafe
net = None
def build_net(*args, **kwargs):
... # Initialize net here
return net
def get_net(*args, **kwargs):
global net
if net is None:
net = build_net(*args, **kwargs)
return net
main.py:
import net_builder
sc.addPyFile("net_builder.py")
def classify_image(image_path, transformer, *args, **kwargs):
net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. It can be done either manually or using SparkFiles mechanism.
On a side note you should take a look at the SparkNet package.

Categories