I am working with the pykitti package for visualizing the KITTI dataset in Python. It is having trouble locating the calibration files it needs: it looks for them in a directory that I have specified, but it still gives me an error:
FileNotFoundError: [Errno 2] No such file or directory: '/home/spb5151/Downloads/KITTI_Data/2011_09_26/calib_imu_to_velo.txt'
It says that it's getting stuck on the open(filepath, 'r') line in my code. However, I have confirmed that this file is located in this directory. I'm using PyCharm as my IDE on Linux. I'm new to Python and Linux, so is there anything syntax-wise that I might be missing?
import sys
sys.path.insert(0, '/home/spb5151/Documents/pykitti-master')
import pykitti
import numpy as np # needed for the np.array calls below
basedir = '/home/spb5151/Downloads/KITTI_Data'
date = '2011_09_26'
drive = '0019'
# The 'frames' argument is optional - default: None, which loads the whole dataset.
# Calibration and timestamp data are read automatically.
# Other sensor data (cameras, IMU, Velodyne) are available via properties
# that create generators when accessed.
data = pykitti.raw(basedir, date, drive, frames=range(0, 50, 5))
# dataset.calib: Calibration data are accessible as a named tuple
# dataset.timestamps: Timestamps are parsed into a list of datetime objects
# dataset.oxts: Returns a generator that loads OXTS packets as named tuples
# dataset.camN: Returns a generator that loads individual images from camera N
# dataset.gray: Returns a generator that loads monochrome stereo pairs (cam0, cam1)
# dataset.rgb: Returns a generator that loads RGB stereo pairs (cam2, cam3)
# dataset.velo: Returns a generator that loads velodyne scans as [x,y,z,reflectance]
point_velo = np.array([0,0,0,1])
point_cam0 = data.calib.T_cam0_velo.dot(point_velo)
point_imu = np.array([0,0,0,1])
point_w = [o.T_w_imu.dot(point_imu) for o in data.oxts]
for cam0_image in data.cam0:
pass
rgb_iterator = data.rgb # Assign the generator so it isn't recreated on each access
cam2_image, cam3_image = next(rgb_iterator)
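For what it's worth, a quick sanity check along these lines (a minimal sketch using the same paths as above) also reports that the file exists:

import os
calib_dir = '/home/spb5151/Downloads/KITTI_Data/2011_09_26'
print(os.path.isdir(calib_dir))  # does the directory exist?
print(os.path.isfile(os.path.join(calib_dir, 'calib_imu_to_velo.txt')))  # can Python see the file?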
And here is the raw.py file, which is included in the pykitti package:
"""Provides 'raw', which loads and parses raw KITTI data."""
import datetime as dt
import glob
import os
from collections import namedtuple
import numpy as np
import pykitti.utils as utils
__author__ = "Lee Clement"
__email__ = "lee.clement@robotics.utias.utoronto.ca"
class raw:
"""Load and parse raw data into a usable format."""
def __init__(self, base_path, date, drive, **kwargs):
"""Set the path and pre-load calibration data and timestamps."""
self.drive = date + '_drive_' + drive + '_sync'
self.calib_path = os.path.join(base_path, date)
self.data_path = os.path.join(base_path, date, self.drive)
self.frames = kwargs.get('frames', None)
# Setting imformat='cv2' will convert the images to uint8 and BGR for
# easy use with OpenCV.
self.imformat = kwargs.get('imformat', None)
# Pre-load data that isn't returned as a generator
self._load_calib()
self._load_timestamps()
def __len__(self):
"""Return the number of frames loaded."""
return len(self.timestamps)
@property
def oxts(self):
"""Generator to read OXTS data from file."""
# Find all the data files
oxts_path = os.path.join(self.data_path, 'oxts', 'data', '*.txt')
oxts_files = sorted(glob.glob(oxts_path))
# Subselect the chosen range of frames, if any
if self.frames is not None:
oxts_files = [oxts_files[i] for i in self.frames]
# Return a generator yielding OXTS packets and poses
return utils.get_oxts_packets_and_poses(oxts_files)
@property
def cam0(self):
"""Generator to read image files for cam0 (monochrome left)."""
impath = os.path.join(self.data_path, 'image_00', 'data', '*.png')
imfiles = sorted(glob.glob(impath))
# Subselect the chosen range of frames, if any
if self.frames is not None:
imfiles = [imfiles[i] for i in self.frames]
# Return a generator yielding the images
return utils.get_images(imfiles, self.imformat)
@property
def cam1(self):
"""Generator to read image files for cam1 (monochrome right)."""
impath = os.path.join(self.data_path, 'image_01', 'data', '*.png')
imfiles = sorted(glob.glob(impath))
# Subselect the chosen range of frames, if any
if self.frames is not None:
imfiles = [imfiles[i] for i in self.frames]
# Return a generator yielding the images
return utils.get_images(imfiles, self.imformat)
@property
def cam2(self):
"""Generator to read image files for cam2 (RGB left)."""
impath = os.path.join(self.data_path, 'image_02', 'data', '*.png')
imfiles = sorted(glob.glob(impath))
# Subselect the chosen range of frames, if any
if self.frames is not None:
imfiles = [imfiles[i] for i in self.frames]
# Return a generator yielding the images
return utils.get_images(imfiles, self.imformat)
@property
def cam3(self):
"""Generator to read image files for cam0 (RGB right)."""
impath = os.path.join(self.data_path, 'image_03', 'data', '*.png')
imfiles = sorted(glob.glob(impath))
# Subselect the chosen range of frames, if any
if self.frames is not None:
imfiles = [imfiles[i] for i in self.frames]
# Return a generator yielding the images
return utils.get_images(imfiles, self.imformat)
@property
def gray(self):
"""Generator to read monochrome stereo pairs from file.
"""
return zip(self.cam0, self.cam1)
@property
def rgb(self):
"""Generator to read RGB stereo pairs from file.
"""
return zip(self.cam2, self.cam3)
@property
def velo(self):
"""Generator to read velodyne [x,y,z,reflectance] scan data from binary files."""
# Find all the Velodyne files
velo_path = os.path.join(
self.data_path, 'velodyne_points', 'data', '*.bin')
velo_files = sorted(glob.glob(velo_path))
# Subselect the chosen range of frames, if any
if self.frames is not None:
velo_files = [velo_files[i] for i in self.frames]
# Return a generator yielding Velodyne scans.
# Each scan is a Nx4 array of [x,y,z,reflectance]
return utils.get_velo_scans(velo_files)
def _load_calib_rigid(self, filename):
"""Read a rigid transform calibration file as a numpy.array."""
filepath = os.path.join(self.calib_path, filename)
data = utils.read_calib_file(filepath)
return utils.transform_from_rot_trans(data['R'], data['T'])
def _load_calib_cam_to_cam(self, velo_to_cam_file, cam_to_cam_file):
# We'll return the camera calibration as a dictionary
data = {}
# Load the rigid transformation from velodyne coordinates
# to unrectified cam0 coordinates
T_cam0unrect_velo = self._load_calib_rigid(velo_to_cam_file)
# Load and parse the cam-to-cam calibration data
cam_to_cam_filepath = os.path.join(self.calib_path, cam_to_cam_file)
filedata = utils.read_calib_file(cam_to_cam_filepath)
# Create 3x4 projection matrices
P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4))
P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4))
P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4))
P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4))
data['P_rect_00'] = P_rect_00
data['P_rect_10'] = P_rect_10
data['P_rect_20'] = P_rect_20
data['P_rect_30'] = P_rect_30
# Create 4x4 matrices from the rectifying rotation matrices
R_rect_00 = np.eye(4)
R_rect_00[0:3, 0:3] = np.reshape(filedata['R_rect_00'], (3, 3))
R_rect_10 = np.eye(4)
R_rect_10[0:3, 0:3] = np.reshape(filedata['R_rect_01'], (3, 3))
R_rect_20 = np.eye(4)
R_rect_20[0:3, 0:3] = np.reshape(filedata['R_rect_02'], (3, 3))
R_rect_30 = np.eye(4)
R_rect_30[0:3, 0:3] = np.reshape(filedata['R_rect_03'], (3, 3))
data['R_rect_00'] = R_rect_00
data['R_rect_10'] = R_rect_10
data['R_rect_20'] = R_rect_20
data['R_rect_30'] = R_rect_30
# Compute the rectified extrinsics from cam0 to camN
T0 = np.eye(4)
T0[0, 3] = P_rect_00[0, 3] / P_rect_00[0, 0]
T1 = np.eye(4)
T1[0, 3] = P_rect_10[0, 3] / P_rect_10[0, 0]
T2 = np.eye(4)
T2[0, 3] = P_rect_20[0, 3] / P_rect_20[0, 0]
T3 = np.eye(4)
T3[0, 3] = P_rect_30[0, 3] / P_rect_30[0, 0]
# Compute the velodyne to rectified camera coordinate transforms
data['T_cam0_velo'] = T0.dot(R_rect_00.dot(T_cam0unrect_velo))
data['T_cam1_velo'] = T1.dot(R_rect_00.dot(T_cam0unrect_velo))
data['T_cam2_velo'] = T2.dot(R_rect_00.dot(T_cam0unrect_velo))
data['T_cam3_velo'] = T3.dot(R_rect_00.dot(T_cam0unrect_velo))
# Compute the camera intrinsics
data['K_cam0'] = P_rect_00[0:3, 0:3]
data['K_cam1'] = P_rect_10[0:3, 0:3]
data['K_cam2'] = P_rect_20[0:3, 0:3]
data['K_cam3'] = P_rect_30[0:3, 0:3]
# Compute the stereo baselines in meters by projecting the origin of
# each camera frame into the velodyne frame and computing the distances
# between them
p_cam = np.array([0, 0, 0, 1])
p_velo0 = np.linalg.inv(data['T_cam0_velo']).dot(p_cam)
p_velo1 = np.linalg.inv(data['T_cam1_velo']).dot(p_cam)
p_velo2 = np.linalg.inv(data['T_cam2_velo']).dot(p_cam)
p_velo3 = np.linalg.inv(data['T_cam3_velo']).dot(p_cam)
data['b_gray'] = np.linalg.norm(p_velo1 - p_velo0) # gray baseline
data['b_rgb'] = np.linalg.norm(p_velo3 - p_velo2) # rgb baseline
return data
def _load_calib(self):
"""Load and compute intrinsic and extrinsic calibration parameters."""
# We'll build the calibration parameters as a dictionary, then
# convert it to a namedtuple to prevent it from being modified later
data = {}
# Load the rigid transformation from velodyne to IMU
data['T_velo_imu'] = self._load_calib_rigid('calib_imu_to_velo.txt')
# Load the camera intrinsics and extrinsics
data.update(self._load_calib_cam_to_cam(
'calib_velo_to_cam.txt', 'calib_cam_to_cam.txt'))
# Pre-compute the IMU to rectified camera coordinate transforms
data['T_cam0_imu'] = data['T_cam0_velo'].dot(data['T_velo_imu'])
data['T_cam1_imu'] = data['T_cam1_velo'].dot(data['T_velo_imu'])
data['T_cam2_imu'] = data['T_cam2_velo'].dot(data['T_velo_imu'])
data['T_cam3_imu'] = data['T_cam3_velo'].dot(data['T_velo_imu'])
self.calib = namedtuple('CalibData', data.keys())(*data.values())
def _load_timestamps(self):
"""Load timestamps from file."""
timestamp_file = os.path.join(
self.data_path, 'oxts', 'timestamps.txt')
# Read and parse the timestamps
self.timestamps = []
with open(timestamp_file, 'r') as f:
for line in f.readlines():
# NB: datetime only supports microseconds, but KITTI timestamps
# give nanoseconds, so need to truncate last 4 characters to
# get rid of \n (counts as 1) and extra 3 digits
t = dt.datetime.strptime(line[:-4], '%Y-%m-%d %H:%M:%S.%f')
self.timestamps.append(t)
# Subselect the chosen range of frames, if any
if self.frames is not None:
self.timestamps = [self.timestamps[i] for i in self.frames]
Aha, for some reason they seem to have hardcoded this part so that it doesn't take your data path into account. Here's the culprit:
data['T_velo_imu'] = self._load_calib_rigid('calib_imu_to_velo.txt')
Search for this line in your raw.py file and replace the bare filename with the data path joined to the filename, to make sure it resolves to the right location.
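For example, the edit could look something like this (a sketch of the suggested change, using os.path.join rather than string concatenation; adjust data_path vs. calib_path to wherever your calibration files actually live):

data['T_velo_imu'] = self._load_calib_rigid(os.path.join(self.data_path, 'calib_imu_to_velo.txt'))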
This program calculates the percentage of cloud cover in an image; the code runs on a Raspberry Pi 4.
In this program I use only one try-except, in the main function:
from pathlib import Path
from logzero import logger, logfile
from sense_hat import SenseHat
from picamera import PiCamera
from orbit import ISS
from time import sleep
from datetime import datetime, timedelta
import csv
import cv2
import numpy as np
import matplotlib.pyplot as plt
# this function analyzes the image creating a mask in which only clouds are colored, everything else will be black
def create_clouds_mask(img):
mask = 0
logger.info("STARTING GENERATING THE CLOUD MASK...")
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # from bgr to hsv
hsv_white = np.asarray([180, 30, 255]) # white
hsv_grey = np.asarray([0, 0, 125]) # grey
# pick colors from white to grey and creates a mask
mask = cv2.inRange(img_hsv, hsv_grey, hsv_white)
logger.info("MASK COMPLETED")
return mask
# this function calculates the percentage of cloud coverage starting from the Mask previously generated
def cloud_coverage_perc(img, mask):
res = 0
perc = 0
logger.info("CALCULATING CLOUD COVERAGE...")
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
height, width, _ = hsv_img.shape
# here we calculate the circular area of the image to consider in the calculation (because the borders of the image are covered by the ISS window, which we do not want to include in the cloud coverage calculation)
area_circle = pow(((height-(height*0.055))/2), 2) * pi
# perform an AND bit wise operation
res = cv2.bitwise_and(hsv_img, hsv_img, mask=mask)
ratio = cv2.countNonZero(mask)/(area_circle) # calculate ratio
# calculate the percentage starting from the ratio
perc = np.round(ratio*100, 2)
logger.info("CLOUD COVERAGE CALCULATION COMPLETED")
logger.info("CLOUD COVERAGE: "+str(perc))
return res, perc
def create_csv_file(data_file): # create the CSV file
"""Create a new CSV file and add the header row"""
with open(data_file, 'w') as f:
writer = csv.writer(f)
header = ("Counter", "Date/time", "Latitude", "Longitude",
"Temperature", "Humidity", "Magnetometer", "Clouds")
writer.writerow(header)
def add_csv_data(data_file, data): # add data to CSV file
"""Add a row of data to the data_file CSV"""
with open(data_file, 'a') as f:
writer = csv.writer(f)
writer.writerow(data)
def capture(camera, image): # capture image of the ground and add exif data to it
location = ISS.coordinates()
# convert the latitude and longitude to exif appropriate format
south, exif_latitude = exif_convert(location.latitude)
west, exif_longitude = exif_convert(location.longitude)
# set the exif tags specifying the current location
camera.exif_tags['GPS.GPSLatitude'] = exif_latitude
camera.exif_tags['GPS.GPSLatitudeRef'] = "S" if south else "N"
camera.exif_tags['GPS.GPSLongitude'] = exif_longitude
camera.exif_tags['GPS.GPSLongitudeRef'] = "W" if west else "E"
# capture the image
camera.capture(image)
logger.info("IMAGE CAPTURED")
def exif_convert(angle): # convert ISS location to appropriate format for exif data
sign, degrees, minutes, seconds = angle.signed_dms()
exif_angle = f'{degrees:.0f}/1,{minutes:.0f}/1,{seconds*10:.0f}/10'
return sign < 0, exif_angle
base_folder = Path(__file__).parent.resolve()
# set a logfile name
logfile(base_folder/"events.log")
# set up Sense Hat
sense = SenseHat()
# set up camera
cam = PiCamera()
cam.resolution = (1296, 972)
# initialise the CSV file
data_file = base_folder/"data.csv"
create_csv_file(data_file)
# initialise the photo counter
counter = 1
# record the start and current time
start_time = datetime.now()
now_time = datetime.now()
# run a loop for (almost) three hours
pi = 3.141592653589793238462643383279 # value of pi used in cloud_coverage_perc
while (now_time < start_time + timedelta(minutes=178)):
try:
logger.info(f"--- START ITERATION {counter} ---")
humidity = round(sense.humidity, 4)
temperature = round(sense.temperature, 4)
magnetometer = sense.compass_raw
# get coordinates of location on Earth below the ISS
location = ISS.coordinates()
# capture image
image_file = f"{base_folder}/photo_{counter:03d}.jpg"
capture(cam, image_file)
# create image mask
img = cv2.imread(image_file)
mask = create_clouds_mask(img)
# calculate cloud coverage
res, perc = cloud_coverage_perc(img, mask)
# store mask as image
# save the mask as a JPG image
mask_path = f"{base_folder}/mask_{counter:03d}.jpg"
cv2.imwrite(mask_path, res)
# save the data to the file
data = (
counter,
datetime.now(),
location.latitude.degrees,
location.longitude.degrees,
temperature,
humidity,
magnetometer,
perc,
)
add_csv_data(data_file, data)
# log event
logger.info(f"--- END ITERATION {counter} ---")
counter += 1
sleep(30)
# update the current time
now_time = datetime.now()
except Exception as e:
logger.error(f'{e.__class__.__name__}: {e}')
I've tried triggering some exceptions, and they have all been caught without problems. But after submission, the reviewers said that some errors weren't handled correctly. Can anyone help me find some errors that might occur?
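For instance, one case I'm wondering about (my own guess, not something the reviewers confirmed): cv2.imread returns None instead of raising when it cannot read a file, so create_clouds_mask would only fail later inside cv2.cvtColor with an opaque error. A narrow check like this sketch would surface the problem inside the try block:

img = cv2.imread(image_file)
if img is None:
    raise IOError("could not read image: " + image_file)
mask = create_clouds_mask(img)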
I am trying to generate a 3D point cloud from 4 RGB-D images. I am able to do that with Open3D, but I am unable to maintain the positions of the images. You can find the camera_parameters.json here.
import open3d as o3d
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import json
def load_json(path):
with open(path) as f:
return json.load(f)
def parse_camera_params(camera):
param = {}
param['camera'] = camera['camera']
param['depth'] = {}
param['depth']['fx'] = camera['K_depth'][0][0]
param['depth']['fy'] = camera['K_depth'][1][1]
param['depth']['cx'] = camera['K_depth'][0][2]
param['depth']['cy'] = camera['K_depth'][1][2]
param['depth']['K'] = camera['K_depth']
# ignore distCoeffs_depth's 5th (1000) and 6th (0) element
# since they are strange
param['depth']['distCoeffs'] = np.array(camera['distCoeffs_depth'][:5])
param['depth_width'] = camera['depth_width']
param['depth_height'] = camera['depth_height']
param['color'] = {}
param['color']['fx'] = camera['K_color'][0][0]
param['color']['fy'] = camera['K_color'][1][1]
param['color']['cx'] = camera['K_color'][0][2]
param['color']['cy'] = camera['K_color'][1][2]
param['color']['K'] = camera['K_color']
# ignore distCoeffs_color's 5th (1000) and 6th (0) element
# since they are strange
param['color']['distCoeffs'] = np.array(camera['distCoeffs_color'][:5])
param['color_width'] = camera['color_width']
param['color_height'] = camera['color_height']
# world to depth
w2d_T = np.array(camera['M_world2sensor'])
param['w2d_R'] = w2d_T[0:3, 0:3]
param['w2d_t'] = w2d_T[0:3, 3]
param['w2d_T'] = camera['M_world2sensor']
d2w_T = np.linalg.inv(w2d_T)
param['d2w_R'] = d2w_T[0:3, 0:3]
param['d2w_t'] = d2w_T[0:3, 3]
param['d2w_T'] = d2w_T
return param
if __name__ == '__main__':
data_dir = "data/"
camera_params = load_json(os.path.join(data_dir,
'camera_parameters.json'))
SVC_NUM = 4
pcd_combined = o3d.geometry.PointCloud()
for i in range(SVC_NUM):
param = parse_camera_params(camera_params['sensors'][i])
color = cv2.imread(os.path.join(data_dir, 'color_{:05d}.png'.format(i)))
color = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)
depth = cv2.imread(os.path.join(data_dir, 'depth_{:05d}.png'.format(i)), -1)
# depth = depth * 0.001 # factor to scale the depth image from carla
o3d_color = o3d.geometry.Image(color)
o3d_depth = o3d.geometry.Image(depth)
rgbd_image = o3d.geometry.RGBDImage.create_from_tum_format(o3d_color, o3d_depth, False)
h, w = depth.shape
dfx, dfy, dcx, dcy = param['depth']['fx'], param['depth']['fy'], param['depth']['cx'], param['depth']['cy']
intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, dfx,dfy, dcx, dcy)
intrinsic.intrinsic_matrix = param['depth']['K']
cam = o3d.camera.PinholeCameraParameters()
cam.intrinsic = intrinsic
cam.extrinsic = np.array(param['w2d_T'])
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, cam.intrinsic, cam.extrinsic)
o3d.io.write_point_cloud("svc_{:05d}_v13.pcd".format(i), pcd)
pcd_combined += pcd
o3d.io.write_point_cloud("svc_global_v13.pcd", pcd_combined)
With the above code I am getting output for svc_global_v13.pcd like below.
As you can see, all the images are projected into the center. As indicated in the JSON file, I would like the images to be positioned as left, right, front and rear in the 3D point cloud.
May I know what it is I am missing here?
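One thing I plan to try (a sketch based on my own reading, assuming d2w_T from parse_camera_params really is the sensor-to-world transform): build each cloud in the camera frame with the default identity extrinsic, then place it in the world frame explicitly:

# create the cloud in the sensor/camera frame (extrinsic defaults to identity)
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, cam.intrinsic)
# move it from the sensor frame into the world frame
pcd.transform(param['d2w_T'])
pcd_combined += pcd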
I have a voxel object onto which I map a texture. The problem is that it looks really weird, because the faces of the voxel cubes have more than one colour on them. I would like to reduce each voxel face down to a single colour. I found a way to do that within Blender, but since I want to run this headlessly I need to do the same thing at a lower level and trigger it via an API call.
The code is attached below. Any and all help would be appreciated.
import bpy
import os
removeThese = bpy.context.copy()
removeThese['selected_objects'] = list(bpy.context.scene.objects)
bpy.ops.object.delete(removeThese)
sourceDirectory = "model_folder"
model_name = "model.obj"
path = os.path.join(sourceDirectory, model_name)
bpy.ops.import_scene.obj(filepath=path, axis_forward='-Z', axis_up='Y', filter_glob="*.obj;*.mtl")
model_obj = bpy.context.scene.objects[model_name.split(".")[0]]
bpy.ops.object.select_all(action='DESELECT')
bpy.context.view_layer.objects.active = model_obj
model_obj.select_set(True)
sourceName = bpy.context.object.name
source = bpy.data.objects[sourceName]
#duplicating source model
bpy.ops.object.duplicate_move(OBJECT_OT_duplicate={"linked":False, "mode":'TRANSLATION'})
duplicate_obj = bpy.context.scene.objects[model_name.split(".")[0]+".001"]
bpy.ops.object.select_all(action='DESELECT')
bpy.context.view_layer.objects.active = duplicate_obj
duplicate_obj.select_set(True)
bpy.context.object.name = sourceName + "_Voxelized"
bpy.ops.object.transform_apply(location=False, rotation=True, scale=False)
bpy.ops.object.convert(target='MESH')
#source.hide_render = True
#source.hide_viewport = True
targetName = bpy.context.object.name
target = bpy.data.objects[targetName]
#converting to blocks
bpy.ops.object.modifier_add(type='REMESH')
bpy.context.object.modifiers["Remesh"].mode = 'BLOCKS'
bpy.context.object.modifiers["Remesh"].octree_depth = 7
bpy.context.object.modifiers["Remesh"].scale = 0.5
bpy.context.object.modifiers["Remesh"].use_remove_disconnected = True
bpy.ops.object.modifier_apply(modifier="Remesh")
#transferring UVs from source to target
bpy.ops.object.modifier_add(type='DATA_TRANSFER')
bpy.context.object.modifiers["DataTransfer"].use_loop_data = True
bpy.context.object.modifiers["DataTransfer"].data_types_loops = {'UV'}
bpy.context.object.modifiers["DataTransfer"].loop_mapping = 'POLYINTERP_NEAREST'
bpy.context.object.modifiers["DataTransfer"].object = source
bpy.ops.object.datalayout_transfer(modifier="DataTransfer")
bpy.ops.object.modifier_apply(modifier="DataTransfer")
#this is the chunk that reduces each voxel face to one colour
bpy.ops.object.editmode_toggle()
bpy.ops.mesh.select_mode(type='FACE')
bpy.context.area.ui_type = 'UV'
bpy.context.scene.tool_settings.use_uv_select_sync = False
bpy.context.space_data.uv_editor.sticky_select_mode = 'DISABLED'
bpy.context.scene.tool_settings.uv_select_mode = 'FACE'
bpy.context.space_data.pivot_point = 'INDIVIDUAL_ORIGINS'
bpy.ops.mesh.select_all(action='DESELECT')
#singularizing colours on each voxel face
count = 0
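# each pass randomly selects some of the still-visible faces, collapses
# their UVs to (almost) a single point so every face samples one texel,
# then hides them so later passes act on the remaining faces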
while count < 100:
bpy.ops.mesh.select_random(ratio=(count/100) + 0.01, seed=count)
bpy.ops.uv.select_all(action='SELECT')
bpy.ops.transform.resize(value=(0.01, 0.01, 0.01))
bpy.ops.mesh.hide(unselected=False)
count+=1
#returning to previous context
bpy.context.area.ui_type = 'VIEW_3D'
bpy.ops.mesh.reveal()
bpy.ops.object.editmode_toggle()
bpy.context.area.ui_type = 'TEXT_EDITOR'
#deleting source and keeping target
bpy.ops.object.select_all(action='DESELECT')
source.select_set(True)
bpy.ops.object.delete()
#selecting target
target.select_set(True)
#exporting voxelized model
output_directory = sourceDirectory
vox_model_name = targetName + '.obj'
output_file = os.path.join(output_directory, vox_model_name)
print(output_file)
bpy.ops.export_scene.obj(filepath=output_file)
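For reference, the intent is to run this headlessly with Blender's background mode, along these lines (the script name is just a placeholder):

blender --background --python voxelize_script.py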
I have an object detection dataset with RGB images and annotations in JSON. I use a custom Dataset class to read the images and the labels. One issue I'm facing is that I would like to skip images during training when their labels don't contain certain objects.
For example, if an image doesn't contain any target labels belonging to the class 'Cars', I would like to skip it. When parsing my JSON annotations, I check for labels that don't contain the class 'Cars' and return None. I then use a collate function to filter out the None values but, unfortunately, it is not working.
import torch
from torch.utils.data.dataset import Dataset
import json
import os
from PIL import Image
from torchvision import transforms as T # referenced as T in get_transform below
#import cv2
import numpy as np
general_classes = {
# Cars
"Toyota Corolla" : 0,
"VW Golf" : 0,
"VW Beetle" : 0,
# Motor-cycles
"Harley Davidson" : 1,
"Yamaha YZF-R6" : 1,
}
car_classes={
"Toyota Corolla" : 0,
"VW Golf" : 0,
"VW Beetle" : 0
}
def get_transform(train):
transforms = []
# converts the image, a PIL image, into a PyTorch Tensor
transforms.append(T.ToTensor())
if train:
# during training, randomly flip the training images
# and ground-truth for data augmentation
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
def my_collate(batch):
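# drop any samples for which __getitem__ returned None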
batch = list(filter(lambda x: x is not None, batch))
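# NB: default_collate will raise if every sample in the batch was filtered out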
return torch.utils.data.dataloader.default_collate(batch)
class FilteredDataset(Dataset):
# The dataloader will skip the image and corresponding labels based on the dictionary 'car_classes'
def __init__(self, data_dir, transforms):
self.data_dir = data_dir
img_folder_list = os.listdir(self.data_dir)
self.transforms = transforms
imgs_list = []
json_list = []
self.filter_count=0
self.filtered_label_list=[]
for img_path in img_folder_list:
#img_full_path = self.data_dir + img_path
img_full_path=os.path.join(self.data_dir,img_path)
json_file = os.path.join(img_full_path, 'annotations-of-my-images.json')
img_file = os.path.join(img_full_path, 'Image-Name.png')
json_list.append(json_file)
imgs_list.append(img_file)
self.imgs = imgs_list
self.annotations = json_list
total_count=0
for one_annotation in self.annotations:
filtered_obj_id=[]
with open(one_annotation) as f:
img_annotations = json.load(f)
parts_list = img_annotations['regions']
for part in parts_list:
current_obj_id = part['tags'][0] # bbox label
check_obj_id = general_classes[current_obj_id]
if(check_obj_id==0):
subclass_id=car_classes[current_obj_id]
filtered_obj_id.append(subclass_id)
total_count=total_count+1
if(len(filtered_obj_id)>0):
self.filter_count=self.filter_count+1
self.filtered_label_list.append(one_annotation)
print("The total number of the objects in all images: ",total_count)
# get one image and the bboxes,img_id, labels of parts, etc in the image as target.
def __getitem__(self, idx):
img_path = self.imgs[idx]
image_id = torch.tensor([idx])
with open(self.annotations[idx]) as f:
img_annotations = json.load(f)
parts_list = img_annotations['regions']
obj_ids = []
boxes = []
for part in parts_list:
obj_id = part['tags'][0]
check_obj_id = general_classes[obj_id]
if(check_obj_id==0):
obj_id=car_classes[obj_id]
obj_ids.append(obj_id)
#print("---------------------------------------------------")
if(len(obj_ids)>0):
img = Image.open(img_path).convert("RGB")
labels = torch.as_tensor(obj_ids, dtype = torch.int64)
target = {}
target['labels'] = labels
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
else:
return None
def __len__(self):
return len(self.filtered_label_list)
train_data_path = "path-to-my-annotation"
# Generators
train_dataset = FilteredDataset(train_data_path,get_transform(train=True))
print("Total files in the train_dataset: ",len(train_dataset))
#print("The first instance in the train dataset : ",train_dataset[0])
#training_generator = torch.utils.data.DataLoader(train_dataset)
training_generator = torch.utils.data.DataLoader(train_dataset,collate_fn=my_collate)
print("\n\n Iterator in action! ")
print("---------------------------------------------------------")
count=0
for img,target in training_generator:
#print("The img name : ",img[0])
count=count+1
print("target name : ",target)
print("count : ",count)
print("**************************************************")
However, I get the following error,
Could anyone please suggest a way to skip the images that do not contain a particular categorical label?
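One pattern I have seen suggested (a sketch, assuming the attributes built in __init__ above) is to pre-filter the indices once and wrap the dataset in a Subset, so __getitem__ never has to return None:

# keep only the indices whose annotation file survived the car filter
keep = [i for i, ann in enumerate(train_dataset.annotations)
        if ann in train_dataset.filtered_label_list]
filtered_dataset = torch.utils.data.Subset(train_dataset, keep)
training_generator = torch.utils.data.DataLoader(filtered_dataset)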
I am trying to make an image mosaic generator using pyvips. Basically, given an image (called the original in what follows), create a new, bigger image that resembles the original, except each pixel (or, more realistically, each group of pixels) is replaced by a smaller, distinct image tile.
I was drawn to pyvips because it is said to handle huge images and to process them without loading them completely into memory.
However, I am having an issue creating a blank mosaic to then populate with tile images.
In the code below I try joining tiles together row by row to create a mosaic, but this code unfortunately eats through my RAM and always segfaults.
import os
import pyvips
from os.path import join
from scipy.spatial import cKDTree
class Mosaic(object):
def __init__(self, dir_path, original_path, tree=None, averages=None):
self.dir_path = dir_path
self.original = original_path
self.tree = tree
if averages:
self.averages = averages
else:
self.averages = {}
def get_image(self, path):
return pyvips.Image.new_from_file(path, access="sequential")
def build_tree(self):
for root, dirs, files in os.walk(self.dir_path):
print('Loading images from', root, '...')
for file_name in files:
path = join(root, file_name)
try:
image = pyvips.Image.new_from_file(path)
self.averages[self.avg_rgb(image)] = path
except pyvips.error.Error:
print('File', path, 'not recognized as an image.')
self.tree = cKDTree(self.averages.keys())
print('Loaded', len(self.averages), 'images.')
def avg_rgb(self, image):
m = image.stats()
return tuple(m(4,i)[0] for i in range(1,4))
def get_tile_name(self, patch):
avg = self.avg_rgb(patch)
index = self.tree.query(avg)[1]
return self.averages[tuple(self.tree.data[index])]
def get_tile(self, x, y, step):
patch = self.get_image(self.original).crop(x, y, step, step)
patch_name = self.get_tile_name(patch)
return pyvips.Image.new_from_file(patch_name, access="sequential")
def make_mosaic(self, tile_num, tile_size, mosaic_path):
original = self.get_image(self.original)
mosaic = None
step = min(original.height, original.width) / tile_num
for y in range(0, original.height, step):
mosaic_row = None
print('Building row', y/step, '/', original.height/step)
for x in range(0, original.width, step):
tile = self.get_tile(x, y, step)
tile = tile.resize(float(tile_size) / float(min(tile.width, tile.height)))
tile = tile.crop(0, 0, tile_size, tile_size)
#mosaic.draw_image(tile, x, y)
mosaic_row = tile if not mosaic_row else mosaic_row.join(tile, "horizontal")
mosaic = mosaic_row if not mosaic else mosaic.join(mosaic_row, "vertical")
mosaic.write_to_file(mosaic_path)
I have also tried creating a mosaic by resizing the original image and then using draw_image, like the following, but this also crashes.
mosaic = self.get_image(self.original).resize(tile_size)
mosaic.draw_image(tile, x, y)
Finally, I have tried creating the mosaic from new_temp_file, but I am having trouble writing to the temp image.
How can I make this mosaic program work?
libvips uses a recursive algorithm to work out which pixels to compute next, so for very long pipelines you can overflow the C stack and get a crash.
The simplest solution would be to use arrayjoin. This is a libvips operator which can join many images in a single call:
http://jcupitt.github.io/libvips/API/current/libvips-conversion.html#vips-arrayjoin
There's an example on the libvips github of using it to join 30,000 images at once:
https://github.com/jcupitt/libvips/issues/471
(though that's using the previous version of the libvips Python binding)
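As a minimal illustration of the call (filenames here stands for any list of tile image paths; across sets the number of tiles per row):

tiles = [pyvips.Image.new_from_file(f, access="sequential") for f in filenames]
mosaic = pyvips.Image.arrayjoin(tiles, across=16)
mosaic.write_to_file("mosaic.png")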
I adapted your program to use arrayjoin and changed the way it loads images. I noticed you were also reloading the original image for each output tile; removing that gave a nice speedup.
#!/usr/bin/python2
from __future__ import print_function
import os
import sys
import pyvips
from os.path import join
from scipy.spatial import cKDTree
class Mosaic(object):
def __init__(self, dir_path, original_path, tile_size=128, tree=None, averages=None):
self.dir_path = dir_path
self.original_path = original_path
self.tile_size = tile_size
self.tree = tree
if averages:
self.averages = averages
else:
self.averages = {}
def avg_rgb(self, image):
m = image.stats()
return tuple(m(4,i)[0] for i in range(1,4))
def build_tree(self):
for root, dirs, files in os.walk(self.dir_path):
print('Loading images from', root, '...')
for file_name in files:
path = join(root, file_name)
try:
# load image as a square image of size tile_size X tile_size
tile = pyvips.Image.thumbnail(path, self.tile_size,
height=self.tile_size,
crop='centre')
# render into memory
tile = tile.copy_memory()
self.averages[self.avg_rgb(tile)] = tile
except pyvips.error.Error:
print('File', path, 'not recognized as an image.')
self.tree = cKDTree(self.averages.keys())
print('Loaded', len(self.averages), 'images.')
def fetch_tree(self, patch):
avg = self.avg_rgb(patch)
index = self.tree.query(avg)[1]
return self.averages[tuple(self.tree.data[index])]
def make_mosaic(self, tile_num, mosaic_path):
mosaic = None
original = pyvips.Image.new_from_file(self.original_path)
step = min(original.height, original.width) / tile_num
tiles_across = original.width / step
tiles_down = original.height / step
tiles = []
for y in range(0, tiles_down):
print('Building row', y, '/', tiles_down)
for x in range(0, tiles_across):
patch = original.crop(x * step, y * step,
min(step, original.width - x * step),
min(step, original.height - y * step))
tile = self.fetch_tree(patch)
tiles.append(tile)
mosaic = pyvips.Image.arrayjoin(tiles, across=tiles_across)
print('writing ', mosaic_path)
mosaic.write_to_file(mosaic_path)
mosaic = Mosaic(sys.argv[1], sys.argv[2])
mosaic.build_tree()
mosaic.make_mosaic(200, sys.argv[3])
I can run it like this:
$ time ./mosaic2.py samples/ k2.jpg x.png
Loading images from samples/ ...
Loaded 228 images.
Building row 0 / 292
...
Building row 291 / 292
writing x.png
real 7m19.333s
user 7m27.322s
sys 0m30.578s
making a 26496 x 37376 pixel image in this case, and it runs in about 150 MB of memory.