I am trying to make an image mosaic generator using pyvips. So basically, given an image (called original in the following) create a new, bigger, image that resembles the original one except each pixel (or more realistically groups of pixels) are replaced by smaller distinct image tiles.
I was drawn to pyvips because it is said it can handle huge images and that it can process images without having to load them completely into memory.
However, I am having an issue creating a blank mosaic to then populate with tile images.
In the code below I try joining tiles together row by row to create a mosaic but this code unfortunately eats through my RAM and always segfaults.
import os
import pyvips
from os.path import join
from scipy.spatial import cKDTree
class Mosaic(object):
    """Build a photo mosaic of one image out of a directory of tile images.

    Every tile image is indexed by its average colour in a k-d tree; each
    square patch of the original is then replaced by the tile whose average
    colour is nearest to the patch's average colour.
    """

    def __init__(self, dir_path, original_path, tree=None, averages=None):
        """Remember the tile directory and the image to reproduce.

        ``tree`` and ``averages`` let a previously built index be reused;
        otherwise call ``build_tree`` before ``make_mosaic``.
        """
        self.dir_path = dir_path
        self.original = original_path
        self.tree = tree
        self.averages = averages if averages else {}

    def get_image(self, path):
        """Open ``path`` with pyvips in sequential access mode."""
        return pyvips.Image.new_from_file(path, access="sequential")

    def build_tree(self):
        """Index every readable image under ``dir_path`` by average colour."""
        for root, dirs, files in os.walk(self.dir_path):
            print('Loading images from', root, '...')
            for file_name in files:
                path = join(root, file_name)
                try:
                    image = pyvips.Image.new_from_file(path)
                    self.averages[self.avg_rgb(image)] = path
                except pyvips.error.Error:
                    print('File', path, 'not recognized as an image.')
        # cKDTree needs array-like data; a dict view is not one on Python 3.
        self.tree = cKDTree(list(self.averages.keys()))
        print('Loaded', len(self.averages), 'images.')

    def avg_rgb(self, image):
        """Return a 3-tuple read from column 4, rows 1-3 of ``image.stats()``."""
        m = image.stats()
        return tuple(m(4, i)[0] for i in range(1, 4))

    def get_tile_name(self, patch):
        """Return the path of the tile whose average colour is nearest to ``patch``."""
        avg = self.avg_rgb(patch)
        index = self.tree.query(avg)[1]
        return self.averages[tuple(self.tree.data[index])]

    def get_tile(self, x, y, step):
        """Open the best-matching tile for the ``step`` x ``step`` patch at (x, y)."""
        patch = self.get_image(self.original).crop(x, y, step, step)
        patch_name = self.get_tile_name(patch)
        return pyvips.Image.new_from_file(patch_name, access="sequential")

    def make_mosaic(self, tile_num, tile_size, mosaic_path):
        """Assemble the mosaic row by row and write it to ``mosaic_path``.

        ``tile_num`` is the number of tiles along the shorter side of the
        original; ``tile_size`` is the edge length of each output tile.

        Raises:
            ValueError: if ``tile_num`` exceeds the shorter side in pixels.
        """
        original = self.get_image(self.original)
        # Floor division keeps ``step`` an int so it can drive ``range``.
        step = min(original.height, original.width) // tile_num
        if step < 1:
            raise ValueError('tile_num is larger than the image size in pixels')
        # Only whole patches are used: a partial patch at the right or bottom
        # edge would make crop() reach outside the image.
        rows = original.height // step
        columns = original.width // step
        mosaic = None
        for row in range(rows):
            mosaic_row = None
            print('Building row', row, '/', rows)
            for column in range(columns):
                tile = self.get_tile(column * step, row * step, step)
                tile = tile.resize(float(tile_size) / float(min(tile.width, tile.height)))
                tile = tile.crop(0, 0, tile_size, tile_size)
                # NOTE: every join() lengthens the pipeline by one operation;
                # very long pipelines can overflow the C stack in libvips.
                mosaic_row = tile if mosaic_row is None else mosaic_row.join(tile, "horizontal")
            mosaic = mosaic_row if mosaic is None else mosaic.join(mosaic_row, "vertical")
        mosaic.write_to_file(mosaic_path)
I have also tried creating a mosaic by resizing the original image and then using draw_image like the following but this also crashes.
mosaic = self.get_image(self.original).resize(tile_size)
mosaic.draw_image(tile, x, y)
Finally, I have tried creating the mosaic from new_temp_file and I am having trouble writing to the temp image.
How can I make this mosaic program work?
libvips uses a recursive algorithm to work out which pixels to compute next, so for very long pipelines you can overflow the C stack and get a crash.
The simplest solution would be to use arrayjoin. This is a libvips operator which can join many images in a single call:
http://jcupitt.github.io/libvips/API/current/libvips-conversion.html#vips-arrayjoin
There's an example on the libvips github of using it to join 30,000 images at once:
https://github.com/jcupitt/libvips/issues/471
(though that's using the previous version of the libvips Python binding)
I adapted your program to use arrayjoin, and changed the way it loaded images. I noticed you were also reloading the original image for each output tile, so removing that gave a nice speedup.
#!/usr/bin/python2
from __future__ import print_function
import os
import sys
import pyvips
from os.path import join
from scipy.spatial import cKDTree
class Mosaic(object):
    """Build a photo mosaic with a single ``arrayjoin`` of pre-rendered tiles.

    Tiles are loaded once as ``tile_size`` x ``tile_size`` squares held in
    memory and indexed by average colour in a k-d tree.
    """

    def __init__(self, dir_path, original_path, tile_size=128, tree=None, averages=None):
        """Remember the tile directory, the image to reproduce and the tile size.

        ``tree`` and ``averages`` let a previously built index be reused;
        otherwise call ``build_tree`` before ``make_mosaic``.
        """
        self.dir_path = dir_path
        self.original_path = original_path
        self.tile_size = tile_size
        self.tree = tree
        self.averages = averages if averages else {}

    def avg_rgb(self, image):
        """Return a 3-tuple read from column 4, rows 1-3 of ``image.stats()``."""
        m = image.stats()
        return tuple(m(4, i)[0] for i in range(1, 4))

    def build_tree(self):
        """Load every readable image under ``dir_path`` and index it by colour."""
        for root, dirs, files in os.walk(self.dir_path):
            print('Loading images from', root, '...')
            for file_name in files:
                path = join(root, file_name)
                try:
                    # load image as a square image of size tile_size X tile_size
                    tile = pyvips.Image.thumbnail(path, self.tile_size,
                                                  height=self.tile_size,
                                                  crop='centre')
                    # render into memory
                    tile = tile.copy_memory()
                    self.averages[self.avg_rgb(tile)] = tile
                except pyvips.error.Error:
                    print('File', path, 'not recognized as an image.')
        # cKDTree needs array-like data; a dict view is not one on Python 3.
        self.tree = cKDTree(list(self.averages.keys()))
        print('Loaded', len(self.averages), 'images.')

    def fetch_tree(self, patch):
        """Return the in-memory tile whose average colour is nearest to ``patch``."""
        avg = self.avg_rgb(patch)
        index = self.tree.query(avg)[1]
        return self.averages[tuple(self.tree.data[index])]

    def make_mosaic(self, tile_num, mosaic_path):
        """Pick a tile for every patch and write the joined result.

        ``tile_num`` is the number of tiles along the shorter side of the
        original image.

        Raises:
            ValueError: if ``tile_num`` exceeds the shorter side in pixels.
        """
        original = pyvips.Image.new_from_file(self.original_path)
        # Floor division: these values feed range() and must stay ints on
        # both Python 2 and Python 3.
        step = min(original.height, original.width) // tile_num
        if step < 1:
            raise ValueError('tile_num is larger than the image size in pixels')
        tiles_across = original.width // step
        tiles_down = original.height // step
        tiles = []
        for y in range(tiles_down):
            print('Building row', y, '/', tiles_down)
            for x in range(tiles_across):
                # x < tiles_across and y < tiles_down, so a full step x step
                # patch always fits inside the image.
                patch = original.crop(x * step, y * step, step, step)
                tiles.append(self.fetch_tree(patch))
        # One arrayjoin call replaces a long chain of pairwise joins.
        mosaic = pyvips.Image.arrayjoin(tiles, across=tiles_across)
        print('writing ', mosaic_path)
        mosaic.write_to_file(mosaic_path)
# Command-line entry point: TILE_DIR ORIGINAL_IMAGE OUTPUT_IMAGE.
# Guarded so that importing this module does not start a render.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: %s TILE_DIR ORIGINAL_IMAGE OUTPUT_IMAGE' % sys.argv[0])
    mosaic = Mosaic(sys.argv[1], sys.argv[2])
    mosaic.build_tree()
    # 200 tiles along the shorter side of the original image.
    mosaic.make_mosaic(200, sys.argv[3])
I can run it like this:
$ time ./mosaic2.py samples/ k2.jpg x.png
Loading images from samples/ ...
Loaded 228 images.
Building row 0 / 292
...
Building row 291 / 292
writing x.png
real 7m19.333s
user 7m27.322s
sys 0m30.578s
making a 26496 x 37376 pixel image, in this case, and it runs in about 150mb of memory.
Related
So i'm trying to launch a Python script (originally available from here : https://github.com/dvdtho/python-photo-mosaic). Full code at the bottom of this post.
This basically creates a mosaic (from a source image), with the final image (mosaic) is composed of several other images (tiles).
My question is how I am supposed to fill in the variables (the ones at line 212) in order to run the script (through Eclipse in my case).
Should I put directly something like this? (in my case the folder Desktop/tiles contains all the jpg files) :
tile_paths = glob.glob("C:/Users/Sylvia/Desktop/tiles/*.jpg") # I've added this line myself
def create_mosaic(source_path="C:\\Users\\Sylvia\\Desktop\\source\\1.jpg", target="C:\\Users\\Sylvia\\Desktop\\source\\result.jpg", tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
Last time i tried i got this error :
def create_mosaic(source, target, tile_ratio=1920/800, tile_width=75,
enlargement=8, reuse=True, color_mode='RGB', tile_paths,
shuffle_first=30):
^ SyntaxError: non-default argument follows default argument
I'm very lost, hopefully someone can help me.
Here's the code :
import time
import itertools
import random
import sys
import numpy as np
from PIL import Image
from skimage import img_as_float
from skimage.measure import compare_mse
def shuffle_first_items(lst, i):
    """Return ``lst`` with its first ``i`` items in random order.

    When ``i`` is falsy the very same list object is returned untouched;
    otherwise a new list is built and ``lst`` itself is not modified.
    """
    if i:
        head, tail = lst[:i], lst[i:]
        random.shuffle(head)
        return head + tail
    return lst
def bound(low, high, value):
    """Clamp ``value`` to ``[low, high]``; ``low`` wins if the limits cross."""
    capped = value if value < high else high
    return capped if capped > low else low
class ProgressCounter:
    """Count completed steps and echo the percentage done on one console line."""

    def __init__(self, total):
        """Start at zero out of ``total`` steps."""
        self.counter = 0
        self.total = total

    def update(self):
        """Record one more finished step and rewrite the progress line."""
        self.counter += 1
        percent = 100 * self.counter / self.total
        out = sys.stdout
        # The trailing carriage return moves the cursor back to the line start.
        out.write("Progress: %s%% %s" % (percent, "\r"))
        out.flush()
def img_mse(im1, im2):
    """Return ``compare_mse`` of the two images after conversion to float.

    A ``ValueError`` from the comparison is reported together with both image
    sizes and turned into ``KeyboardInterrupt``.
    """
    try:
        first = img_as_float(im1)
        second = img_as_float(im2)
        return compare_mse(first, second)
    except ValueError:
        print(f'RMS issue, Img1: {im1.size[0]} {im1.size[1]}, Img2: {im2.size[0]} {im2.size[1]}')
        raise KeyboardInterrupt
def resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint=None):
    """Return the ``(left, top, right, bottom)`` box that crops ``img`` to
    ``target_aspect`` (width / height).

    The box is centred on ``centerpoint`` (``(x, y)``, defaulting to the image
    centre) as far as the image bounds allow.
    """
    width, height = img.size[0], img.size[1]
    if not centerpoint:
        centerpoint = (int(width / 2), int(height / 2))
    focus_x, focus_y = centerpoint[0], centerpoint[1]

    if width / float(height) > target_aspect:
        # Image is too wide: keep the full height, trim left and right.
        half = int(int(target_aspect * height) / 2)
        middle = bound(half, width - half, focus_x)
        return (middle - half, 0, middle + half, height)

    # Image is too tall (or already matches): keep the full width.
    half = int(int(width / target_aspect) / 2)
    middle = bound(half, height - half, focus_y)
    return (0, middle - half, width, middle + half)
def aspect_crop_to_extent(img, target_aspect, centerpoint=None):
    """Crop ``img`` to ``target_aspect`` at the largest size that fits.

    ``target_aspect`` is width / float(height). ``centerpoint`` is an optional
    ``(x, y)`` position used to steer the crop towards one side, for example
    to drop the left part when the right part is the interesting one.
    """
    return img.crop(resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint))
class Config:
    """Tile geometry and colour settings shared by the mosaic classes."""

    def __init__(self, tile_ratio=1920/800, tile_width=50, enlargement=8, color_mode='RGB'):
        self.tile_ratio = tile_ratio  # tile width / tile height (default 2.4)
        self.tile_width = tile_width  # width of each mosaic tile in pixels
        self.enlargement = enlargement  # mosaic image will be this many times wider and taller than original
        self.color_mode = color_mode  # PIL mode to convert images to, e.g. 'L' or 'RGB'

    # These must be properties: the rest of the code reads them as plain
    # attributes (``config.tile_height``, ``config.tile_size``).
    @property
    def tile_height(self):
        """Tile height in pixels, derived from the width and the ratio."""
        return int(self.tile_width / self.tile_ratio)

    @property
    def tile_size(self):
        """``(width, height)`` of a tile, the order PIL expects."""
        return self.tile_width, self.tile_height
class TileBox:
    """
    Container to import, process, hold, and compare all of the tiles
    we have to make the mosaic with.
    """

    def __init__(self, tile_paths, config):
        """Load and prepare every tile listed in ``tile_paths``."""
        self.config = config
        self.tiles = list()
        self.prepare_tiles_from_paths(tile_paths)

    def __process_tile(self, tile_path):
        """Open one tile, crop it to the tile ratio, resize it and store it."""
        with Image.open(tile_path) as i:
            img = i.copy()
        img = aspect_crop_to_extent(img, self.config.tile_ratio)
        # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        large_tile_img = img.resize(self.config.tile_size, Image.LANCZOS).convert(self.config.color_mode)
        self.tiles.append(large_tile_img)
        return True

    def prepare_tiles_from_paths(self, tile_paths):
        """Process every path in ``tile_paths``, reporting progress."""
        print('Reading tiles from provided list...')
        progress = ProgressCounter(len(tile_paths))
        for tile_path in tile_paths:
            progress.update()
            self.__process_tile(tile_path)
        print('Processed tiles.')
        return True

    def best_tile_block_match(self, tile_block_original):
        """Return the index of the stored tile with the lowest ``img_mse``."""
        match_results = [img_mse(t, tile_block_original) for t in self.tiles]
        return np.argmin(match_results)

    def best_tile_from_block(self, tile_block_original, reuse=False):
        """Return a copy of the best-matching tile for ``tile_block_original``.

        With ``reuse`` false the chosen tile is removed from the box so it is
        used at most once. Raises ``KeyboardInterrupt`` when no tiles are
        left.
        """
        if not self.tiles:
            print('Ran out of images.')
            raise KeyboardInterrupt
        i = self.best_tile_block_match(tile_block_original)
        match = self.tiles[i].copy()
        if not reuse:
            del self.tiles[i]
        return match
class SourceImage:
    """Processing original image - scaling and cropping as needed."""

    def __init__(self, image_path, config):
        """Load ``image_path``, enlarge it and trim it to whole tiles.

        The result is stored in ``self.image``, converted to
        ``config.color_mode``.
        """
        print('Processing main image...')
        self.image_path = image_path
        self.config = config
        with Image.open(self.image_path) as i:
            img = i.copy()
        # resize() needs integer pixel sizes even if enlargement is a float.
        w = int(img.size[0] * self.config.enlargement)
        h = int(img.size[1] * self.config.enlargement)
        # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias,
        # which was removed in Pillow 10.
        large_img = img.resize((w, h), Image.LANCZOS)
        # If necessary, crop the image slightly so we use a whole number of
        # tiles horizontally and vertically. Integer arithmetic keeps the
        # cropped size an exact multiple of the tile size even when the
        # surplus is odd.
        w_extra = w % self.config.tile_width
        h_extra = h % self.config.tile_height
        if w_extra or h_extra:
            left = w_extra // 2
            top = h_extra // 2
            large_img = large_img.crop((left, top, left + (w - w_extra), top + (h - h_extra)))
        self.image = large_img.convert(self.config.color_mode)
        print('Main image processed.')
class MosaicImage:
    """Holder for the mosaic"""

    def __init__(self, original_img, target, config):
        """Set up the canvas and work out how many tiles fit on it."""
        self.config = config
        self.target = target
        # The canvas is the (scaled-up) original itself rather than a blank
        # image, so tiles are pasted directly onto ``original_img``.
        self.image = original_img
        canvas_width, canvas_height = original_img.size[0], original_img.size[1]
        self.x_tile_count = int(canvas_width / self.config.tile_width)
        self.y_tile_count = int(canvas_height / self.config.tile_height)
        self.total_tiles = self.x_tile_count * self.y_tile_count
        print(f'Mosaic will be {self.x_tile_count:,} tiles wide and {self.y_tile_count:,} tiles high ({self.total_tiles:,} total).')

    def add_tile(self, tile, coords):
        """Adds the provided image onto the mosaic at the provided coords."""
        try:
            self.image.paste(tile, coords)
        except TypeError as e:
            print('Maybe the tiles are not the right size. ' + str(e))

    def save(self):
        """Write the current canvas to the target path."""
        self.image.save(self.target)
def coords_from_middle(x_count, y_count, y_bias=1, shuffle_first=0, ):
    '''
    Return every (x, y) grid coordinate, ordered from the middle outwards.

    Starting in the middle avoids visible "lines" where the same best
    images are all used at the start.

    y_bias - weight applied to the horizontal distance, for non-square
        cells. If width is 2x height, y_bias should be 2.
    shuffle_first - how many of the leading (most central) coords to
        shuffle, so the best images do not all land in the same spot.

    Example:
        coords_from_middle(10, 10, y_bias=2, shuffle_first=0)
    '''
    centre_x, centre_y = int(x_count / 2), int(y_count / 2)

    def distance(cell):
        return abs(cell[0] - centre_x) * y_bias + abs(cell[1] - centre_y)

    # sorted() is stable, so cells at equal distance keep product() order.
    ordered = sorted(itertools.product(range(x_count), range(y_count)), key=distance)
    return shuffle_first_items(ordered, shuffle_first)
def create_mosaic(source_path, target, tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
    """Forms a mosaic from an original image using the best
    tiles provided. This reads, processes, and keeps in memory
    a copy of the source image, and all the tiles while processing.

    Arguments:
    source_path -- filepath to the source image for the mosaic
    target -- filepath to save the mosaic
    tile_ratio -- width/height ratio the tiles are cropped to
    tile_width -- width of mosaic tiles in pixels
    enlargement -- mosaic image will be this many times wider and taller than the original
    reuse -- Should we reuse tiles in the mosaic, or just use each tile once?
    color_mode -- L for greyscale or RGB for color
    tile_paths -- List of filepaths to your tiles (required despite the
        ``None`` default, which only exists to keep the argument order)
    shuffle_first -- Mosaic will be filled out starting in the center for best effect. Also,
        we will shuffle the order of assessment so that all of our best images aren't
        necessarily in one spot.

    Raises:
    ValueError -- if tile_paths is not given
    """
    # Fail before the expensive source-image processing instead of with an
    # opaque TypeError deep inside TileBox.
    if tile_paths is None:
        raise ValueError('tile_paths is required: pass a list of tile image file paths')
    # TileBox needs len(); accept any iterable of paths.
    tile_paths = list(tile_paths)

    config = Config(
        tile_ratio=tile_ratio,
        tile_width=tile_width,
        enlargement=enlargement,
        color_mode=color_mode,
    )
    # Pull in and process the original image
    print('Setting Up Target image')
    source_image = SourceImage(source_path, config)
    # Set up the mosaic canvas
    mosaic = MosaicImage(source_image.image, target, config)
    # Load and prepare the tiles
    print('Assessing Tiles')
    tile_box = TileBox(tile_paths, config)
    try:
        progress = ProgressCounter(mosaic.total_tiles)
        for x, y in coords_from_middle(mosaic.x_tile_count, mosaic.y_tile_count, y_bias=config.tile_ratio, shuffle_first=shuffle_first):
            progress.update()
            # Make a box for this sector
            box_crop = (x * config.tile_width, y * config.tile_height, (x + 1) * config.tile_width, (y + 1) * config.tile_height)
            # Get the original image data for this sector
            comparison_block = source_image.image.crop(box_crop)
            # Get the tile that best matches the original sector
            tile_match = tile_box.best_tile_from_block(comparison_block, reuse=reuse)
            # Add the best match to the mosaic
            mosaic.add_tile(tile_match, box_crop)
            # Save after every sector so a partial result is always on disk
            mosaic.save()
    except KeyboardInterrupt:
        # Also raised by the tile code when tiles run out or cannot be compared.
        print('\nStopping, saving partial image...')
    finally:
        mosaic.save()
It's ok, this is the new file i have to create in order for it to work :
# Example call; replace the placeholder paths with real ones.
create_mosaic(
    source_path="/path/to/source/image",  # keyword must match the parameter name
    target="/path/to/output/image",
    tile_paths=["/path/to/tile_1", "/path/to/tile_n"],  # list every tile image path here
    tile_ratio=1920/800,  # Crop tiles to this width/height ratio
    tile_width=300,  # Tile will be scaled
    enlargement=20,  # Mosaic will be this times larger than original
    reuse=False,  # Should tiles be used multiple times?
    color_mode='L',  # RGB (color) L (greyscale)
)
Problem resolved.
I am working with the pykitti package for visualizing the KITTI data set in python. It's having trouble locating the calibration files needed. It looks in a specified directory for these calibration files. I specified the directory in which it should look for these files. However, it still gives me an error:
FileNotFoundError: [Errno 2] No such file or directory: '/home/spb5151/Downloads/KITTI_Data/2011_09_26/calib_imu_to_velo.txt'
It says that it's getting stuck on the open(filepath, 'r') line in my code. However, I have confirmed that this file is located in this directory. I'm using PyCharm as my IDE on Linux. I'm new to Python and Linux, so is there anything as far as syntax that I might be missing?
import sys
sys.path.insert(0, '/home/spb5151/Documents/pykitti-master')
import numpy as np  # required by the np.array calls below
import pykitti

basedir = '/home/spb5151/Downloads/KITTI_Data'
date = '2011_09_26'
drive = '0019'

# The 'frames' argument is optional - default: None, which loads the whole dataset.
# Calibration and timestamp data are read automatically.
# Other sensor data (cameras, IMU, Velodyne) are available via properties
# that create generators when accessed.
data = pykitti.raw(basedir, date, drive, frames=range(0, 50, 5))

# data.calib: Calibration data are accessible as a named tuple
# data.timestamps: Timestamps are parsed into a list of datetime objects
# data.oxts: Returns a generator that loads OXTS packets as named tuples
# data.camN: Returns a generator that loads individual images from camera N
# data.gray: Returns a generator that loads monochrome stereo pairs (cam0, cam1)
# data.rgb: Returns a generator that loads RGB stereo pairs (cam2, cam3)
# data.velo: Returns a generator that loads velodyne scans as [x,y,z,reflectance]

# Transform a homogeneous point from the velodyne frame with T_cam0_velo.
point_velo = np.array([0, 0, 0, 1])
point_cam0 = data.calib.T_cam0_velo.dot(point_velo)

# Apply each OXTS packet's T_w_imu transform to a point in the IMU frame.
point_imu = np.array([0, 0, 0, 1])
point_w = [o.T_w_imu.dot(point_imu) for o in data.oxts]

for cam0_image in data.cam0:
    pass

# Keep one iterator in a variable: every access to data.rgb builds a new one.
rgb_iterator = data.rgb
cam2_image, cam3_image = next(rgb_iterator)
And here is the raw.py file which is included in the pykitti package
"""Provides 'raw', which loads and parses raw KITTI data."""
import datetime as dt
import glob
import os
from collections import namedtuple
import numpy as np
import pykitti.utils as utils
__author__ = "Lee Clement"
__email__ = "lee.clement#robotics.utias.utoronto.ca"
class raw:
    """Load and parse raw data into a usable format."""

    def __init__(self, base_path, date, drive, **kwargs):
        """Set the path and pre-load calibration data and timestamps.

        Calibration files are read from ``base_path/date``; sensor data from
        ``base_path/date/<date>_drive_<drive>_sync``.

        Keyword arguments:
            frames: optional iterable of frame indices to load (default all).
            imformat: passed through to ``utils.get_images``; per the
                original note, 'cv2' converts the images to uint8 and BGR
                for easy use with OpenCV.
        """
        self.drive = date + '_drive_' + drive + '_sync'
        self.calib_path = os.path.join(base_path, date)
        self.data_path = os.path.join(base_path, date, self.drive)
        self.frames = kwargs.get('frames', None)
        self.imformat = kwargs.get('imformat', None)

        # Pre-load data that isn't returned as a generator
        self._load_calib()
        self._load_timestamps()

    def __len__(self):
        """Return the number of frames loaded."""
        return len(self.timestamps)

    def _data_files(self, *pattern):
        """Return the sorted files under ``data_path`` matching ``pattern``.

        When ``self.frames`` is set, only the files at those indices are kept.
        """
        files = sorted(glob.glob(os.path.join(self.data_path, *pattern)))
        if self.frames is not None:
            files = [files[i] for i in self.frames]
        return files

    def _camera_images(self, folder):
        """Return ``utils.get_images`` over the PNG files of one camera folder."""
        return utils.get_images(
            self._data_files(folder, 'data', '*.png'), self.imformat)

    @property
    def oxts(self):
        """Generator to read OXTS data from file."""
        return utils.get_oxts_packets_and_poses(
            self._data_files('oxts', 'data', '*.txt'))

    @property
    def cam0(self):
        """Generator to read image files for cam0 (monochrome left)."""
        return self._camera_images('image_00')

    @property
    def cam1(self):
        """Generator to read image files for cam1 (monochrome right)."""
        return self._camera_images('image_01')

    @property
    def cam2(self):
        """Generator to read image files for cam2 (RGB left)."""
        return self._camera_images('image_02')

    @property
    def cam3(self):
        """Generator to read image files for cam3 (RGB right)."""
        return self._camera_images('image_03')

    @property
    def gray(self):
        """Generator to read monochrome stereo pairs from file."""
        return zip(self.cam0, self.cam1)

    @property
    def rgb(self):
        """Generator to read RGB stereo pairs from file."""
        return zip(self.cam2, self.cam3)

    @property
    def velo(self):
        """Generator to read velodyne [x,y,z,reflectance] scan data from binary files."""
        return utils.get_velo_scans(
            self._data_files('velodyne_points', 'data', '*.bin'))

    def _load_calib_rigid(self, filename):
        """Read a rigid transform calibration file as a numpy.array."""
        filepath = os.path.join(self.calib_path, filename)
        data = utils.read_calib_file(filepath)
        return utils.transform_from_rot_trans(data['R'], data['T'])

    def _load_calib_cam_to_cam(self, velo_to_cam_file, cam_to_cam_file):
        """Return the camera calibration as a dictionary.

        Keys are inserted in a fixed order (P_rect_*, R_rect_*, T_cam*_velo,
        K_cam*, b_gray, b_rgb) because ``_load_calib`` turns the dictionary
        into a namedtuple whose field order follows it.
        """
        data = {}
        cams = range(4)

        # Load the rigid transformation from velodyne coordinates
        # to unrectified cam0 coordinates
        T_cam0unrect_velo = self._load_calib_rigid(velo_to_cam_file)

        # Load and parse the cam-to-cam calibration data
        cam_to_cam_filepath = os.path.join(self.calib_path, cam_to_cam_file)
        filedata = utils.read_calib_file(cam_to_cam_filepath)

        # Create 3x4 projection matrices (file key P_rect_0N -> P_rect_N0)
        P_rect = [np.reshape(filedata['P_rect_0%d' % n], (3, 4)) for n in cams]
        for n in cams:
            data['P_rect_%d0' % n] = P_rect[n]

        # Create 4x4 matrices from the rectifying rotation matrices
        R_rect = []
        for n in cams:
            rotation = np.eye(4)
            rotation[0:3, 0:3] = np.reshape(filedata['R_rect_0%d' % n], (3, 3))
            R_rect.append(rotation)
        for n in cams:
            data['R_rect_%d0' % n] = R_rect[n]

        # Compute the rectified extrinsics from cam0 to camN
        T_rect = []
        for n in cams:
            shift = np.eye(4)
            shift[0, 3] = P_rect[n][0, 3] / P_rect[n][0, 0]
            T_rect.append(shift)

        # Compute the velodyne to rectified camera coordinate transforms;
        # every camera uses the cam0 rectifying rotation.
        for n in cams:
            data['T_cam%d_velo' % n] = T_rect[n].dot(
                R_rect[0].dot(T_cam0unrect_velo))

        # Compute the camera intrinsics
        for n in cams:
            data['K_cam%d' % n] = P_rect[n][0:3, 0:3]

        # Compute the stereo baselines in meters by projecting the origin of
        # each camera frame into the velodyne frame and computing the
        # distances between them
        p_cam = np.array([0, 0, 0, 1])
        p_velo = [np.linalg.inv(data['T_cam%d_velo' % n]).dot(p_cam)
                  for n in cams]
        data['b_gray'] = np.linalg.norm(p_velo[1] - p_velo[0])  # gray baseline
        data['b_rgb'] = np.linalg.norm(p_velo[3] - p_velo[2])  # rgb baseline

        return data

    def _load_calib(self):
        """Load and compute intrinsic and extrinsic calibration parameters."""
        # We'll build the calibration parameters as a dictionary, then
        # convert it to a namedtuple to prevent it from being modified later
        data = {}

        # Load the rigid transformation from velodyne to IMU
        data['T_velo_imu'] = self._load_calib_rigid('calib_imu_to_velo.txt')

        # Load the camera intrinsics and extrinsics
        data.update(self._load_calib_cam_to_cam(
            'calib_velo_to_cam.txt', 'calib_cam_to_cam.txt'))

        # Pre-compute the IMU to rectified camera coordinate transforms
        for n in range(4):
            data['T_cam%d_imu' % n] = data['T_cam%d_velo' % n].dot(
                data['T_velo_imu'])

        self.calib = namedtuple('CalibData', data.keys())(*data.values())

    def _load_timestamps(self):
        """Load timestamps from file into ``self.timestamps``."""
        timestamp_file = os.path.join(
            self.data_path, 'oxts', 'timestamps.txt')

        # Read and parse the timestamps
        self.timestamps = []
        with open(timestamp_file, 'r') as f:
            for line in f:
                stamp = line.strip()
                if not stamp:
                    continue
                # NB: datetime only supports microseconds, but KITTI
                # timestamps give nanoseconds. Keep the first 26 characters
                # ('YYYY-MM-DD HH:MM:SS.ffffff'); unlike slicing off a fixed
                # suffix this also works for a last line without a newline.
                t = dt.datetime.strptime(stamp[:26], '%Y-%m-%d %H:%M:%S.%f')
                self.timestamps.append(t)

        # Subselect the chosen range of frames, if any
        if self.frames is not None:
            self.timestamps = [self.timestamps[i] for i in self.frames]
Aha, for some reason they seemed to have hardcoded this part so that it doesn't consider your data path. Here's the culprit:
data['T_velo_imu'] = self._load_calib_rigid('calib_imu_to_velo.txt')
Search for this line in your raw.py file and replace the file with data_path + file to make sure it goes to the right path.
I am creating a program (to test a theory), and to get the data I need, I need a program to run as fast as possible.
Here's the problem - I have made it as fast as I could manage and it is still too slow. It is using a very small amount of my computer's RAM and CPU capacity. I am running the program with PyCharm 2017 Community Edition.
The code is below; How would I further optimize or change this to make it run faster?
Main:
from functions import *
from graphics import *
import time

# Endless capture loop: draw a random image in a fresh window, save a
# screenshot of it under the current file name, then pick the next name.
Alpha = True
stamp = timestamp()
while Alpha:
    window = GraphWin(title="Image", width=512, height=512)
    build_image(window)
    getter(window, stamp)
    stamp = timestamp()
    time.sleep(3)
    window.close()
Module "Functions":
from graphics import *
import random
from PIL import ImageGrab
def build_image(window):
    """Fill ``window`` with 513 x 513 one-pixel squares of random grey."""
    for row in range(513):
        for col in range(513):
            pixel = Rectangle(Point(col, row), Point(col + 1, row + 1))
            shade = random.randrange(256)
            # Same value on all three channels gives a grey level.
            pixel.setFill(color_rgb(shade, shade, shade))
            pixel.setOutline(color_rgb(shade, shade, shade))
            pixel.draw(window)
def getter(widget, counter):
    """Save a screenshot of the area covered by ``widget`` as ``<counter>.png``."""
    left = widget.winfo_rootx() + widget.winfo_x()
    top = widget.winfo_rooty() + widget.winfo_y()
    right = left + widget.winfo_width()
    bottom = top + widget.winfo_height()
    # Grab the whole screen, then cut out the widget's rectangle.
    screenshot = ImageGrab.grab()
    region = screenshot.crop((left, top, right, bottom))
    region.save("{}.png".format(str(counter)))
def timestamp():
    """Return ``Image_<year>_<month>_<day>_<hour>_<minute>_<second>_UTC``.

    The fields come from ``time.gmtime()`` and are not zero-padded.
    """
    now = time.gmtime()
    fields = [str(now[k]) for k in range(6)]
    return "Image" + "_" + "_".join(fields) + "_UTC"
Note: Module "Graphics" is a module that allows for easy manipulation of Tkinter.
Your slowness is probably from treating the pixels as rectangles in your window.
If all you want to do is generate random images, you can skip the window part. I found this code laying about, after not too much ducking:
from PIL import Image
import random
def drawImage(width=600, height=600):
    """Return a new RGB image of random pixels.

    ``width`` and ``height`` default to the original fixed 600 x 600 size.
    """
    testImage = Image.new("RGB", (width, height), (255, 255, 255))
    pixel = testImage.load()
    for x in range(width):
        for y in range(height):
            # randrange(256) covers the full 0-255 channel range; the upper
            # bound of randrange is exclusive.
            red = random.randrange(256)
            green = random.randrange(256)
            blue = random.randrange(256)
            pixel[x, y] = (red, green, blue)
    return testImage
def main():
    """Generate a random image and save it as ``finalImage.jpg``."""
    drawImage().save("finalImage.jpg")
Use a profiler to see where your program is fast/slow. Here is a profile wrapper you can use on your functions to see what is taking too long in your program.
def line_profiler(view=None, extra_view=None):
    """Decorator that line-profiles a function on every call and prints the stats.

    Usable bare (``@line_profiler``) or with arguments
    (``@line_profiler(extra_view=[helper])``).

    view -- the function to profile (supplied automatically in the bare form)
    extra_view -- optional iterable of further functions to profile while
        ``view`` runs
    """
    import functools
    import line_profiler

    def wrapper(view):
        # wraps() keeps the profiled function's name and docstring.
        @functools.wraps(view)
        def wrapped(*args, **kwargs):
            prof = line_profiler.LineProfiler()
            prof.add_function(view)
            for extra in extra_view or ():
                prof.add_function(extra)
            try:
                with prof:
                    return view(*args, **kwargs)
            finally:
                # Report even when the profiled call raises.
                prof.print_stats()
        return wrapped

    if view:
        return wrapper(view)
    return wrapper
Now how to use it
@line_profiler
def simple():
    """Tiny example function used to demonstrate the profiling decorator."""
    print("Hello")
    print("World")
Now when you run your function, you will get a printout of how long everything takes.
You might need to do pip install line_profiler
this may be a bit faster if you use numpy. loops inside loops will kill your speed.
from PIL import Image
import numpy as np
def drawImage():
    """Return a 600 x 600 RGB image of random pixels, built in one numpy call."""
    # The upper bound of randint is exclusive, so 256 is needed to reach 255.
    noise = np.random.randint(256, size=(600, 600, 3), dtype=np.uint8)
    return Image.fromarray(noise)
Since you do a lot of independent tasks, you could benefit from parallelism. Something like:
from concurrent.futures import ThreadPoolExecutor
def build_image(window, start, end, step):
    """Draw random grey one-pixel squares on rows start, start+step, ... below ``end``.

    Each selected row is filled for columns 0 .. end-1, so several calls with
    different ``start`` values and the same ``step`` cover the rows between them.
    """
    for row in range(start, end, step):
        for col in range(end):
            pixel = Rectangle(Point(col, row), Point(col + 1, row + 1))
            shade = random.randrange(256)
            pixel.setFill(color_rgb(shade, shade, shade))
            pixel.setOutline(color_rgb(shade, shade, shade))
            pixel.draw(window)
# Give each worker every max_workers-th row, starting at its own offset.
# NOTE(review): ``window`` is not defined in this snippet - presumably the
# GraphWin created by the caller; confirm before running.
max_workers = 8
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    for worker_id in range(max_workers):
        executor.submit(build_image, window, worker_id, 513, max_workers)
I use Tesseract and python to read digits (from a energy meter).
Everything works well except for the number "1".
Tesseract can not read the "1" Digit.
This is the picture I send to tesseract :
And tesseract reads "0000027 ".
How can I tell Tesseract that the vertical rod is a "1" ?
This is my tesseract initialisation :
import tesseract
TESSERACT_LIBRARY_PATH = "C:\\Program Files (x86)\\Tesseract-OCR"
LANGUAGE = "eng"
CHARACTERS = "0123456789"
FALSE = "0"
TRUE = "1"
def init_ocr():
    """
    .. py:function:: init_ocr()

    Create a Tesseract OCR API object set up to read the digits of the meter.

    :return: tesseract_ocr The configured Tesseract OCR API object.
    :rtype: Class
    """
    ocr = tesseract.TessBaseAPI()
    # Initialise with the configured installation path and language.
    ocr.Init(TESSERACT_LIBRARY_PATH, LANGUAGE, tesseract.OEM_DEFAULT)

    # Only the whitelisted (numeric) characters are searched for.
    ocr.SetVariable("tessedit_char_whitelist", CHARACTERS)

    # NOTE(review): the earlier comment here said "predict for only one
    # character", but the mode requested is PSM_AUTO - confirm which one is
    # intended.
    ocr.SetPageSegMode(tesseract.PSM_AUTO)

    # Dictionary (DAWG) and adaptive-classifier switches, applied in order.
    switches = (
        ("load_system_dawg", FALSE),
        ("load_freq_dawg", FALSE),
        ("load_number_dawg", TRUE),
        ("classify_enable_learning", FALSE),
        ("classify_enable_adaptive_matcher", FALSE),
    )
    for variable, flag in switches:
        ocr.SetVariable(variable, flag)
    return ocr
Slightly irrelevant answer, though may serve your original goal.
I had similar problem with tesseract and I had very strict performance requirements as well. I found this simple solution on SO and crafted simple recogniser with OpenCV.
It boils down to finding bounding rectangles (from edges) on the very clear image that you have and then trying to match found objects versus templates. I believe the solution in your case will be both simple and precise though will require slightly more code than you have now.
I will follow this question, since it will be nice to have working solution with tesseract.
I have limited time, but this seems to be a working solution:
import os
import cv2
import numpy
# Side length, in pixels, of the square every image is resized to before it
# is flattened into a KNN sample (square 50 x 50 px).
KNN_SQUARE_SIDE = 50
def resize(cv_image, factor):
    """Return ``cv_image`` scaled by ``factor`` along both axes.

    Args:
        cv_image: Image array whose first two dimensions are (height, width).
        factor: Scale multiplier; may be fractional.

    Returns:
        The result of ``cv2.resize`` at the scaled size.
    """
    # Only the first two dimensions are read, so images that carry a third
    # (channel) dimension work too. The size is rounded to ints because a
    # fractional factor would otherwise produce a float size tuple.
    height, width = cv_image.shape[:2]
    new_size = (int(round(width * factor)), int(round(height * factor)))
    return cv2.resize(cv_image, new_size)
def crop(cv_image, box):
    """Return the part of ``cv_image`` selected by ``box``.

    ``box`` is ``(x0, y0, x1, y1)``: rows ``y0:y1`` and columns ``x0:x1`` of
    the array are taken, with the end indices excluded.
    """
    left, top, right, bottom = box
    rows = slice(top, bottom)
    cols = slice(left, right)
    return cv_image[rows, cols]
def draw_box(cv_image, box):
    """Draw the ``(x0, y0, x1, y1)`` rectangle ``box`` onto ``cv_image``."""
    left, top, right, bottom = box
    colour = (0, 0, 255)
    thickness = 2
    cv2.rectangle(cv_image, (left, top), (right, bottom), colour, thickness)
def draw_boxes_and_show(cv_image, boxes, title='N'):
    """Display a colour-converted copy of ``cv_image`` with ``boxes`` drawn.

    The image is converted with ``COLOR_GRAY2RGB``, each box is drawn on the
    converted copy, and the result is shown in a window named ``title``
    followed by ``cv2.waitKey(0)``.
    """
    canvas = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
    for rect in boxes:
        draw_box(canvas, rect)
    cv2.imshow(title, canvas)
    cv2.waitKey(0)
class BaseKnnMatcher(object):
    """Nearest-neighbour template matcher trained from a directory of PNGs.

    Every file in ``source_dir`` whose name contains ``.png`` is one
    template; the part of the name before ``.png`` is the label reported for
    images that match it.
    """

    # predict() returns None for matches whose distance exceeds this value.
    distance_threshold = 0

    def __init__(self, source_dir):
        self.model, self.label_map = self.get_model_and_label_map(source_dir)

    # Must be a staticmethod: it takes no ``self`` but is called through the
    # instance in __init__.
    @staticmethod
    def get_model_and_label_map(source_dir):
        """Train a KNN model on the template images in ``source_dir``.

        Args:
            source_dir: Directory holding the ``<label>.png`` templates.

        Returns:
            ``(model, label_map)`` where ``label_map[i]`` is the label of the
            template that was trained with response ``i``.
        """
        sample_length = KNN_SQUARE_SIDE * KNN_SQUARE_SIDE
        label_map = []
        samples = []
        for filename in os.listdir(source_dir):
            label, extension, _ = filename.partition('.png')
            if not extension:
                # Not a template file; skip it instead of aborting the load.
                continue
            image = cv2.imread(os.path.join(source_dir, filename), 0)
            standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
            samples.append(standard_size.reshape(sample_length))
            label_map.append(label)

        # Build the matrix once; appending to an array inside the loop would
        # copy every previously loaded sample for each new template.
        samples = numpy.array(samples, numpy.float32).reshape(
            (len(label_map), sample_length))
        # Response i identifies label_map[i].
        responses = numpy.arange(len(label_map), dtype=numpy.float32)
        responses = responses.reshape((responses.size, 1))

        model = cv2.KNearest()
        model.train(samples, responses)
        return model, label_map

    def predict(self, image):
        """Return the label of the nearest template for ``image``.

        Returns:
            The label string, or None when the nearest template is farther
            away than ``distance_threshold``.
        """
        sample = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
        sample = numpy.float32(
            sample.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
        closest_class, results, neigh_resp, distance = \
            self.model.find_nearest(sample, k=1)
        if distance[0][0] > self.distance_threshold:
            return None
        return self.label_map[int(closest_class)]
class DigitKnnMatcher(BaseKnnMatcher):
    """KNN matcher whose rejection threshold is raised to ``10 ** 10``."""

    distance_threshold = 10 ** 10
class MeterValueReader(object):
    """Reads the symbols found on a meter image using template matching."""

    def __init__(self):
        # Templates are loaded from the 'templates' directory.
        self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')

    # A classmethod so it can be called on the class as well as on an
    # instance.
    @classmethod
    def get_symbol_boxes(cls, cv_image):
        """Return a bounding box for every external contour in ``cv_image``.

        The image is binarised at threshold 150 before contours are looked
        up.

        Returns:
            A list of ``(x0, y0, x1, y1)`` tuples.
        """
        _, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        symbol_boxes = []
        for contour in contours:
            x, y, width, height = cv2.boundingRect(contour)
            # Boxes could be filtered by size here (for example with an
            # is_size_of_digit(width, height) check); every box is kept.
            symbol_boxes.append((x, y, x + width, y + height))
        return symbol_boxes

    def get_value(self, meter_cv2_image):
        """Return the predicted symbol for each box, ordered by box tuple.

        Boxes are sorted as tuples, so the x coordinate is compared first.
        An entry is None where the matcher rejects the crop.
        """
        boxes = sorted(self.get_symbol_boxes(meter_cv2_image))
        return [
            self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
            for box in boxes
        ]
if __name__ == '__main__':
    # If you want to see how boxes detection works, uncomment these:
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # draw_boxes_and_show(img_bw, boxes)

    # Uncomment to generate templates from image
    # import random
    # TEMPLATE_DIR = 'templates'
    # img_bw = cv2.imread(os.path.join('original.png'), 0)
    # boxes = MeterValueReader.get_symbol_boxes(img_bw)
    # for box in boxes:
    #     # You need to label templates manually after extraction
    #     cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)), crop(img_bw, box))

    # Flag 0 makes imread load the file as a single-channel image.
    img_bw = cv2.imread('original.png', 0)
    vr = MeterValueReader()
    # Parenthesised so the line is valid in both Python 2 and Python 3.
    print(vr.get_value(img_bw))
I'm working on a custom tiled map loader. It seems to work fine and I don't get any errors, but the screen only shows one tile of each type.
this is the file structure:
/main.py
/other/render2.py
/other/render.py
here's the render2.py file:
import pyglet, json
from pyglet.window import key
from pyglet.gl import *
from ConfigParser import SafeConfigParser
from cocos.layer import *
from cocos.batch import *
from cocos.sprite import Sprite
class renderer( Layer ):
    """Turns a map file into a list of positioned tile sprites."""

    def __init__(self):
        super( renderer, self ).__init__()

    def __call__(self, mapname):
        """Build the sprite list for ``maps/<mapname>.txt``.

        Args:
            mapname: Map file name without the directory and ``.txt`` suffix.

        Returns:
            A list of ``[sprite, layer_index, x, y]`` entries, one per map
            cell, or None when the map file could not be read.
        """
        parser = SafeConfigParser()
        # read() does not raise for a file it cannot open; it returns the
        # list of files it parsed, so an empty list means no map was loaded.
        world = parser.read('maps/'+mapname+'.txt')
        print(world)
        if not world:
            return None

        # Global settings from the map file.
        tileSize = int(parser.get('config', 'tilesize'))
        layers = int(parser.get('config', 'layers'))
        mapList = []
        for i in range(layers):
            section = 'layer'+str(i)
            layer = json.loads(parser.get(section, 'map'))
            tileType = parser.get(section, 'tiletype')
            nTiles = int(parser.get(section, 'tiles'))
            # Each tile image of this type is loaded from disk only once.
            tileImages = [
                pyglet.image.load("image/tiles/"+tileType+"/"+str(n)+".png")
                for n in range(nTiles)
            ]
            for x, column in enumerate(layer):
                for y, tileIndex in enumerate(column):
                    # A sprite has a single position, so every cell gets its
                    # own Sprite. Reusing one Sprite per tile type left only
                    # the last-placed cell visible.
                    tile = Sprite(tileImages[tileIndex], scale = 1, anchor = (0,0))
                    print('%s %s' % (tileIndex, tile))
                    mapList.append([tile, i, x*tileSize, y*tileSize])
        return mapList
This is an example of what this returns :
[<cocos.sprite.Sprite object at 0x060910B0>, 0, 0,0 ]
[<cocos.sprite.Sprite object at 0x060910B0> , 0, 64,64 ]
It returns a huge list with a lot of sublists like these in it.
when I call it from the main.py file, it only draws the last tile of each kind.
here's the main.py file:
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import pyglet
import threading,time
from pyglet import clock
from pyglet.gl import *
from cocos.director import *
from cocos.menu import *
from cocos.scene import *
from cocos.layer import *
from cocos.actions import *
from cocos.batch import *
from cocos.sprite import Sprite
from other.render2 import renderer
import random; rr = random.randrange
class Background(ScrollableLayer):
    """Scrollable layer holding the 'sampleidea' map in one BatchNode."""

    def __init__(self):
        super(Background, self).__init__()
        load_map = renderer()
        tiles = load_map('sampleidea')
        batch = BatchNode()
        # Each entry is [sprite, layer index, x, y]; the layer index is used
        # as the z order inside the batch.
        for sprite, depth, x, y in tiles:
            sprite.position = (x, y)
            batch.add(sprite, z=depth)
        self.add(batch)
class Menu(Layer):
    """Layer containing the title sprite."""

    def __init__(self):
        super(Menu, self).__init__()
        banner = Sprite('image/title.png')
        banner.position = (400, 520)
        self.add(banner)
def start():
    """Turn on depth testing and return a Scene of the background and menu."""
    director.set_depth_test()
    # The tuple is built left to right: Background first, then Menu.
    layers = (Background(), Menu())
    return Scene(*layers)
def init():
    """Initialise the director with the window options used by this game."""
    window_options = {
        'do_not_scale': True,
        'resizable': True,
        'width': 1280,
        'height': 720,
    }
    director.init(**window_options)
def run(scene):
    """Pass ``scene`` to ``director.run``."""
    director.run(scene)
if __name__ == "__main__":
    # The director is initialised before start() builds the scene.
    init()
    run(start())
What am I doing wrong? I have an older render.py, which does work, but I remade it since it loaded each sprite file for each tile. That took way too long to load on big maps.
This is the old render.py I've been using before.
It's quite different since it used different map files too.
import pyglet, json
from pyglet.window import key
from pyglet.gl import *
from ConfigParser import SafeConfigParser
from cocos.layer import *
from cocos.batch import *
from cocos.sprite import Sprite
class renderer( Layer ):
    """Turns a map file into a list of positioned tile sprites."""

    def __init__(self):
        super( renderer, self ).__init__()

    def __call__(self, mapname):
        """Build the sprite list for ``maps/<mapname>.txt``.

        Args:
            mapname: Map file name without the directory and ``.txt`` suffix.

        Returns:
            A list of ``[sprite, layer_index, x, y]`` entries, one per
            non-empty map cell, or None when the map file could not be read.
        """
        parser = SafeConfigParser()
        # read() does not raise for a file it cannot open; it returns the
        # list of files it parsed, so an empty list means no map was loaded.
        loaded = parser.read('maps/'+mapname+'.txt')
        print(loaded)
        if not loaded:
            print("No world file!")
            return None

        tilesize = json.loads(parser.get('data', 'tilesize'))
        world = json.loads(parser.get('data', 'world'))
        maplist = []
        for l, layer in enumerate(world):
            for x, column in enumerate(layer):
                for y, cell in enumerate(column):
                    if cell is None:
                        # Empty cell: nothing to draw.
                        continue
                    # A cell is indexed as [folder name, image name].
                    foldername = str(cell[0])
                    imagename = str(cell[1])
                    spr = Sprite("image/tiles/"+foldername+"/"+imagename+".png", scale = 1, anchor = (0,0))
                    maplist.append([spr, l, x*tilesize, y*tilesize])
        return maplist
Is it possible to make the new "render" work?
The problem is that my new, optimized "renderer" creates a bunch of
cocos.sprite.Sprite objects instead of just loading image files, as I thought it would. As a result, the code in my question only repositioned the same sprite object over and over again. The solution is to open each image with pyglet.image.load() and create a separate sprite object from it for every tile.
example:
# Load the image once; every sprite created below is built from it.
f = pyglet.image.load('sprite.png')
batch = CocosNode()
batch.position = (50, 100)
add(batch)
for i in range(200):
    # A fresh Sprite per position, all sharing the same image.
    test = Sprite(f)
    test.position = (i * 10, i * 10)
    batch.add(test)