I voxelize the STL file with the program below and save it as a vtk file.
The code itself is fine, as the program below works fine.
However, I want to convert it to numpy.array format and save it in npz format as well. Using vtk_to_numpy doesn't work, what should I do?
The ultimate goal is to convert it to numpy.array format and use it for 3d deeplearning.
e.g.
Taking 333 voxel data as an example,
I want to generate np.array with shape (3,3,3) like below
[
[[0,0,0],
[0,1,0],
[0,0,0]]
[[1,1,1],
[1,1,1],
[0,0,0]]
[[1,1,1],
[0,0,0],
[0,0,0]]
]
import vtk
import time
import numpy as np
import vtk.util.numpy_support as vnp
######## data ########
filename_in = "<your STL>.stl"
filename_out = "out.vtk"
mesh_size = 100
tol = 1e-7
cubicORrect = "rect"
##################################
start = time.time()
reader = vtk.vtkSTLReader()
reader.SetFileName(filename_in)
reader.Update()
closed_poly = reader.GetOutput()
# x_min:0 x_max:1, y_min:2,y_max:3,z_min:4,z_max:5
bounds = closed_poly.GetBounds()
max_size = max([bounds[1] - bounds[0], bounds[3] -
bounds[2], bounds[5] - bounds[4]])
cell_dims = [mesh_size, mesh_size, mesh_size] # x, y, z
if cubicORrect == "cubic":
mesh_pitch = [max_size/cell_dims[0],
max_size/cell_dims[1],
max_size/cell_dims[2]]
else:
mesh_pitch = [(bounds[1] - bounds[0])/cell_dims[0],
(bounds[3] - bounds[2])/cell_dims[1],
(bounds[5] - bounds[4])/cell_dims[2]]
mins = [bounds[0], bounds[2], bounds[4]]
px, py, pz = mesh_pitch
mx, my, mz = (cell_dims+np.array([1, 1, 1])) * mesh_pitch # max
points = vtk.vtkPoints()
coords = np.stack(np.mgrid[:mx:px, :my:py, :mz:pz], -1).reshape(-1, 3) + mins
points.SetData(vnp.numpy_to_vtk(coords))
structured_base_mesh = vtk.vtkStructuredGrid()
structured_base_mesh.SetExtent(
0, cell_dims[0], 0, cell_dims[1], 0, cell_dims[2])
structured_base_mesh.SetPoints(points)
append = vtk.vtkAppendFilter()
append.AddInputData(structured_base_mesh)
append.Update()
base_mesh = append.GetOutput()
cell_centers = vtk.vtkCellCenters()
cell_centers.SetInputData(base_mesh)
cell_centers.Update()
poly_points = cell_centers.GetOutput()
select_enclosed = vtk.vtkSelectEnclosedPoints()
select_enclosed.SetInputData(poly_points)
select_enclosed.SetSurfaceData(closed_poly)
select_enclosed.SetTolerance(tol)
select_enclosed.Update()
isInsideOrOutside = select_enclosed.GetOutput(
).GetPointData().GetArray("SelectedPoints")
structured_base_mesh.GetCellData().AddArray(isInsideOrOutside)
threshold = vtk.vtkThreshold()
threshold.SetInputArrayToProcess(
0, 0, 0, vtk.vtkDataObject.FIELD_ASSOCIATION_CELLS, "SelectedPoints")
threshold.SetInputData(structured_base_mesh)
threshold.ThresholdBetween(0, 1)
threshold.Update()
writer = vtk.vtkDataSetWriter()
writer.SetFileName(filename_out)
writer.SetInputData(threshold.GetOutput())
writer.Update()
nparray = vnp.vtk_to_numpy(threshold.GetOutput().GetCellData().GetArray("SelectedPoints"))
Steps to create the 3d shaped numpy array:
get the vtkDataArray you want. Something like threshold.GetOutput().GetPointData().GetArary(<arrayname>)
turn it into numpy array using vtk_to_numpy. This returns a 'flat' array: 1D list of all values
reshape it according the the mesh dimensions (threshold.GetOutput().GetDimensions())
Inspirated from this answer https://discourse.vtk.org/t/convert-vtk-array-to-numpy-array/3152/4
import vtk
import time
import numpy as np
import vtk.util.numpy_support as vnp
######## data ########
filename_in = "<your STL>.stl"
filename_out = "out.vtk"
mesh_size = 100
tol = 1e-7
cubicORrect = "rect"
##################################
start = time.time()
reader = vtk.vtkSTLReader()
reader.SetFileName(filename_in)
reader.Update()
closed_poly = reader.GetOutput()
# x_min:0 x_max:1, y_min:2,y_max:3,z_min:4,z_max:5
bounds = closed_poly.GetBounds()
max_size = max([bounds[1] - bounds[0], bounds[3] -
bounds[2], bounds[5] - bounds[4]])
cell_dims = [mesh_size, mesh_size, mesh_size] # x, y, z
if cubicORrect == "cubic":
mesh_pitch = [max_size/cell_dims[0],
max_size/cell_dims[1],
max_size/cell_dims[2]]
else:
mesh_pitch = [(bounds[1] - bounds[0])/cell_dims[0],
(bounds[3] - bounds[2])/cell_dims[1],
(bounds[5] - bounds[4])/cell_dims[2]]
mins = [bounds[0], bounds[2], bounds[4]]
px, py, pz = mesh_pitch
mx, my, mz = (cell_dims+np.array([1, 1, 1])) * mesh_pitch # max
points = vtk.vtkPoints()
coords = np.stack(np.mgrid[:mx:px, :my:py, :mz:pz], -1).reshape(-1, 3) + mins
points.SetData(vnp.numpy_to_vtk(coords))
structured_base_mesh = vtk.vtkStructuredGrid()
structured_base_mesh.SetExtent(
0, cell_dims[0], 0, cell_dims[1], 0, cell_dims[2])
structured_base_mesh.SetPoints(points)
append = vtk.vtkAppendFilter()
append.AddInputData(structured_base_mesh)
append.Update()
base_mesh = append.GetOutput()
cell_centers = vtk.vtkCellCenters()
cell_centers.SetInputData(base_mesh)
cell_centers.Update()
poly_points = cell_centers.GetOutput()
select_enclosed = vtk.vtkSelectEnclosedPoints()
select_enclosed.SetInputData(poly_points)
select_enclosed.SetSurfaceData(closed_poly)
select_enclosed.SetTolerance(tol)
select_enclosed.Update()
isInsideOrOutside = select_enclosed.GetOutput(
).GetPointData().GetArray("SelectedPoints")
structured_base_mesh.GetCellData().AddArray(isInsideOrOutside)
threshold = vtk.vtkThreshold()
threshold.SetInputArrayToProcess(
0, 0, 0, vtk.vtkDataObject.FIELD_ASSOCIATION_CELLS, "SelectedPoints")
threshold.SetInputData(structured_base_mesh)
threshold.ThresholdBetween(0, 1)
threshold.Update()
writer = vtk.vtkDataSetWriter()
writer.SetFileName(filename_out)
writer.SetInputData(threshold.GetOutput())
writer.Update()
nparray = vnp.vtk_to_numpy(threshold.GetOutput().GetCellData().GetArray("SelectedPoints"))
Related
I am trying to implement this Wiener filter to reduced the impact of a camera out of focus: https://docs.opencv.org/4.x/de/d3c/tutorial_out_of_focus_deblur_filter.html. The original code is in C++, and I rewrote it in python, but I do not get the same output as the example.
Here is my code:
import cv2 as cv
import numpy as np
def calcPSF(size, R):
h = np.zeros(size, dtype=np.float32)
cv.circle(h,(size[1]//2,size[0]//2), R, 1, -1)
psf = h/np.sum(h)
return psf
def calcWnrFilter(psf, SNR):
h_psf = np.fft.fftshift(psf)
h_planes = [np.float32(h_psf), np.zeros(h_psf.shape, np.float32)]
h_complexI = cv.merge(h_planes)
h_complexI = cv.dft(h_complexI)
h_planes = cv.split(h_complexI)
denom = np.power(np.abs(h_planes[0]),2) + (1/SNR)
wiener = np.divide(h_planes[0], denom, dtype = np.float32)
return wiener
def filter2DFreq(img, wiener):
planes = [np.float32(img), np.zeros(img.shape, np.float32)]
complexI = cv.merge(planes)
complexI = np.divide(cv.dft(complexI), complexI.size, dtype = np.float32)
planesH = [np.float32(wiener), np.zeros(wiener.shape, np.float32)]
complexH = cv.merge(planesH)
complexIH = cv.mulSpectrums(complexI, complexH, 0)
complexIH = cv.idft(complexIH)
planes = cv.split(complexIH)
out = planes[0]
return out
def deBlur(img, R, SNR):
rows, cols = img.shape
m = cv.getOptimalDFTSize( rows )
n = cv.getOptimalDFTSize( cols )
img = (cv.copyMakeBorder(img, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])/255).astype(np.float32)
h = calcPSF((m,n), R)
Hw = calcWnrFilter(h, SNR)
out = filter2DFreq(img, Hw)
return out
img = cv.imread("original_blur.jpg")[:,:,0]
while True:
v = deBlur(img,53,5200)
cv.imshow("in", img)
cv.imshow("out", v)
key = cv.waitKey(1) & 0xFF
if key == 27:
break
cv.destroyAllWindows()
And here are my imput and output
This is what I am supposed to get:
I verified the dtype of all my variable, everything is in np.float32, and spent hours looking for differences with the original code.
Or: The search for a faster and more accurate way to rasterize small OpenStreetMap extracts into population-weighted rasters.
I'd like to turn a small .pbf file into a GeoTiff which will be easier to do further spatial analysis on. For the purpose of this question I will constrain the requirements to dealing with polygon geometry since I already found a solution that works very well for lines. It works so well that I am considering converting all my polygons into lines.
To give an example of the type of data that I'd like to convert:
wget https://download.geofabrik.de/europe/liechtenstein-latest.osm.pbf
osmium tags-filter liechtenstein-latest.osm.pbf landuse=grass -o liechtenstein_grass.pbf
ogr2ogr -t_srs EPSG:3857 liechtenstein_grass.sqlite -dsco SPATIALITE=YES -nln multipolygons -nlt POLYGON -skipfailures liechtenstein_grass.pbf
I found a zonal statistics script here which we might be able to build from to solve this problem: http://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html#calculate-zonal-statistics
The above script takes a vector layer and a raster layer, iterates on the vector features by clipping the raster and doing some statistics on that.
Instead of normal zonal statistics I would like to count the number of vector features which intersect with each raster pixel. I have a global raster grid Int32 with a unique value for each pixel.
{qgis_process} run native:creategrid -- TYPE=2 EXTENT="-20037760, -8399416, 20037760, 18454624 [EPSG:3857]" HSPACING=1912 VSPACING=1912 HOVERLAY=0 VOVERLAY=0 CRS="EPSG:3857" OUTPUT="grid.gpkg"
sqlite3 land.gpkg
SELECT load_extension("mod_spatialite");
alter table output add column ogcfod int;
update output set ogcfod = fid;
gdal_rasterize -l output -a ogcfod -tap -tr 1912.0 1912.0 -a_nodata 0.0 -ot Int32 -of GTiff -co COMPRESS=DEFLATE -co PREDICTOR=2 grid.gpkg grid.tif -te -20037760 -8399416 20037760 18454624
So I'm thinking if we could still iterate on the vector features (as there are far, far fewer of those and there are 88m+ zones in the raster grid) it will probably be much more performant.
We want a script script which takes a vector layer and a raster layer, iterates on the vector features looks up the values of all the pixels the feature covers and then adds one to a dictionary: {px_id: qty}
However, when trying to make this script work it keeps giving me the same geometry... It only shows me 1 of the pixel IDs over and over
import sys
import gdal
import numpy
import ogr
import osr
from rich import inspect, print
def zonal_stats(feat, lyr, raster):
# Get raster georeference info
transform = raster.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]
# Reproject vector geometry to same projection as raster
sourceSR = lyr.GetSpatialRef()
targetSR = osr.SpatialReference()
targetSR.ImportFromWkt(raster.GetProjectionRef())
coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)
feat = lyr.GetNextFeature()
geom = feat.GetGeometryRef()
geom.Transform(coordTrans)
# Get extent of feat
geom = feat.GetGeometryRef()
if geom.GetGeometryName() == "MULTIPOLYGON":
count = 0
pointsX = []
pointsY = []
for polygon in geom:
geomInner = geom.GetGeometryRef(count)
ring = geomInner.GetGeometryRef(0)
numpoints = ring.GetPointCount()
for p in range(numpoints):
lon, lat, z = ring.GetPoint(p)
pointsX.append(lon)
pointsY.append(lat)
count += 1
elif geom.GetGeometryName() == "POLYGON":
ring = geom.GetGeometryRef(0)
numpoints = ring.GetPointCount()
pointsX = []
pointsY = []
for p in range(numpoints):
lon, lat, z = ring.GetPoint(p)
pointsX.append(lon)
pointsY.append(lat)
else:
sys.exit("ERROR: Geometry needs to be either Polygon or Multipolygon")
xmin = min(pointsX)
xmax = max(pointsX)
ymin = min(pointsY)
ymax = max(pointsY)
print(xmin, xmax)
print(ymin, ymax)
# Specify offset and rows and columns to read
xoff = int((xmin - xOrigin) / pixelWidth)
yoff = int((yOrigin - ymax) / pixelWidth)
xcount = int((xmax - xmin) / pixelWidth) + 1
ycount = int((ymax - ymin) / pixelWidth) + 1
# Create memory target raster
target_ds = gdal.GetDriverByName("MEM").Create(
"", xcount, ycount, 1, gdal.GDT_Int32
)
target_ds.SetGeoTransform(
(
xmin,
pixelWidth,
0,
ymax,
0,
pixelHeight,
)
)
# Create for target raster the same projection as for the value raster
raster_srs = osr.SpatialReference()
raster_srs.ImportFromWkt(raster.GetProjectionRef())
target_ds.SetProjection(raster_srs.ExportToWkt())
# Rasterize zone polygon to raster
gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
# Read raster as arrays
banddataraster = raster.GetRasterBand(1)
dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount)
bandmask = target_ds.GetRasterBand(1)
datamask = bandmask.ReadAsArray(0, 0, xcount, ycount)
print(dataraster)
# Mask zone of raster
# zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
# print(zoneraster)
# exit()
def loop_zonal_stats(lyr, raster):
featList = range(lyr.GetFeatureCount())
statDict = {}
for FID in featList:
feat = lyr.GetFeature(FID)
meanValue = zonal_stats(feat, lyr, raster)
statDict[FID] = meanValue
return statDict
def main(input_zonal_raster, input_value_polygon):
raster = gdal.Open(input_zonal_raster)
shp = ogr.Open(input_value_polygon)
lyr = shp.GetLayer()
return loop_zonal_stats(lyr, raster)
if __name__ == "__main__":
if len(sys.argv) != 3:
print(
"[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
)
sys.exit(1)
main(sys.argv[1], sys.argv[2])
Prior research:
https://gis.stackexchange.com/questions/177738/count-overlapping-polygons-including-duplicates
https://stackoverflow.com/a/47443399/697964
If gdal_rasterize could burn in the count of all the polygons which intersect with each pixel (rather than a fixed value) that would likely fulfill my needs.
https://github.com/rory/osm-summary-heatmap/blob/main/Makefile
https://old.reddit.com/r/gis/comments/4n2q5v/count_overlapping_polygons_qgis/
heatmapkerneldensity does not work very well or maybe I'm not using it correctly but it seems off
{qgis_process} run qgis:heatmapkerneldensityestimation -- INPUT="{basen}.json.geojson" RADIUS=2868 RADIUS_FIELD=None PIXEL_SIZE=1912 WEIGHT_FIELD=None KERNEL=4 DECAY=0 OUTPUT_VALUE=0 OUTPUT="{basen}.tif
This seems to work. Output is CSV columns=["px_id", "qty"]
"""
python rasterlayerzonalvectorcounts.py grid.tif liechtenstein_grass.sqlite
MIT License
Based on https://github.com/pcjericks/py-gdalogr-cookbook/blob/master/raster_layers.rst#calculate-zonal-statistics
"""
import sys
import osr
import os
import ogr
import numpy
import gdal
import pandas
from joblib import Parallel, delayed
from collections import Counter
def zonal_stats(FID, lyr, raster):
feat = lyr.GetFeature(FID)
# Get raster georeference info
transform = raster.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]
# Reproject vector geometry to same projection as raster
sourceSR = lyr.GetSpatialRef()
targetSR = osr.SpatialReference()
targetSR.ImportFromWkt(raster.GetProjectionRef())
coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)
geom = feat.GetGeometryRef()
geom.Transform(coordTrans)
# Get extent of feat
geom = feat.GetGeometryRef()
xmin, xmax, ymin, ymax = geom.GetEnvelope()
# Specify offset and rows and columns to read
xoff = int((xmin - xOrigin) / pixelWidth)
yoff = int((yOrigin - ymax) / pixelWidth)
xcount = int((xmax - xmin) / pixelWidth) + 1
ycount = int((ymax - ymin) / pixelWidth) + 1
# Create memory target raster
target_ds = gdal.GetDriverByName("MEM").Create(
"", xcount, ycount, 1, gdal.GDT_Int32
)
target_ds.SetGeoTransform(
(
xmin,
pixelWidth,
0,
ymax,
0,
pixelHeight,
)
)
# Create for target raster the same projection as for the value raster
raster_srs = osr.SpatialReference()
raster_srs.ImportFromWkt(raster.GetProjectionRef())
target_ds.SetProjection(raster_srs.ExportToWkt())
# Rasterize zone polygon to raster
gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
# Read raster as arrays
banddataraster = raster.GetRasterBand(1)
dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount)
bandmask = target_ds.GetRasterBand(1)
datamask = bandmask.ReadAsArray(0, 0, xcount, ycount)
# Mask zone of raster
zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
return numpy.array(zoneraster).tolist()
def loop_zonal_stats(input_value_polygon, input_zonal_raster):
shp = ogr.Open(input_value_polygon)
lyr = shp.GetLayer()
print("Processing", lyr.GetFeatureCount(), "features")
featList = range(lyr.GetFeatureCount())
def processFID(input_value_polygon, input_zonal_raster, FID):
shp = ogr.Open(input_value_polygon)
raster = gdal.Open(input_zonal_raster)
lyr = shp.GetLayer()
if FID:
px_ids = zonal_stats(FID, lyr, raster)
# print(px_ids)
px_ids = [item for sublist in px_ids for item in sublist]
return px_ids
return Parallel(n_jobs=8)(
delayed(processFID)(input_value_polygon, input_zonal_raster, FID)
for FID in featList
)
if __name__ == "__main__":
if len(sys.argv) != 3:
print(
"[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
)
sys.exit(1)
input_zonal_raster = sys.argv[1]
input_value_polygon = sys.argv[2]
counts = list(
filter(None, loop_zonal_stats(input_value_polygon, input_zonal_raster))
)
counts = Counter([item for sublist in counts for item in sublist])
pandas.DataFrame.from_dict(data=counts, orient="index").to_csv(
os.path.splitext(input_value_polygon)[0] + ".csv", header=False
)
This one will create an output GeoTiff with the same grid system as the source zonal GeoTiff
I wonder if it could be sped up by using What is the purpose of meshgrid in Python / NumPy?
"""
python rasterlayerzonalvectorcounts.py grid.tif liechtenstein_grass.sqlite
MIT License
Based on https://github.com/pcjericks/py-gdalogr-cookbook/blob/master/raster_layers.rst#calculate-zonal-statistics
"""
import sys
import osr
import os
import ogr
import numpy
import gdal
import pandas
from joblib import Parallel, delayed
from collections import Counter
from rich import print, inspect
def zonal_stats(FID, lyr, raster):
feat = lyr.GetFeature(FID)
# Get raster georeference info
transform = raster.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]
# Get extent of feat
geom = feat.GetGeometryRef()
xmin, xmax, ymin, ymax = geom.GetEnvelope()
# Specify offset and rows and columns to read
xoff = int((xmin - xOrigin) / pixelWidth)
yoff = int((yOrigin - ymax) / pixelWidth)
xcount = int((xmax - xmin) / pixelWidth) + 1
ycount = int((ymax - ymin) / pixelWidth) + 1
feat_arr = []
# if xcount != 1 or ycount != 1:
# print(xoff, yoff, xcount, ycount)
for x in range(xcount):
for y in range(ycount):
# print(xoff + x, yoff + y)
feat_arr.append((xoff + x, yoff + y))
return feat_arr
def loop_zonal_stats(input_value_polygon, input_zonal_raster):
shp = ogr.Open(input_value_polygon)
lyr = shp.GetLayer()
print("Processing", lyr.GetFeatureCount(), "features")
featList = range(lyr.GetFeatureCount())
def processFID(input_value_polygon, input_zonal_raster, FID):
shp = ogr.Open(input_value_polygon)
raster = gdal.Open(input_zonal_raster)
lyr = shp.GetLayer()
if FID:
px_ids = zonal_stats(FID, lyr, raster)
return px_ids
return Parallel(n_jobs=1)(
delayed(processFID)(input_value_polygon, input_zonal_raster, FID)
for FID in featList
)
if __name__ == "__main__":
if len(sys.argv) != 3:
print(
"[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
)
sys.exit(1)
input_zonal_raster = sys.argv[1]
input_value_polygon = sys.argv[2]
counts = list(
filter(None, loop_zonal_stats(input_value_polygon, input_zonal_raster))
)
counts = Counter([item for sublist in counts for item in sublist])
raster_srs = osr.SpatialReference()
raster_srs.ImportFromWkt(gdal.Open(input_zonal_raster).GetProjectionRef())
raster_arr = numpy.empty((14045, 20960), numpy.int32)
for px in counts.items():
# print(px)
raster_arr[px[0][1]][px[0][0]] = px[1]
target_ds = gdal.GetDriverByName("GTiff").Create(
os.path.splitext(input_value_polygon)[0] + ".tif",
20960,
14045,
1,
gdal.GDT_Int32,
options=["COMPRESS=LZW"],
)
target_ds.SetGeoTransform(gdal.Open(input_zonal_raster).GetGeoTransform())
target_ds.SetProjection(raster_srs.ExportToWkt())
target_ds.GetRasterBand(1).WriteArray(raster_arr)
target_ds.GetRasterBand(1).SetNoDataValue(0)
target_ds.GetRasterBand(1).GetStatistics(0, 1)
target_ds.FlushCache()
target_ds = None
Hello guys i am trying to implement an algortihm to remove water from underwater images and make image more noticable , but i got an errror ValueError: max() arg is an empty sequence , at the function homomorpic on this line r = max(np.ravel(result[:,:i])) , the error is caused because the result array is empty but i filled it above .Here the code below.
import numpy as np
import cv2
def homomorpic(img):
img = np.float32(img)
#img = img/255
rows , cols , dim = img.shape
(rh,rl,cutoff) = 1.3, 0.8, 32
b,g,r = cv2.split(img)
y_log_b = np.log(b + 0.01)
y_log_g = np.log(g + 0.01)
y_log_r = np.log(r + 0.01)
y_fft_b= np.fft.fft2(y_log_b)
y_fft_g= np.fft.fft2(y_log_g)
y_fft_r= np.fft.fft2(y_log_r)
y_fft_b_shift = np.fft.fftshift(y_log_b)
y_fft_g_shift = np.fft.fftshift(y_log_g)
y_fft_r_shift = np.fft.fftshift(y_log_r)
D0=cols/cutoff
H= np.ones((rows,cols))
B= np.ones((rows,cols))
for i in range(rows):
for j in range(cols):
H[i][j] = ((rh-rl)* (1-np.exp(-((i-rows/2)**2+(j-cols/2)**2)/(2*D0**2))))+rl
result_filter_b = H* y_fft_b_shift
result_filter_g = H* y_fft_g_shift
result_filter_r = H* y_fft_r_shift
result_b_intern = np.real(np.fft.ifft2(np.fft.ifftshift(result_filter_b)))
result_g_intern = np.real(np.fft.ifft2(np.fft.ifftshift(result_filter_g)))
result_r_intern = np.real(np.fft.ifft2(np.fft.ifftshift(result_filter_r)))
result_b = np.exp(result_b_intern)
result_g = np.exp(result_g_intern)
result_r = np.exp(result_r_intern)
result = np.zeros((rows,cols,dim))
result[:,:,0] = result_b
result[:,:,1] = result_g
result[:,:,2] = result_r
ma = -1
mi = 500
for i in range(3):
r = max(np.ravel(result[:,:i]))
x = min(np.ravel(result[:,:i]))
if r > ma :
ma = r
if x < mi :
mi = x
return(result)
image = cv2.imread("eg.png")
image2 = homomorpic(image)
Thanks for any help or suggestion.
In this loop for i in range(3): the first value of i would be 0.
This will later on lead to this r = max(np.ravel(result[:,:0])) where the result from the slicing would be empty.
You would want to shift yourrange forward like this:
for i in range(1, 3+1):
I am trying to create an image array from scratch.
I got the code running but it takes arrounds 30 secs to run it.
I feel it could be faster by using numpy native functions.
How can I do this?
import cv2
import numpy as np
import time
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh = 128
image_width = 256
image_channel = 3
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
start_time = time.time()
for ii in range(len(volumes)-image_width):
# ===================== part to optimize start
final_image = np.zeros((image_heigh, image_width, image_channel))
start = ii
end = ii + image_width
current_vols = volumes[start:end]
# nomalize data
vol_min = 0
vol_max = np.max(current_vols)
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
# ===================== part to optimize end
if show_img:
image = np.float32(final_image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.imshow("ok", image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
How can I do to make this image array creation faster?
I need to create the image every timesteps because I want to simulate real live data stream that come every new timesteps.
This is why I would like to optimize only this part of the code :
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
How can I do this?
First simplest optimizations are next:
Use comparing values to np.arange(...) instead of inner loop.
Use gray image instead of 3-channels RGB. 3 times less data to process.
Use np.uint8 type instead of np.float32, which is faster to process and doesn't need conversion to float32 for CV2 visualizing.
All these above optimizations give huge speedup (10x times), and my running time is 2.6 sec instead of 27 sec before.
Also another very useful optimization that I didn't do is that you don't need to recompute previous image pixels in a case when max/min of whole data within current window didn't change. You need to recompute previous image data only in the case if max/min changed. And I expect that your real-life data is gradually changing like Forex or Bitcoin prices, hence max/min change within a window is very non-often.
Optimizations 1)-3) mentioned above are implemented in the next code:
import cv2
import numpy as np
import time
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh = 128
image_width = 256
image_channel = 3
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
start_time = time.time()
aranges = np.arange(image_heigh, dtype = np.int32)[:, None]
for ii in range(len(volumes)-image_width):
# ===================== part to optimize start
#final_image = np.zeros((image_heigh, image_width, image_channel), dtype = np.float32)
start = ii
end = ii + image_width
current_vols = volumes[start:end]
# nomalize data
vol_min = 0
vol_max = np.max(current_vols)
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
final_image = (aranges < vol_norm[None, :].astype(np.int32)).astype(np.uint8) * 255
# ===================== part to optimize end
if show_img:
cv2.imshow('ok', final_image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
For above code I just did one more optimization of inner loop which speed-up code above even 2x times more to have timings of 1.3 sec. But also I put back 3 channels plus float32, this reduced speed resulting in final 2.8 sec, here is the code
Another next optimization is possible if re-computing old images data is not needed.
Main thing to be optimized was that you were re-computing almost same whole image on each step with 1 pixel shift-step along width. Instead of this you need to compute whole image once, then shift right not 1 pixel but whole image width.
Then after this optimization running time is 0.08 sec.
And do 1 pixel stepping only for showing animation, not for computing image data, image data should be computed just once if you need speed.
import cv2
import numpy as np
import time
volumes = np.random.randint(low=0, high=200, size=10000)
print(volumes)
image_heigh = 128
image_width = volumes.size #256
image_channel = 3
screen_width = 256
show_img = False
def nomralized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
start_time = time.time()
for ii in range(0, len(volumes), image_width):
# ===================== part to optimize start
final_image = np.zeros((image_heigh, image_width, image_channel))
start = ii
end = ii + image_width
current_vols = volumes[start:end]
# nomalize data
vol_min = 0
vol_max = np.max(current_vols)
vol_norm = nomralized(data=current_vols,
data_min=vol_min,
data_max=vol_max,
maximum_value=image_heigh)
for xxx in range(image_width):
final_image[:int(vol_norm[xxx]), xxx, :] = 1
# ===================== part to optimize end
if show_img:
for start in range(0, final_image.shape[1] - screen_width):
image = np.float32(final_image[:, start : start + screen_width])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.imshow("ok", image)
cv2.waitKey(27)
print("total running time: ", (time.time() - start_time))
I also created animation image out of your data:
If you want to create same animation just append next piece of code to the end of script above:
# Needs: python -m pip install pillow
import PIL.Image
imgs = [PIL.Image.fromarray(final_image[:, start : start + screen_width].astype(np.uint8) * 255) for start in range(0, final_image.shape[1] - screen_width, 6)]
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
I've implemented also simulation of real-time live stream data rendering/visualizing.
live_stream() generator spits out random amount of data at random points of time, this is to simulate data generation process.
stream_fetcher() listens to live stream and records all data received to python queue q0, this fetcher is run in one thread.
renderer() gets data recorded by fetcher and renders it into image through your mathematical formulas and normalization process, it renders as much data as available, resulting in images with varying widths, rendered images are saved to another queue q1.
visualizer() visualizes rendered data by fetching as much rendered images as available.
All functions run in separate threads not to block whole process. Also if any of threads works to slow then it skips some of data to catch-up with current real-time data, thus every queue doesn't overflow.
Also you may see that visualized process is jumpy, it is not because functions are somewhat slow, but because live stream spits out different amount of data in each time step, this is how usually real-time data may behave.
In the next code I did also extra optimization mentioned before, that is not-recomputing image if min/max didn't change.
import cv2, numpy as np
import time, random, threading, queue
image_height = 256
image_width = 512
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)
def live_stream():
last = 0.
while True:
a = np.random.uniform(low = -1., high = 1., size = random.randint(1, 20)).astype(np.float64).cumsum() + last
yield a
last = a[-1]
time.sleep(random.random() * 0.1)
q0 = queue.Queue()
def stream_fetcher():
for e in live_stream():
q0.put(e)
threading.Thread(target = stream_fetcher, daemon = True).start()
aranges = np.arange(image_height, dtype = np.int32)[:, None]
q1 = queue.Queue()
def renderer():
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
vols = np.concatenate(data)[-image_width:]
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
q1.put(whole_image)
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
imgs = []
while True:
data = []
data.append(q1.get())
try:
while True:
data.append(q1.get(block = False))
except queue.Empty:
pass
image = np.concatenate(data, axis = 1)[:, -image_width:]
cv2.imshow('ok', image)
cv2.waitKey(1)
if imgs is not None:
try:
# Needs: python -m pip install pillow
import PIL.Image
has_pil = True
except:
has_pil = False
imgs = None
if has_pil:
imgs.append(PIL.Image.fromarray(np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)))
if len(imgs) >= 1000:
print('saving...', flush = True)
imgs[0].save('result.png', append_images = imgs[1:], save_all = True, lossless = True, duration = 100)
imgs = None
print('saved!', flush = True)
threading.Thread(target = visualizer, daemon = True).start()
while True:
time.sleep(0.1)
Above live process simulation is rendered into result.png which I show down below:
I've also decided to improve visualization, by using more advanced matplotlib instead of cv2 to be able to show axes and doing real-time plot drawing. Visualization image is down below:
Next is a matplotlib-based code corresponding to last image above:
import cv2, numpy as np
import time, random, threading, queue
image_height = 256
image_width = 512
save_nsec = 20
dpi, fps = 100, 15
# Make results reproducible and deterministic
np.random.seed(0)
random.seed(0)
def live_stream():
last = 0.
pos = 0
while True:
a = np.random.uniform(low = -1., high = 1., size = random.randint(1, 30)).astype(np.float64).cumsum() + last
yield a, pos, pos + a.size - 1
pos += a.size
last = a[-1]
time.sleep(random.random() * 2.2 / fps)
q0 = queue.Queue()
def stream_fetcher():
for e in live_stream():
q0.put(e)
threading.Thread(target = stream_fetcher, daemon = True).start()
aranges = np.arange(image_height, dtype = np.int32)[:, None]
q1 = queue.Queue()
def renderer():
def normalized(data, data_min, data_max, maximum_value):
nomamized_data = maximum_value * ((data - data_min) / (data_max - data_min))
return nomamized_data
prev_image = np.zeros((image_height, 0), dtype = np.uint8)
prev_vols = np.zeros((0,), dtype = np.float64)
while True:
data = []
data.append(q0.get())
try:
while True:
data.append(q0.get(block = False))
except queue.Empty:
pass
data_vols = [e[0] for e in data]
data_minx, data_maxx = data[0][1], data[-1][2]
vols = np.concatenate(data_vols)[-image_width:]
prev_vols = prev_vols[-(image_width - vols.size) or prev_vols.size:]
concat_vols = np.concatenate((prev_vols, vols))[-image_width:]
vols_min, vols_max = np.amin(concat_vols), np.amax(concat_vols)
if prev_vols.size > 0 and (vols_min < np.amin(prev_vols) - 10 ** -8 or vols_max > np.amax(prev_vols) + 10 ** -8):
vols = concat_vols
prev_image = prev_image[:, :-prev_vols.size]
prev_vols = prev_vols[:0]
vols_norm = normalized(
data = vols, data_min = vols_min,
data_max = vols_max, maximum_value = image_height,
)
image = (aranges < vols_norm.astype(np.int32)[None, :]).astype(np.uint8) * 255
whole_image = np.concatenate((prev_image, image), axis = 1)[:, -image_width:]
q1.put((whole_image, data_maxx - whole_image.shape[1] + 1, data_maxx, vols_min, vols_max))
prev_image = whole_image
prev_vols = concat_vols
threading.Thread(target = renderer, daemon = True).start()
def visualizer():
import matplotlib.pyplot as plt, matplotlib.animation
def images():
while True:
data = []
data.append(q1.get())
try:
while True:
data.append(q1.get(block = False))
except queue.Empty:
pass
minx = min([e[1] for e in data])
maxx = min([e[2] for e in data])
miny = min([e[3] for e in data])
maxy = min([e[4] for e in data])
image = np.concatenate([e[0] for e in data], axis = 1)[:, -image_width:]
image = np.pad(image, ((0, 0), (image_width - image.shape[1], 0)), constant_values = 0)
image = np.repeat(image[:, :, None], 3, axis = -1)
yield image, minx, maxx, miny, maxy
it = images()
im = None
fig = plt.figure(figsize = (image_width / dpi, image_height / dpi), dpi = dpi)
def animate_func(i):
nonlocal it, im, fig
image, minx, maxx, miny, maxy = next(it)
print(f'.', end = '', flush = True)
if im is None:
im = plt.imshow(image, interpolation = 'none', aspect = 'auto')
else:
im.set_array(image)
im.set_extent((minx, maxx, miny, maxy))
return [im]
anim = matplotlib.animation.FuncAnimation(fig, animate_func, frames = round(save_nsec * fps), interval = 1000 / fps)
print('saving...', end = '', flush = True)
#anim.save('result.mp4', fps = fps, dpi = dpi, extra_args = ['-vcodec', 'libx264'])
anim.save('result.gif', fps = fps, dpi = dpi, writer = 'imagemagick')
print('saved!', end = '', flush = True)
plt.show()
threading.Thread(target = visualizer, daemon = True).start()
while True:
time.sleep(0.1)
Then I've decided to play a bit and colored last image with RGB palette, the higher the peak is more red-ish it is, if it is more in the middle then it is more green-ish, if it is low enough then it is more blue-ish. Resulting image below was achieved by this coloring code:
And another one colored animation below, line-style instead of bar-style, with the help of this code:
Hi I'm a student and I try to make a 3d map reconstruction for a signature and just follow the example in the book "OpenCV with Python by Example" and I don't know why, or how to fix this mistake. I got this error, please help me.
This is the code:
import argparse
import cv2
import numpy as np
def build_arg_parser():
parser = argparse.ArgumentParser(description='Reconstruct the 3D map from \the two input stereo images. Output will be saved in\'output.ply\'')
parser.add_argument("--image-left", dest="image_left", required=True,
help="Input image captured from the left")
parser.add_argument("--image-right", dest="image_right", required=True,
help="Input image captured from the right")
parser.add_argument("--output-file", dest="output_file", required=True,
help="Output filename (without the extension) where the point cloud will be saved")
return parser
def create_output(vertices, colors, filename):
colors = colors.reshape(-1, 3)
vertices = np.hstack([vertices.reshape(-1,3), colors])
ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
with open(filename, 'w') as f:
f.write(ply_header % dict(vert_num=len(vertices)))
np.savetxt(f, vertices, '%f %f %f %d %d %d')
if __name__ == '__main__':
args = build_arg_parser().parse_args()
image_left = cv2.imread(args.image_left)
image_right = cv2.imread(args.image_right)
output_file = args.output_file + '.ply'
if image_left.shape[0] != image_right.shape[0] or \
image_left.shape[1] != image_right.shape[1]:
raise TypeError("Input images must be of the same size")
# downscale images for faster processing
image_left = cv2.pyrDown(image_left)
image_right = cv2.pyrDown(image_right)
# disparity range is tuned for 'aloe' image pair
win_size = 1
min_disp = 16
max_disp = min_disp * 9
num_disp = max_disp - min_disp# Needs to be divisible by 16
stereo = cv2.StereoSGBM(minDisparity = min_disp,
numDisparities = num_disp,
SADWindowSize = win_size,
uniquenessRatio = 10,
speckleWindowSize = 100,
speckleRange = 32,
disp12MaxDiff = 1,
P1 = 8*3*win_size**2,
P2 = 32*3*win_size**2,
fullDP = True
)
print "\nComputing the disparity map..."
disparity_map = stereo.compute(image_left,image_right).astype(np.float32) /16.0
print "\nGenerating the 3D map..."
h, w = image_left.shape[:2]
focal_length = 0.8*w
# Perspective transformation matrix
Q = np.float32([[1, 0, 0, -w/2.0],
[0,-1, 0, h/2.0],
[0, 0, 0, -focal_length],
[0, 0, 1, 0]])
points_3D = cv2.reprojectImageTo3D(disparity_map, Q)
colors = cv2.cvtColor(image_left, cv2.COLOR_BGR2RGB)
mask_map = disparity_map > disparity_map.min()
output_points = points_3D[mask_map]
output_colors = colors[mask_map]
print "\nCreating the output file...\n"
create_output(output_points, output_colors, output_file)
This the error I'm getting in the console:
Computing the disparity map...
Traceback (most recent call last):
File "rec.py", line 58, in <module>
disparity_map = stereo.compute(image_left,image_right).astype(np.float32) / 16.0
TypeError: Incorrect type of self (must be 'StereoMatcher' or its derivative)
you should use:
stereo = cv2.StereoSGBM_create(
minDisparity = min_disp,
numDisparities = num_disp,
SADWindowSize = win_size,
uniquenessRatio = 10,
speckleWindowSize = 100,
speckleRange = 32,
disp12MaxDiff = 1,
P1 = 8*3*win_size**2,
P2 = 32*3*win_size**2,
fullDP = True
)