Building the 3D Map in Python

Hi, I'm a student trying to do a 3D map reconstruction for a signature, following the example in the book "OpenCV with Python by Example". I don't know why I get this error or how to fix it; please help me.
This is the code:
import argparse

import cv2
import numpy as np


def build_arg_parser():
    parser = argparse.ArgumentParser(description="Reconstruct the 3D map from the two input stereo images. Output will be saved in 'output.ply'")
    parser.add_argument("--image-left", dest="image_left", required=True,
                        help="Input image captured from the left")
    parser.add_argument("--image-right", dest="image_right", required=True,
                        help="Input image captured from the right")
    parser.add_argument("--output-file", dest="output_file", required=True,
                        help="Output filename (without the extension) where the point cloud will be saved")
    return parser


def create_output(vertices, colors, filename):
    colors = colors.reshape(-1, 3)
    vertices = np.hstack([vertices.reshape(-1, 3), colors])
    ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
    with open(filename, 'w') as f:
        f.write(ply_header % dict(vert_num=len(vertices)))
        np.savetxt(f, vertices, '%f %f %f %d %d %d')


if __name__ == '__main__':
    args = build_arg_parser().parse_args()
    image_left = cv2.imread(args.image_left)
    image_right = cv2.imread(args.image_right)
    output_file = args.output_file + '.ply'
    if image_left.shape[0] != image_right.shape[0] or \
            image_left.shape[1] != image_right.shape[1]:
        raise TypeError("Input images must be of the same size")
    # downscale images for faster processing
    image_left = cv2.pyrDown(image_left)
    image_right = cv2.pyrDown(image_right)
    # disparity range is tuned for 'aloe' image pair
    win_size = 1
    min_disp = 16
    max_disp = min_disp * 9
    num_disp = max_disp - min_disp  # needs to be divisible by 16
    stereo = cv2.StereoSGBM(minDisparity=min_disp,
                            numDisparities=num_disp,
                            SADWindowSize=win_size,
                            uniquenessRatio=10,
                            speckleWindowSize=100,
                            speckleRange=32,
                            disp12MaxDiff=1,
                            P1=8*3*win_size**2,
                            P2=32*3*win_size**2,
                            fullDP=True)
    print "\nComputing the disparity map..."
    disparity_map = stereo.compute(image_left, image_right).astype(np.float32) / 16.0
    print "\nGenerating the 3D map..."
    h, w = image_left.shape[:2]
    focal_length = 0.8 * w
    # Perspective transformation matrix
    Q = np.float32([[1, 0, 0, -w/2.0],
                    [0, -1, 0, h/2.0],
                    [0, 0, 0, -focal_length],
                    [0, 0, 1, 0]])
    points_3D = cv2.reprojectImageTo3D(disparity_map, Q)
    colors = cv2.cvtColor(image_left, cv2.COLOR_BGR2RGB)
    mask_map = disparity_map > disparity_map.min()
    output_points = points_3D[mask_map]
    output_colors = colors[mask_map]
    print "\nCreating the output file...\n"
    create_output(output_points, output_colors, output_file)
This is the error I'm getting in the console:
Computing the disparity map...
Traceback (most recent call last):
File "rec.py", line 58, in <module>
disparity_map = stereo.compute(image_left,image_right).astype(np.float32) / 16.0
TypeError: Incorrect type of self (must be 'StereoMatcher' or its derivative)

You should use cv2.StereoSGBM_create(). In OpenCV 3.x the cv2.StereoSGBM constructor from the book (which was written for the older 2.4 API) was replaced by a factory function, and two of the parameters were renamed along the way: SADWindowSize is now blockSize, and the fullDP boolean became the mode flag:
stereo = cv2.StereoSGBM_create(minDisparity=min_disp,
                               numDisparities=num_disp,
                               blockSize=win_size,
                               uniquenessRatio=10,
                               speckleWindowSize=100,
                               speckleRange=32,
                               disp12MaxDiff=1,
                               P1=8*3*win_size**2,
                               P2=32*3*win_size**2,
                               mode=cv2.STEREO_SGBM_MODE_HH  # equivalent of fullDP=True
                               )
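If your build still rejects one of the keywords, note that the accepted parameter names differ slightly across OpenCV releases; checking the built-in help for the version you actually have installed settles it (a quick check, assuming an interactive session):
>>> import cv2
>>> help(cv2.StereoSGBM_create)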

Related

Rotated and deformed object detection by template in Python

I am a Python beginner. We have a project where we need to detect and crop out a desired pattern from a photo:
This is the original picture:
Each picture has a regular border like this, but it is not closed, so that the orientation can be detected (similar to a QR code).
The image to be detected is taken with a mobile phone and may be deformed, rotated, and scaled; we need to get the pattern in the middle of the box:
My code:
import cv2 as cv
import numpy as np
import math

def getdistance(p1, p2):
    v1 = np.array(p1)
    v2 = np.array(p2)
    v3 = v2 - v1
    v4 = math.hypot(v3[0], v3[1])
    return v4

img_object = cv.imread('small.png', cv.IMREAD_GRAYSCALE)
img_scene = cv.imread('img2.png', cv.IMREAD_GRAYSCALE)
if img_object is None or img_scene is None:
    print('Unable to read image!')
    exit(0)
minHessian = 400
detector = cv.SIFT_create()
keypoints_obj, descriptors_obj = detector.detectAndCompute(img_object, None)
keypoints_scene, descriptors_scene = detector.detectAndCompute(img_scene, None)
img_2 = cv.drawKeypoints(img_scene, keypoints_scene, img_scene)
matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)
ratio_thresh = 0.95
good_matches = []
for m, n in knn_matches:
    if m.distance < ratio_thresh * n.distance:
        good_matches.append(m)
img_matches = np.empty((max(img_object.shape[0], img_scene.shape[0]), img_object.shape[1] + img_scene.shape[1], 3), dtype=np.uint8)
cv.drawMatches(img_object, keypoints_obj, img_scene, keypoints_scene, good_matches, img_matches, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
obj = np.empty((len(good_matches), 2), dtype=np.float32)
scene = np.empty((len(good_matches), 2), dtype=np.float32)
for i in range(len(good_matches)):
    obj[i, 0] = keypoints_obj[good_matches[i].queryIdx].pt[0]
    obj[i, 1] = keypoints_obj[good_matches[i].queryIdx].pt[1]
    scene[i, 0] = keypoints_scene[good_matches[i].trainIdx].pt[0]
    scene[i, 1] = keypoints_scene[good_matches[i].trainIdx].pt[1]
H, _ = cv.findHomography(obj, scene, cv.RANSAC)
obj_corners = np.empty((4, 1, 2), dtype=np.float32)
obj_corners[0, 0, 0] = 0
obj_corners[0, 0, 1] = 0
obj_corners[1, 0, 0] = img_object.shape[1]
obj_corners[1, 0, 1] = 0
obj_corners[2, 0, 0] = img_object.shape[1]
obj_corners[2, 0, 1] = img_object.shape[0]
obj_corners[3, 0, 0] = 0
obj_corners[3, 0, 1] = img_object.shape[0]
scene_corners = cv.perspectiveTransform(obj_corners, H)
q1 = (int(scene_corners[0, 0, 0]), int(scene_corners[0, 0, 1]))
q2 = (int(scene_corners[1, 0, 0]), int(scene_corners[1, 0, 1]))
q3 = (int(scene_corners[2, 0, 0]), int(scene_corners[2, 0, 1]))
q4 = (int(scene_corners[3, 0, 0]), int(scene_corners[3, 0, 1]))
dist = getdistance(q1, q2)
disti = int(dist)
print('q1: %s, q2: %s, q3: %s, q4: %s, dist: %s' % (q1, q2, q3, q4, dist))
src_points = np.array([q3, q4, q1, q2], dtype="float32")
dst_points = np.array([[disti, disti], [0, disti], [0, 0], [disti, 0]], dtype="float32")
M = cv.getPerspectiveTransform(src_points, dst_points)
perspective = cv.warpPerspective(img_scene, M, (disti, disti), cv.INTER_LINEAR)
_, result = cv.threshold(perspective, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
cv.imshow('result', result)
cv.waitKey()
I have researched this for a long time, but the results are still not ideal. Are there better ideas for achieving what we need?
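For what it's worth, one low-risk place to start: 0.95 is a very permissive ratio threshold (Lowe's original suggestion is around 0.7), and findHomography can report which matches were RANSAC inliers so you can judge how well the model fits. A minimal sketch of those two tweaks, reusing the names from the code above:
ratio_thresh = 0.7  # stricter Lowe ratio; 0.95 lets many ambiguous matches through
good_matches = [m for m, n in knn_matches if m.distance < ratio_thresh * n.distance]
H, inlier_mask = cv.findHomography(obj, scene, cv.RANSAC, 5.0)  # 5 px reprojection tolerance
print('inliers: %d / %d' % (int(inlier_mask.sum()), len(good_matches)))
If the border really behaves like a fiducial marker, detecting its corners directly (for example with contour analysis) may prove more robust than feature matching on the interior pattern.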

How do I count the number of vector geometries that intersect a raster cell?

Or: The search for a faster and more accurate way to rasterize small OpenStreetMap extracts into population-weighted rasters.
I'd like to turn a small .pbf file into a GeoTiff which will be easier to do further spatial analysis on. For the purpose of this question I will constrain the requirements to dealing with polygon geometry since I already found a solution that works very well for lines. It works so well that I am considering converting all my polygons into lines.
To give an example of the type of data that I'd like to convert:
wget https://download.geofabrik.de/europe/liechtenstein-latest.osm.pbf
osmium tags-filter liechtenstein-latest.osm.pbf landuse=grass -o liechtenstein_grass.pbf
ogr2ogr -t_srs EPSG:3857 liechtenstein_grass.sqlite -dsco SPATIALITE=YES -nln multipolygons -nlt POLYGON -skipfailures liechtenstein_grass.pbf
I found a zonal statistics script here which we might be able to build from to solve this problem: http://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html#calculate-zonal-statistics
The above script takes a vector layer and a raster layer, iterates over the vector features, clips the raster to each feature, and computes some statistics on the clip.
Instead of normal zonal statistics, I would like to count the number of vector features that intersect each raster pixel. I have a global Int32 raster grid with a unique value for each pixel.
{qgis_process} run native:creategrid -- TYPE=2 EXTENT="-20037760, -8399416, 20037760, 18454624 [EPSG:3857]" HSPACING=1912 VSPACING=1912 HOVERLAY=0 VOVERLAY=0 CRS="EPSG:3857" OUTPUT="grid.gpkg"
sqlite3 land.gpkg
SELECT load_extension("mod_spatialite");
alter table output add column ogcfod int;
update output set ogcfod = fid;
gdal_rasterize -l output -a ogcfod -tap -tr 1912.0 1912.0 -a_nodata 0.0 -ot Int32 -of GTiff -co COMPRESS=DEFLATE -co PREDICTOR=2 grid.gpkg grid.tif -te -20037760 -8399416 20037760 18454624
So I'm thinking that if we could still iterate over the vector features (there are far, far fewer of those, versus the 88M+ zones in the raster grid), it would probably be much more performant.
I want a script that takes a vector layer and a raster layer, iterates over the vector features, looks up the values of all the pixels each feature covers, and then adds one to a dictionary: {px_id: qty}
However, when trying to make this script work, it keeps giving me the same geometry: it only shows me one of the pixel IDs over and over.
import sys

import gdal
import numpy
import ogr
import osr
from rich import inspect, print

def zonal_stats(feat, lyr, raster):
    # Get raster georeference info
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    # Reproject vector geometry to same projection as raster
    sourceSR = lyr.GetSpatialRef()
    targetSR = osr.SpatialReference()
    targetSR.ImportFromWkt(raster.GetProjectionRef())
    coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)
    feat = lyr.GetNextFeature()
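    # Note: this GetNextFeature() call overwrites the 'feat' argument that the
    # caller selected by FID and advances the layer's own iterator instead,
    # which is a likely reason every call appears to process the same geometry.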
    geom = feat.GetGeometryRef()
    geom.Transform(coordTrans)
    # Get extent of feat
    geom = feat.GetGeometryRef()
    if geom.GetGeometryName() == "MULTIPOLYGON":
        count = 0
        pointsX = []
        pointsY = []
        for polygon in geom:
            geomInner = geom.GetGeometryRef(count)
            ring = geomInner.GetGeometryRef(0)
            numpoints = ring.GetPointCount()
            for p in range(numpoints):
                lon, lat, z = ring.GetPoint(p)
                pointsX.append(lon)
                pointsY.append(lat)
            count += 1
    elif geom.GetGeometryName() == "POLYGON":
        ring = geom.GetGeometryRef(0)
        numpoints = ring.GetPointCount()
        pointsX = []
        pointsY = []
        for p in range(numpoints):
            lon, lat, z = ring.GetPoint(p)
            pointsX.append(lon)
            pointsY.append(lat)
    else:
        sys.exit("ERROR: Geometry needs to be either Polygon or Multipolygon")
    xmin = min(pointsX)
    xmax = max(pointsX)
    ymin = min(pointsY)
    ymax = max(pointsY)
    print(xmin, xmax)
    print(ymin, ymax)
    # Specify offset and rows and columns to read
    xoff = int((xmin - xOrigin) / pixelWidth)
    yoff = int((yOrigin - ymax) / pixelWidth)
    xcount = int((xmax - xmin) / pixelWidth) + 1
    ycount = int((ymax - ymin) / pixelWidth) + 1
    # Create memory target raster
    target_ds = gdal.GetDriverByName("MEM").Create(
        "", xcount, ycount, 1, gdal.GDT_Int32
    )
    target_ds.SetGeoTransform(
        (
            xmin,
            pixelWidth,
            0,
            ymax,
            0,
            pixelHeight,
        )
    )
    # Create for target raster the same projection as for the value raster
    raster_srs = osr.SpatialReference()
    raster_srs.ImportFromWkt(raster.GetProjectionRef())
    target_ds.SetProjection(raster_srs.ExportToWkt())
    # Rasterize zone polygon to raster
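    # Note: RasterizeLayer burns every feature in 'lyr' into the mask, not just
    # the current feature, which also makes each call look identical.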
    gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
    # Read raster as arrays
    banddataraster = raster.GetRasterBand(1)
    dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount)
    bandmask = target_ds.GetRasterBand(1)
    datamask = bandmask.ReadAsArray(0, 0, xcount, ycount)
    print(dataraster)
    # Mask zone of raster
    # zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
    # print(zoneraster)
    # exit()

def loop_zonal_stats(lyr, raster):
    featList = range(lyr.GetFeatureCount())
    statDict = {}
    for FID in featList:
        feat = lyr.GetFeature(FID)
        meanValue = zonal_stats(feat, lyr, raster)
        statDict[FID] = meanValue
    return statDict

def main(input_zonal_raster, input_value_polygon):
    raster = gdal.Open(input_zonal_raster)
    shp = ogr.Open(input_value_polygon)
    lyr = shp.GetLayer()
    return loop_zonal_stats(lyr, raster)

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print(
            "[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
        )
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])
Prior research:
https://gis.stackexchange.com/questions/177738/count-overlapping-polygons-including-duplicates
https://stackoverflow.com/a/47443399/697964
If gdal_rasterize could burn in the count of all the polygons that intersect each pixel (rather than a fixed value), that would likely fulfill my needs (see the note just after this list).
https://github.com/rory/osm-summary-heatmap/blob/main/Makefile
https://old.reddit.com/r/gis/comments/4n2q5v/count_overlapping_polygons_qgis/
heatmapkerneldensityestimation does not work very well, or maybe I'm not using it correctly, but the output seems off:
{qgis_process} run qgis:heatmapkerneldensityestimation -- INPUT="{basen}.json.geojson" RADIUS=2868 RADIUS_FIELD=None PIXEL_SIZE=1912 WEIGHT_FIELD=None KERNEL=4 DECAY=0 OUTPUT_VALUE=0 OUTPUT="{basen}.tif
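On the gdal_rasterize idea above: gdal_rasterize does have an -add flag that accumulates the burn value instead of overwriting it, so burning 1 per feature yields a per-pixel feature count. A sketch along the lines of the earlier commands (untested here; check gdal_rasterize --help for your GDAL version):
gdal_rasterize -burn 1 -add -tap -tr 1912.0 1912.0 -te -20037760 -8399416 20037760 18454624 -ot Int32 -co COMPRESS=DEFLATE liechtenstein_grass.sqlite grass_counts.tif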
This seems to work. The output is a CSV with columns ["px_id", "qty"]:
"""
python rasterlayerzonalvectorcounts.py grid.tif liechtenstein_grass.sqlite
MIT License
Based on https://github.com/pcjericks/py-gdalogr-cookbook/blob/master/raster_layers.rst#calculate-zonal-statistics
"""
import sys
import osr
import os
import ogr
import numpy
import gdal
import pandas
from joblib import Parallel, delayed
from collections import Counter

def zonal_stats(FID, lyr, raster):
    feat = lyr.GetFeature(FID)
    # Get raster georeference info
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    # Reproject vector geometry to same projection as raster
    sourceSR = lyr.GetSpatialRef()
    targetSR = osr.SpatialReference()
    targetSR.ImportFromWkt(raster.GetProjectionRef())
    coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)
    geom = feat.GetGeometryRef()
    geom.Transform(coordTrans)
    # Get extent of feat
    geom = feat.GetGeometryRef()
    xmin, xmax, ymin, ymax = geom.GetEnvelope()
    # Specify offset and rows and columns to read
    xoff = int((xmin - xOrigin) / pixelWidth)
    yoff = int((yOrigin - ymax) / pixelWidth)
    xcount = int((xmax - xmin) / pixelWidth) + 1
    ycount = int((ymax - ymin) / pixelWidth) + 1
    # Create memory target raster
    target_ds = gdal.GetDriverByName("MEM").Create(
        "", xcount, ycount, 1, gdal.GDT_Int32
    )
    target_ds.SetGeoTransform(
        (
            xmin,
            pixelWidth,
            0,
            ymax,
            0,
            pixelHeight,
        )
    )
    # Create for target raster the same projection as for the value raster
    raster_srs = osr.SpatialReference()
    raster_srs.ImportFromWkt(raster.GetProjectionRef())
    target_ds.SetProjection(raster_srs.ExportToWkt())
    # Rasterize zone polygon to raster
    gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
    # Read raster as arrays
    banddataraster = raster.GetRasterBand(1)
    dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount)
    bandmask = target_ds.GetRasterBand(1)
    datamask = bandmask.ReadAsArray(0, 0, xcount, ycount)
    # Mask zone of raster
    zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
    return numpy.array(zoneraster).tolist()

def loop_zonal_stats(input_value_polygon, input_zonal_raster):
    shp = ogr.Open(input_value_polygon)
    lyr = shp.GetLayer()
    print("Processing", lyr.GetFeatureCount(), "features")
    featList = range(lyr.GetFeatureCount())

    def processFID(input_value_polygon, input_zonal_raster, FID):
        shp = ogr.Open(input_value_polygon)
        raster = gdal.Open(input_zonal_raster)
        lyr = shp.GetLayer()
        if FID:
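            # Note: 'if FID:' is False for FID 0, so the first feature is
            # silently skipped; 'if FID is not None:' may be what was intended.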
            px_ids = zonal_stats(FID, lyr, raster)
            # print(px_ids)
            px_ids = [item for sublist in px_ids for item in sublist]
            return px_ids

    return Parallel(n_jobs=8)(
        delayed(processFID)(input_value_polygon, input_zonal_raster, FID)
        for FID in featList
    )

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print(
            "[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
        )
        sys.exit(1)
    input_zonal_raster = sys.argv[1]
    input_value_polygon = sys.argv[2]
    counts = list(
        filter(None, loop_zonal_stats(input_value_polygon, input_zonal_raster))
    )
    counts = Counter([item for sublist in counts for item in sublist])
    pandas.DataFrame.from_dict(data=counts, orient="index").to_csv(
        os.path.splitext(input_value_polygon)[0] + ".csv", header=False
    )
This one will create an output GeoTiff on the same grid system as the source zonal GeoTiff.
I wonder if it could be sped up by using numpy.meshgrid (see "What is the purpose of meshgrid in Python / NumPy?"); a sketch of that idea follows the script below.
"""
python rasterlayerzonalvectorcounts.py grid.tif liechtenstein_grass.sqlite
MIT License
Based on https://github.com/pcjericks/py-gdalogr-cookbook/blob/master/raster_layers.rst#calculate-zonal-statistics
"""
import sys
import osr
import os
import ogr
import numpy
import gdal
import pandas
from joblib import Parallel, delayed
from collections import Counter
from rich import print, inspect

def zonal_stats(FID, lyr, raster):
    feat = lyr.GetFeature(FID)
    # Get raster georeference info
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    # Get extent of feat
    geom = feat.GetGeometryRef()
    xmin, xmax, ymin, ymax = geom.GetEnvelope()
    # Specify offset and rows and columns to read
    xoff = int((xmin - xOrigin) / pixelWidth)
    yoff = int((yOrigin - ymax) / pixelWidth)
    xcount = int((xmax - xmin) / pixelWidth) + 1
    ycount = int((ymax - ymin) / pixelWidth) + 1
    feat_arr = []
    # if xcount != 1 or ycount != 1:
    #     print(xoff, yoff, xcount, ycount)
    for x in range(xcount):
        for y in range(ycount):
            # print(xoff + x, yoff + y)
            feat_arr.append((xoff + x, yoff + y))
    return feat_arr

def loop_zonal_stats(input_value_polygon, input_zonal_raster):
    shp = ogr.Open(input_value_polygon)
    lyr = shp.GetLayer()
    print("Processing", lyr.GetFeatureCount(), "features")
    featList = range(lyr.GetFeatureCount())

    def processFID(input_value_polygon, input_zonal_raster, FID):
        shp = ogr.Open(input_value_polygon)
        raster = gdal.Open(input_zonal_raster)
        lyr = shp.GetLayer()
        if FID:
            px_ids = zonal_stats(FID, lyr, raster)
            return px_ids

    return Parallel(n_jobs=1)(
        delayed(processFID)(input_value_polygon, input_zonal_raster, FID)
        for FID in featList
    )

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print(
            "[ ERROR ] you must supply two arguments: input-zone-raster-name.tif input-value-shapefile-name.shp "
        )
        sys.exit(1)
    input_zonal_raster = sys.argv[1]
    input_value_polygon = sys.argv[2]
    counts = list(
        filter(None, loop_zonal_stats(input_value_polygon, input_zonal_raster))
    )
    counts = Counter([item for sublist in counts for item in sublist])
    raster_srs = osr.SpatialReference()
    raster_srs.ImportFromWkt(gdal.Open(input_zonal_raster).GetProjectionRef())
    raster_arr = numpy.empty((14045, 20960), numpy.int32)
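    # Note: numpy.empty() leaves the array uninitialized, so pixels that never
    # receive a count keep arbitrary memory values; numpy.zeros() would line up
    # better with the SetNoDataValue(0) call below.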
    for px in counts.items():
        # print(px)
        raster_arr[px[0][1]][px[0][0]] = px[1]
    target_ds = gdal.GetDriverByName("GTiff").Create(
        os.path.splitext(input_value_polygon)[0] + ".tif",
        20960,
        14045,
        1,
        gdal.GDT_Int32,
        options=["COMPRESS=LZW"],
    )
    target_ds.SetGeoTransform(gdal.Open(input_zonal_raster).GetGeoTransform())
    target_ds.SetProjection(raster_srs.ExportToWkt())
    target_ds.GetRasterBand(1).WriteArray(raster_arr)
    target_ds.GetRasterBand(1).SetNoDataValue(0)
    target_ds.GetRasterBand(1).GetStatistics(0, 1)
    target_ds.FlushCache()
    target_ds = None
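On the meshgrid idea mentioned above the script: the double loop in zonal_stats() enumerates every (col, row) pair in a feature's bounding window, and numpy can generate those pairs in one vectorized step. A minimal drop-in sketch (it produces the same pairs as the loop, order aside; untested against the full pipeline):
import numpy

def window_pixel_ids(xoff, yoff, xcount, ycount):
    # all (col, row) pairs covered by the bounding window, without Python loops
    cols, rows = numpy.meshgrid(numpy.arange(xoff, xoff + xcount),
                                numpy.arange(yoff, yoff + ycount))
    return list(zip(cols.ravel().tolist(), rows.ravel().tolist()))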

ModuleNotFoundError: No module named 'gtk'

I've tried to follow several solutions on the internet, including this one, but no luck. I'm in the process of implementing an object detection system. I am on Windows 10, using PyCharm with Python 3.8.
I am getting errors for the packages; I've tried to add them through the package installer and through the terminal, with no luck. Here's the error:
Traceback (most recent call last):
File "D:/CabaleGame/runner.py", line 2, in <module>
import processCards
File "D:\CabaleGame\processCards.py", line 1, in <module>
import gtk.gdk
ModuleNotFoundError: No module named 'gtk'
Process finished with exit code 1
I've downloaded the package from here and here; which one is correct?
My program file:
import gtk.gdk
import cv
import time
import os
import string

def takeScreenCapture(screenShotNum=""):
    time.sleep(1)
    w = gtk.gdk.get_default_root_window()
    sz = w.get_size()
    #print "The size of the window is %d x %d" % sz
    pb = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, sz[0], sz[1])
    pb = pb.get_from_drawable(w, w.get_colormap(), 0, 0, 0, 0, sz[0], sz[1])
    # Convert gtk.PixelBuf to a NumPy array
    array = pb.get_pixels_array()
    # Convert NumPy array to CvMat
    mat = cv.fromarray(array)
    # Convert RGB to BGR
    cv.CvtColor(mat, mat, cv.CV_RGB2BGR)
    #cv.ShowImage("win", mat)
    #cv.WaitKey(0)
    return mat

def getMeaningFromCards(cards):
    """
    This takes a dictionary of the form:
        (x, y) : Card image
    and returns a dictionary of the form:
        (x, y) : (number, suit)
    (x, y) are the coordinates of the top left of the card
    """
    imgdir = "LibraryImages"
    templatesNums = os.listdir(os.path.join(imgdir, "Numbers"))
    templatesSuits = os.listdir(os.path.join(imgdir, "Suits"))
    #templates = filter(lambda s: s[-4:] == ".png", templates)
    templatesNums = map(lambda s: os.path.join(imgdir, "Numbers", s), templatesNums)
    templatesSuits = map(lambda s: os.path.join(imgdir, "Suits", s), templatesSuits)
    for k in cards.keys():
        card = cards[k]
        cardImg = cv.CreateImageHeader((card.width, card.height), 8, 3)
        cv.SetData(cardImg, card.tostring())
        numAndSuit3 = cv.GetSubRect(cardImg, (0, 0, 30, 80))
        numAndSuit1 = cv.CreateImage((numAndSuit3.width, numAndSuit3.height), 8, 1)
        cv.CvtColor(numAndSuit3, numAndSuit1, cv.CV_RGB2GRAY)
        # Convert the 1 channel grayscale to 3 channel grayscale
        # (GRAY2RGB doesn't actually introduce color)
        cv.CvtColor(numAndSuit1, numAndSuit3, cv.CV_GRAY2RGB)
        num = findBestTemplateMatch(templatesNums, numAndSuit3)
        suit = findBestTemplateMatch(templatesSuits, numAndSuit3)
        #print num, suit
        # If this image was recognized as a card, but didn't match
        # any template, it shouldn't be in the list in the first place
        if num == None or suit == None:
            del cards[k]
            continue
        num = string.split(os.path.basename(num), '.')[0]
        suit = string.split(os.path.basename(suit), '.')[0]
        # The alternate file names have underscores
        # after their names
        if num[-1] == '_':
            num = num[:-1]
        if suit[-1] == '_':
            suit = suit[:-1]
        cards[k] = (num, suit)
        #cv.ShowImage("NumandSuit", numAndSuit)
        #cv.WaitKey(0)
    print cards
    return cards

def findBestTemplateMatch(tplList, img):
    """
    Compares img against a list of templates.
    tplList is a list of string filenames of template images
    Returns a tuple (num, suit) if a template is suitably matched
    or None if not
    """
    minTpl = 200  # arbitrarily large number
    tString = None
    for t in tplList:
        tpl = cv.LoadImage(t)
        w = img.width - tpl.width + 1
        h = img.height - tpl.height + 1
        result = cv.CreateImage((w, h), 32, 1)
        cv.MatchTemplate(img, tpl, result, cv.CV_TM_SQDIFF_NORMED)
        (minVal, maxVal, minLoc, maxLoc) = cv.MinMaxLoc(result)
        #print t
        #print (minVal, maxVal, minLoc, maxLoc)
        # 0.2 found by experiment (the non-card images end up being around
        # 0.25 - 0.28, and all the card images were around 0.08 and less)
        if minVal < minTpl and minVal < 0.2:
            minTpl = minVal
            tString = t
    #print minTpl, tString
    #cv.ShowImage("win", img)
    #cv.ShowImage("win2", result)
    #cv.WaitKey(0)
    return tString

def extractCards(fileName=None):
    """
    Given an image, this will extract the cards from it.
    This takes a filename as an optional argument
    This filename should be the name of an image file.
    This returns a dictionary of the form:
        (x, y) : Card image
    It is likely that the output from this will go to the
    getMeaningFromCards() function.
    """
    if fileName == None:
        mat = takeScreenCapture()
    else:
        mat = cv.LoadImage(fileName)
    # First crop the image: but only crop out the bottom.
    # It is useful to have all dimensions accurate to the screen
    # because otherwise they will throw off the mouse moving and clicking.
    # Cropping out the bottom does not change anything in terms of the mouse.
    unnec_top_distance = 130
    unnec_bottom_distance = 40
    margin = 50
    submat = cv.GetSubRect(mat, (0, 0, mat.width, mat.height - unnec_bottom_distance))
    subImg = cv.CreateImageHeader((submat.width, submat.height), 8, 3)
    cv.SetData(subImg, submat.tostring())
    gray = cv.CreateImage((submat.width, submat.height), 8, 1)
    cv.CvtColor(submat, gray, cv.CV_RGB2GRAY)
    thresh = 250
    max_value = 255
    cv.Threshold(gray, gray, thresh, max_value, cv.CV_THRESH_BINARY)
    cv.Not(gray, gray)
    #cv.ShowImage("sub", submat)
    #cv.WaitKey(0)
    storage = cv.CreateMemStorage(0)
    cpy = cv.CloneImage(gray)
    contours = cv.FindContours(cpy, storage, cv.CV_RETR_LIST, cv.CV_CHAIN_APPROX_SIMPLE, (0, 0))
    #contours = cv.ApproxPoly(contours, cv.CreateMemStorage(), cv.CV_POLY_APPROX_DP, 3, 1)
    bboxes = []
    if contours:
        while contours:
            area = cv.ContourArea(contours)
            # It turns out that all the cards are about 44000 in area...
            # It would definitely be nice to have a better way to do this:
            # ie, find the size of the card programmatically and use it then
            if area > 44000 and area < submat.width * submat.height * 2 / 3:
                bb = cv.BoundingRect(contours)
                bboxes.append(bb)
            contours = contours.h_next()
    #drawBoundingBoxes(bboxes, submat)
    # cards is a dictionary of the form:
    #   (x, y) : card
    cards = {}
    for box in bboxes:
        card = cv.GetSubRect(subImg, box)
        #cv.ShowImage("card", card)
        #cv.WaitKey(0)
        cards[(box[0], box[1])] = card
    return cards

def drawBoundingBoxes(bb, img):
    for b in bb:
        x = b[0]
        y = b[1]
        width = b[2]
        height = b[3]
        cv.Rectangle(img, (x, y), (x + width, y + height), (0, 255, 0, 0))
    cv.ShowImage("bb", img)
    cv.WaitKey(0)

def drawSquares(listWithPoints, img):
    for l in listWithPoints:
        for p in range(len(l) - 1):
            cv.Line(img, l[p], l[p + 1], (0, 0, 255, 0), 2)
        cv.Line(img, l[-1], l[0], (0, 0, 255, 0), 2)
    #cv.ShowImage("sub", img)
    #cv.WaitKey(0)

def contourToPointList(contour):
    plist = []
    for (x, y) in contour:
        plist.append((x, y))
    return plist

if __name__ == '__main__':
    cards = extractCards('CardImages/4_heart.jpg')
    print cards
    #c = cards[cards.keys()[0]]
    #print c
Is it possible to add the package manually through the folders? Would it work if I put it here:
C:\Users\User1\AppData\Local\Programs\Python\Python38\include
From the Python documentation: replace import gtk.gdk with:
import gi
gi.require_version("Gtk", "insert your gtk version")
from gi.repository import Gtk
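Note that the rest of the script also uses the long-gone Python 2 cv API, so more than the import will need updating on Python 3.8. As a rough sketch of what the screen-capture part might look like under PyGObject/GTK 3 (assumes PyGObject is installed, which on Windows usually means an MSYS2 environment; untested here):
import gi
gi.require_version("Gdk", "3.0")
from gi.repository import Gdk
import numpy as np

win = Gdk.get_default_root_window()               # replaces gtk.gdk.get_default_root_window()
w, h = win.get_width(), win.get_height()
pb = Gdk.pixbuf_get_from_window(win, 0, 0, w, h)  # replaces Pixbuf.get_from_drawable()
arr = np.frombuffer(pb.get_pixels(), dtype=np.uint8)
arr = arr.reshape(h, w, pb.get_n_channels())      # assumes rowstride == width * n_channels
# the old 'cv' module is gone; cv2 works on NumPy arrays directly, e.g.:
# bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)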

OpenCV + Numpy Script

The issue I'm having is that the two scripts below both output this error: https://i.imgur.com/sLH6Mv4.png
TypeError: FeatureDetector.detect() takes at most 2 arguments (3 given)
I can avoid it in Script 2 below by deleting
useProvidedKeypoints = False
from the end of
kp, descritors = surf.detect(imgg, None, useProvidedKeypoints=False)
which then leads to this error in Script 2: https://i.imgur.com/ap0odal.png
TypeError: float() argument must be a string or a number
and this error in Script 1: i.imgur.com/UVzNvP1.png (2 link limit, added manually)
TypeError: trainData data type = 17 is not supported
Any help would be greatly appreciated. The main thing I want out of this is a script I can tweak and edit until I understand the functions involved a little better.
In summary: I'm not really sure why kp, descritors = surf.detect(imgg, None, useProvidedKeypoints=False) says there are too many arguments, because the person who helped me write this seemed to think it should work.
Script 1:
import cv2
import numpy as np

img = cv2.imread('win18.jpg')
imgg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
surf = cv2.SURF()
kp, descritors = surf.detect(imgg, None, useProvidedKeypoints=False)
samples = np.array(descritors)
responses = np.arange(len(kp), dtype=np.float32)
knn = cv2.KNearest()
knn.train(samples, responses)
template = cv2.imread('win17.jpg')
templateg = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
keys, desc = surf.detect(templateg, None, useProvidedKeypoints=False)
for h, des in enumerate(desc):
    des = np.array(des, np.float32).reshape((1, 128))
    retval, results, neigh_resp, dists = knn.find_nearest(des, 1)
    res, dist = int(results[0][0]), dists[0][0]
    if dist < 0.1:
        color = (0, 0, 255)
    else:
        print dist
        color = (255, 0, 0)
    x, y = kp[res].pt
    center = (int(x), int(y))
    cv2.circle(img, center, 2, color, -1)
    x, y = keys[h].pt
    center = (int(x), int(y))
    cv2.circle(template, center, 2, color, -1)
cv2.imshow('img', img)
cv2.imshow('tm', template)
cv2.waitKey(0)
cv2.destroyAllWindows()
Script 2:
import cv2
import numpy

opencv_haystack = cv2.imread('win12.jpg')
opencv_needle = cv2.imread('win1.jpg')
ngrey = cv2.cvtColor(opencv_needle, cv2.COLOR_BGR2GRAY)
hgrey = cv2.cvtColor(opencv_haystack, cv2.COLOR_BGR2GRAY)
hessian_threshold = 85
detector = cv2.SURF(hessian_threshold)
(hkeypoints, hdescriptors) = detector.detect(hgrey, None, useProvidedKeypoints=False)
(nkeypoints, ndescriptors) = detector.detect(ngrey, None, useProvidedKeypoints=False)
rowsize = len(hdescriptors) / len(hkeypoints)
if rowsize > 1:
    hrows = numpy.array(hdescriptors, dtype=numpy.float32).reshape((-1, rowsize))
    nrows = numpy.array(ndescriptors, dtype=numpy.float32).reshape((-1, rowsize))
else:
    hrows = numpy.array(hdescriptors, dtype=numpy.float32)
    nrows = numpy.array(ndescriptors, dtype=numpy.float32)
    rowsize = len(hrows[0])
samples = hrows
responses = numpy.arange(len(hkeypoints), dtype=numpy.float32)
knn = cv2.KNearest()
knn.train(samples, responses)
# (the matching loop appears to have been lost when pasting; restored here
# following the same pattern as Script 1)
for i, descriptor in enumerate(nrows):
    descriptor = numpy.array(descriptor, dtype=numpy.float32).reshape((1, rowsize))
    retval, results, neigh_resp, dists = knn.find_nearest(descriptor, 1)
    res, dist = int(results[0][0]), dists[0][0]
    if dist < 0.1:
        color = (0, 0, 255)
    else:
        color = (255, 0, 0)
    x, y = hkeypoints[res].pt
    center = (int(x), int(y))
    cv2.circle(opencv_haystack, center, 2, color, -1)
    x, y = nkeypoints[i].pt
    center = (int(x), int(y))
    cv2.circle(opencv_needle, center, 2, color, -1)
cv2.imshow('haystack', opencv_haystack)
cv2.imshow('needle', opencv_needle)
cv2.waitKey(0)
cv2.destroyAllWindows()
Hi, I know it's late, but for the ones still facing this problem: try replacing detect() with detectAndCompute().
That's how I got the error removed.
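For what that looks like in practice, a sketch against Script 1 (assuming a modern OpenCV build where SURF lives in the contrib module cv2.xfeatures2d):
surf = cv2.xfeatures2d.SURF_create(400)             # plain cv2.SURF() in 2.4.x builds
kp, descritors = surf.detectAndCompute(imgg, None)  # second arg is the mask; no useProvidedKeypoints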
when in doubt, ...
>>> s = cv2.SURF()
>>> help(s.detect)
Help on built-in function detect:
detect(...)
    detect(image[, mask]) -> keypoints
so, your assumptions about the args to SURF.detect() were quite off.

3D reconstruction and distance measurement

Hi guys, I have been working on a small program in Python, using the OpenCV library and two webcams, to measure the distance between the two cameras and the object right in front of them (using the disparity map). When I run the program I expect a single number as the distance, but instead I get a matrix full of different numbers, plus one number that doesn't change even when I change the view. Can anybody tell me why?
Here is the code:
import numpy as np
import cv2 as cv
import cv2.cv as cv1
from VideoCapture import Device
import os

def caliLeftCam():
    args, img_mask = getopt.getopt(sys.argv[1:], '', ['save=', 'debug=', 'square_size='])
    args = dict(args)
    try: img_mask = img_mask[0]
    except: img_mask = '../cpp/img*.jpg'
    img_names = glob(img_mask)
    debug_dir = args.get('--debug')
    square_size = float(args.get('--square_size', 1.0))
    pattern_size = (7, 5)
    pattern_points = np.zeros((np.prod(pattern_size), 3), np.float32)
    pattern_points[:, :2] = np.indices(pattern_size).T.reshape(-1, 2)
    pattern_points *= square_size
    obj_points = []
    img_pointsL = []
    h, w = 0, 0
    for fn in img_names:
        print "processing %s..." % fn,
        imgL = cv.imread(fn, 0)
        h, w = imgL.shape[:2]
        found, corners = cv.findChessboardCorners(imgL, pattern_size)
        if found:
            term = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_COUNT, 30, 0.1)
            cv.cornerSubPix(imgL, corners, (5, 5), (-1, -1), term)
            if debug_dir:
                vis = cv.cvtColor(imgL, cv.COLOR_GRAY2BGR)
                cv.drawChessboardCorners(vis, pattern_size, corners, found)
                path, name, ext = splitfn(fn)
                cv.imwrite('%s/%s_chess.bmp' % (debug_dir, name), vis)
        if not found:
            print "chessboard not found"
            continue
        img_pointsL.append(corners.reshape(-1, 2))
        obj_points.append(pattern_points)
        print 'ok'
    rmsL, cameraL_matrix, dist_coefsL, rvecsL, tvecsL = cv.calibrateCamera(obj_points, img_pointsL, (w, h))
    print "RMSL:", rmsL
    print "Left camera matrix:\n", cameraL_matrix
    print "distortion coefficients: ", dist_coefsL.ravel()
    newcameramtxL, roi = cv.getOptimalNewCameraMatrix(cameraL_matrix, dist_coefsL, (w, h), 1, (w, h))
    # undistort
    mapxL, mapyL = cv.initUndistortRectifyMap(cameraL_matrix, dist_coefsL, None, newcameramtxL, (w, h), 5)
    dstL = cv.remap(imgL, mapxL, mapyL, cv.INTER_LINEAR)
    return img_pointsL, cameraL_matrix, dist_coefsL

def caliRightCam():
    args, img_mask = getopt.getopt(sys.argv[1:], '', ['save=', 'debug=', 'square_size='])
    args = dict(args)
    try: img_mask = img_mask[0]
    except: img_mask = '../cpp/ph*.jpg'
    img_names = glob(img_mask)
    debug_dir = args.get('--debug')
    square_size = float(args.get('--square_size', 1.0))
    pattern_size = (7, 5)
    pattern_points = np.zeros((np.prod(pattern_size), 3), np.float32)
    pattern_points[:, :2] = np.indices(pattern_size).T.reshape(-1, 2)
    pattern_points *= square_size
    obj_points = []
    img_pointsR = []
    h, w = 0, 0
    for fn in img_names:
        print "processing %s..." % fn,
        imgR = cv.imread(fn, 0)
        h, w = imgR.shape[:2]
        found, corners = cv.findChessboardCorners(imgR, pattern_size)
        if found:
            term = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_COUNT, 30, 0.1)
            cv.cornerSubPix(imgR, corners, (5, 5), (-1, -1), term)
            if debug_dir:
                vis = cv.cvtColor(imgR, cv.COLOR_GRAY2BGR)
                cv.drawChessboardCorners(vis, pattern_size, corners, found)
                path, name, ext = splitfn(fn)
                cv.imwrite('%s/%s_chess.bmp' % (debug_dir, name), vis)
        if not found:
            print "chessboard not found"
            continue
        img_pointsR.append(corners.reshape(-1, 2))
        obj_points.append(pattern_points)
        print 'ok'
    rmsR, cameraR_matrix, dist_coefsR, rvecsR, tvecsR = cv.calibrateCamera(obj_points, img_pointsR, (w, h))
    print "RMSR:", rmsR
    print "Right camera matrix:\n", cameraR_matrix
    print "distortion coefficients: ", dist_coefsR.ravel()
    newcameramtxR, roi = cv.getOptimalNewCameraMatrix(cameraR_matrix, dist_coefsR, (w, h), 1, (w, h))
    # undistort
    mapxR, mapyR = cv.initUndistortRectifyMap(cameraR_matrix, dist_coefsR, None, newcameramtxR, (w, h), 5)
    dstR = cv.remap(imgR, mapxR, mapyR, cv.INTER_LINEAR)
    return img_pointsR, obj_points, cameraR_matrix, dist_coefsR

def Pics():
    vc = cv.VideoCapture(2)
    retVal, frame = vc.read()
    while True:
        if frame is not None:
            imgray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            ret, thresh = cv.threshold(imgray, 127, 255, 1)
            contours, hierarchy = cv.findContours(thresh, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
            cv.namedWindow("threshold")
            cv.namedWindow("Camera")
            #cv.drawContours(frame, contours, -1, (0, 255, 0), 2)
            cv.imshow("Camera", frame)
            cv.imshow("threshold", thresh)
            rval, frame = vc.read()
        if cv.waitKey(1) & 0xFF == 27:
            break
    cv1.DestroyAllWindows()

def LeftCap():
    cam = Device(2)
    cam.saveSnapshot('imageL.jpg')
    fn = 'C:\opencv2.4.8\sources\samples\python2\imageL.jpg'
    return fn

def RightCap():
    cam = Device(0)
    cam.saveSnapshot('imageR.jpg')
    fn = 'C:\opencv2.4.8\sources\samples\python2\imageR.jpg'
    return fn

def Calculate(Li, Ri, Mat):
    img_L = cv.pyrDown(cv.imread(Li))
    img_R = cv.pyrDown(cv.imread(Ri))
    window_size = 3
    min_disp = 16
    num_disp = 112 - min_disp
    stereo = cv.StereoSGBM(minDisparity=min_disp,
                           numDisparities=num_disp,
                           SADWindowSize=window_size,
                           uniquenessRatio=10,
                           speckleWindowSize=100,
                           speckleRange=32,
                           disp12MaxDiff=1,
                           P1=8*3*window_size**2,
                           P2=32*3*window_size**2,
                           fullDP=False)
    print "computing disparity..."
    disp = stereo.compute(img_L, img_R).astype(np.float32) / 16.0
    print "generating 3d point cloud..."
    h, w = img_L.shape[:2]
    f = 0.8 * w  # guess for focal length
    points = cv.reprojectImageTo3D(disp, Mat)
    colors = cv.cvtColor(img_L, cv.COLOR_BGR2RGB)
    mask = disp > disp.min()
    cv.imshow('left', img_L)
    cv.imshow('disparity', (disp - min_disp) / num_disp)
    b = 6.50
    D = b * f / disp
    print "The Distance =", D
    cv.waitKey()
    cv1.DestroyAllWindows()

if __name__ == '__main__':
    import sys, getopt
    from glob import glob
    Img_pointsL, Cam_MatL, DisL = caliLeftCam()
    Img_pointsR, obj_points, Cam_MatR, DisR = caliRightCam()
    print "Running stereo calibration..."
    retval, Cam_MatL, DisL, Cam_MatR, DisR, R, T, E, F = cv.stereoCalibrate(obj_points, Img_pointsL, Img_pointsR, (384, 288))
    print "running rectification..."
    RL, Rr, PL, PR, Q, validRoiL, validRoiR = cv.stereoRectify(Cam_MatL, DisL, Cam_MatR, DisR, (384, 288), R, T)
    Pics()
    Li = LeftCap()
    Ri = RightCap()
    Calculate(Li, Ri, Q)
Not sure if this is the problem, but you call stereoRectify() through cv2 (which you imported as cv), not through the old cv2.cv.StereoRectify(), and you provided the arguments in the wrong order.
From the documentation:
cv.StereoRectify(cameraMatrix1, cameraMatrix2, distCoeffs1, distCoeffs2, imageSize, R, T, R1, R2, P1, P2, Q=None, flags=CV_CALIB_ZERO_DISPARITY, alpha=-1, newImageSize=(0, 0))
You did it this (wrong) way:
cv.stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2, ...)
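On the original question of getting "many different numbers": stereo.compute() returns one disparity value per pixel, so D = b*f/disp in Calculate() is a whole depth map, not a single distance. The one number that never changes is most likely coming from invalid-disparity pixels, which StereoSGBM marks with minDisparity - 1. To reduce the map to a single representative distance for the object in front of the cameras, something like this sketch could work (it reuses the names from Calculate(); the central-window choice is an assumption):
# median depth over the valid disparities in the central third of the image
hc, wc = disp.shape
center = disp[hc//3:2*hc//3, wc//3:2*wc//3]
valid = center[center > disp.min()]
D_single = b * f / np.median(valid)
print "Approximate distance:", D_single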
