I developed a TensorFlow object-detection model with a single class (it had a loss of 0.03 and was trained on 680 labelled images). I am trying to use this model to detect the object in every video frame. However, whenever I run my code, it detects something in the top-left of the screen, inside the black border surrounding the video. I tried changing the model from one trained with MobileNet to one trained with EfficientDet D3, and the same issue persisted. I then changed my code to require a minimum box size and a higher score. I have also tried letting it make multiple detections and using the one with the highest score. However, with all of these conditions, it didn't detect anything that fulfilled the requirements. My code is as follows:
import os
import time
import tensorflow as tf
import cv2
import scipy
import math
import pandas as pd
import numpy as np
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from base64 import b64encode
#os.chdir("C:\\Users\\Ibrahim\\desktop")
PATH_TO_SAVED_MODEL = "C:/Users/Ibrahim/Desktop/fine_tuned_model/content/fine_tuned_model/saved_model"
# Load label map and obtain class names and ids
#label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
category_index=label_map_util.create_category_index_from_labelmap("C:\\Users\\Ibrahim\\Desktop\\customTF2-20221225T123609Z-001\\customTF2\\data\\label_map.pbtxt",use_display_name=True)
file = "CL_1_S0003.mp4"
video = cv2.VideoCapture(file)
ret,frame=video.read()
#getting the walls bbox
wall_bbox = cv2.selectROI(frame)
(x_wall,y_wall,x2_wall,y2_wall) = wall_bbox
print(wall_bbox)
num_cont_frames=0
#video = cv.VideoCapture(path)
ball_size = 0.22 #diameter of a regulation ball in meters
fps_cam = 10000 # Change this to the required fps of the video
fps_vid =video.get(cv2.CAP_PROP_FPS)
fps_time= fps_vid / fps_cam
#print(fps_time)
model = tf.saved_model.load(PATH_TO_SAVED_MODEL)
signature = list(model.signatures.values())[0]
# Initialize variable to track state of ball (in contact with wall or not)
in_contact = False
# Initialize variable to track whether in_contact has ever been True
in_contact_ever = False
# Initialize lists for inbound and outbound velocities
inbound_velocities = []
outbound_velocities = []
# Calculate time interval between frames in seconds
time_interval = 1 / fps_cam
scale = []
x_list = []
y_list = []
x_def=[]
inbound_x = []
inbound_y = []
outbound_x = []
outbound_y = []
w1=[]
score_thresh = 0.8 # Minimum threshold for object detection
max_detections = 20
while True:
# Read frame from video
ret, frame = video.read()
if not ret:
break
# Add a batch dimension to the frame tensor
frame_tensor = tf.expand_dims(frame, axis=0)
# Get detections for image
    detections = signature(frame_tensor)  # run the saved model's serving signature on the frame
scores = detections['detection_scores'][0, :max_detections].numpy()
bboxes = detections['detection_boxes'][0, :max_detections].numpy()
labels = detections['detection_classes'][0, :max_detections].numpy().astype(np.int64)
labels = [category_index[n]['name'] for n in labels]
# Initialize variables to keep track of the maximum score and corresponding bounding box
max_score = 0
selected_bbox = None
# Loop through all bounding boxes
for bbox, score in zip(bboxes, scores):
# Check if the score is greater than the current maximum score
if score > max_score:
# Update the maximum score and corresponding bounding box
max_score = score
selected_bbox = bbox
# Check if a bounding box was selected
if selected_bbox is not None:
# Extract bounding box coordinates
(x, y, w, h) = selected_bbox
# Filter out bounding boxes that are too small (smaller than a minimum size)
if w >= 10 and h >= 10:
# Draw bounding box on frame
cv2.rectangle(frame, (int(x), int(y)), (int(x+w), int(y+h)), (0,255,0), 20, 1)
cv2.imshow('Frame', frame)
cv2.waitKey(1)
x2=x+w
y2=y+h
# Calculate center point of bounding box
x_center = (x + x2) / 2
y_center = (y + y2) / 2
# Append x and y center points to lists
x_list.append(x_center)
y_list.append(y_center)
w1.append(w)
# Calculate other variables and metrics using bbox
            scale.append(ball_size / h)  # meters per pixel: real ball diameter (m) divided by its height in the frame (pixels)
#x_list.append(x2) #list of x positions of right edge
#y_list.append(y2)
if (x_center - w) < max(x2_wall, x_wall): #sometimes the bbox is the wrong way around
# Set in_contact to True
in_contact = True
# Set in_contact_ever to True
in_contact_ever = True
# Increment counter
num_cont_frames = num_cont_frames + 1
x_defe = x2-x2_wall
x_def.append(x_defe)
else:
in_contact = False
if in_contact == False and in_contact_ever==False:
inbound_x.append(x_center) #list of x positions at center of ball
inbound_y.append(y_center) #list of y positions at center of ball
if in_contact == False and in_contact_ever==True:
                outbound_x.append(x_center)  # list of x positions at center of ball after contact
                outbound_y.append(y_center)  # list of y positions at center of ball after contact
print(outbound_x)
else:
cv2.putText(frame,'Error',(100,0),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),2)
cv2.imshow('Tracking',frame)
if cv2.waitKey(1) & 0XFF==27:
break
cv2.destroyAllWindows()
scale_ave = scipy.stats.trim_mean(scale, 0.2)  # trimmed mean, cutting 20% from either end to remove some extraneous results
x_diff=[]
y_diff=[]
x_len = len(x_list) - 1  # minus 1 so the i+1 lookups below don't run past the end of the list
for i in range(x_len):
x_diff.append(x_list[i]-x_list[i+1]) #find x distance per frame
for i in range(x_len):
y_diff.append(y_list[i]-y_list[i+1]) #find y distance per frame
pyth_dist=[]
pyth_sub=[]
x2_len=len(x_diff)-1
x_speed=[]
y_speed=[]
for i in range(x2_len):
x_speeds=x_diff[i]*scale_ave*fps_cam
x_speed.append(x_speeds)
y_speeds=y_diff[i]*scale_ave*fps_cam
y_speed.append(y_speeds)
pyth_sub=math.hypot(x_diff[i] , y_diff[i])
pyth_dist.append(pyth_sub) #do pythagoras to find pixel distance per frame
realdist=[]
speed=[]
for i in range(x2_len):
realdistcalc=(pyth_dist[i]*scale_ave)
realdist.append(realdistcalc) # change from pixels to meters
realdist = [item for item in realdist if item <= 1]  # drop implausible per-frame distances over 1 m (removing while iterating skips elements)
distlen=len(realdist)-1
for i in range(distlen):
speedcalc=realdist[i]*fps_cam
speed.append(speedcalc)
contact_time=num_cont_frames/fps_cam
print(contact_time)
if x_def:
realxdef = min(x_def)*scale_ave
else:
realxdef = 0
print(realxdef)
# Calculate inbound velocities
inbound_x_diff = []
inbound_y_diff = []
# Calculate inbound x- and y-velocities
inbound_x_velocities = []
inbound_y_velocities = []
inbound_len = len(inbound_x) - 1
# Calculate differences between consecutive x and y coordinates
for i in range(inbound_len):
inbound_x_diff.append(inbound_x[i] - inbound_x[i + 1])
inbound_y_diff.append(inbound_y[i] - inbound_y[i + 1])
# Calculate inbound velocities in meters per second
inbound_velocities = []
for i in range(inbound_len):
inbound_x_velocity = inbound_x_diff[i] * scale_ave * fps_cam
inbound_x_velocities.append(inbound_x_velocity)
inbound_y_velocity = inbound_y_diff[i] * scale_ave * fps_cam
inbound_y_velocities.append(inbound_y_velocity)
inbound_velocity = math.hypot(inbound_x_diff[i], inbound_y_diff[i]) * scale_ave * fps_cam
inbound_velocities.append(inbound_velocity)
# Calculate outbound velocities
outbound_x_diff = []
outbound_y_diff = []
outbound_len = len(outbound_x) - 1
# Calculate differences between consecutive x and y coordinates
for i in range(outbound_len):
outbound_x_diff.append(outbound_x[i] - outbound_x[i + 1])
outbound_y_diff.append(outbound_y[i] - outbound_y[i + 1])
# Calculate outbound velocities in meters per second
outbound_velocities = []
outbound_x_velocities = []
outbound_y_velocities = []
for i in range(outbound_len):
outbound_x_velocity = outbound_x_diff[i] * scale_ave * fps_cam
outbound_x_velocities.append(outbound_x_velocity)
outbound_y_velocity = outbound_y_diff[i] * scale_ave * fps_cam
outbound_y_velocities.append(outbound_y_velocity)
outbound_velocity = math.hypot(outbound_x_diff[i], outbound_y_diff[i]) * scale_ave * fps_cam
    outbound_velocities.append(outbound_velocity)
I expected a bounding box around the ball. I tried changing the model, increasing the maximum number of detections, adding a minimum size for the detections, and increasing the required detection score.
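One thing I'm unsure about is the box format: as far as I can tell, the TF Object Detection API returns detection_boxes as normalized [ymin, xmin, ymax, xmax] values in the 0-1 range rather than pixel (x, y, w, h) values, so a conversion roughly like the sketch below may be needed before drawing (draw_best_box is just an illustrative helper, not part of my script):
import cv2
import numpy as np

def draw_best_box(frame, detections, score_thresh=0.8):
    # keep the highest-scoring detection, assuming normalized [ymin, xmin, ymax, xmax] boxes
    h, w = frame.shape[:2]
    scores = detections['detection_scores'][0].numpy()
    boxes = detections['detection_boxes'][0].numpy()
    best = int(np.argmax(scores))
    if scores[best] < score_thresh:
        return None
    ymin, xmin, ymax, xmax = boxes[best]
    # scale the normalized coordinates up to pixel coordinates
    x1, y1 = int(xmin * w), int(ymin * h)
    x2, y2 = int(xmax * w), int(ymax * h)
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return (x1, y1, x2, y2)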
I'm attempting to extend the 'tail' of an arrow. So far I've been able to draw a line through the center of the arrow, but this line extends both ways, rather than in just one direction. The script below shows my progress. Ideally I would be able to extend the tail of the arrow regardless of the orientation of the arrow image. Any suggestions on how to accomplish this? Image examples below, L:R start, progress, goal.
import cv2
# import image and convert to grayscale
image = cv2.imread("image path")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow("original",image)
# inverts black and white
gray = 255 - image
cv2.imshow("Inverted", gray)
# Extend the borders for the line
extended = cv2.copyMakeBorder(gray, 20, 20, 10, 10, cv2.BORDER_CONSTANT)
cv2.imshow("extended borders", extended)
# contour finding
contours, hierarchy = cv2.findContours(extended, 1, 2)
cont = contours[0]
rows,cols = extended.shape[:2]
[vx,vy,x,y] = cv2.fitLine(cont, cv2.DIST_L2,0,0.01,0.01)
leftish = int((-x*vy/vx) + y)
rightish = int(((cols-x)*vy/vx)+y)
line = cv2.line(extended,(cols-1,rightish),(0,leftish),(255,255,255), 6)
cv2.imshow("drawn line", line)
"Moments" can be strange things. They're building blocks and show up most often in statistics.
It helps to have a little background in statistics, and see the application of those calculations to image data, which can be considered a set of points. If you've ever calculated the weighted average or "centroid" of something, you'll recognize some of the sums that show up in "moments".
Higher order moments can be building blocks to higher statistical measures such as covariance and skewness.
Using covariance, you can calculate the major axis of your set of points, or your arrow in this case.
Using skewness, you can figure out which side of a distribution is heavier than the other... i.e. which side is the arrow's tip and which is its tail.
This should give you a very precise angle. The scale/radius, however, is best estimated in other ways. You'll notice that the radius estimated from the area of the arrow fluctuates a little. You could find the points belonging to the arrow that are furthest from the center, and take that as a somewhat stable length.
Here's a longish program that implements the two ideas above and shows the direction of an arrow:
#!/usr/bin/env python3
import os
import sys
import numpy as np
import cv2 as cv
# utilities to convert between 2D vectors and complex numbers
# complex numbers are handy for rotating stuff
def to_complex(vec):
assert vec.shape[-1] == 2
if vec.dtype == np.float32:
return vec.view(np.complex64)
elif vec.dtype == np.float64:
return vec.view(np.complex128)
else:
assert False, vec.dtype
def from_complex(cplx):
if cplx.dtype == np.complex64:
return cplx.view(np.float32)
elif cplx.dtype == np.complex128:
return cplx.view(np.float64)
else:
assert False, cplx.dtype
# utilities for drawing with fractional bits of position
# just to make a pretty picture
def iround(val):
return int(round(val))
def ipt(vec, shift=0):
if isinstance(vec, (int, float)):
return iround(vec * 2**shift)
elif isinstance(vec, (tuple, list, np.ndarray)):
return tuple(iround(el * 2**shift) for el in vec)
else:
assert False, type(vec)
# utilities for affine transformation
# just to make a pretty picture
def rotate(degrees=0):
# we want positive rotation
# meaning move +x towards +y
# getRotationMatrix2D does it differently
result = np.eye(3).astype(np.float32)
result[0:2, 0:3] = cv.getRotationMatrix2D(center=(0,0), angle=-degrees, scale=1.0)
return result
def translate(dx=0, dy=0):
result = np.eye(3).astype(np.float32)
result[0:2,2] = [dx, dy]
return result
# main logic
def calculate_direction(im):
# using "nonzero" (default behavior) is a little noisy
mask = (im >= 128)
m = cv.moments(mask.astype(np.uint8), binaryImage=True)
# easier access... see below for details
m00 = m['m00']
m10 = m['m10']
m01 = m['m01']
mu00 = m00
mu20 = m['mu20']
mu11 = m['mu11']
mu02 = m['mu02']
nu30 = m['nu30']
nu03 = m['nu03']
# that's just the centroid
cx = m10 / m00
cy = m01 / m00
centroid = np.array([cx, cy]) # as a vector
# and that's the size in pixels:
size = m00
# and that's an approximate "radius", if it were a circle which it isn't
radius = (size / np.pi) ** 0.5
# (since the "size" in pixels can fluctuate due to resampling, so will the "radius")
# wikipedia helpfully mentions "image orientation" as an example:
# https://en.wikipedia.org/wiki/Image_moment#Examples_2
# we'll use that for the major axis
mup20 = mu20 / mu00
mup02 = mu02 / mu00
mup11 = mu11 / mu00
theta = 0.5 * np.arctan2(2 * mup11, mup20 - mup02)
#print(f"angle: {theta / np.pi * 180:+6.1f} degrees")
# we only have the axis, not yet the direction
# we will assess "skewness" now
# https://en.wikipedia.org/wiki/Skewness#Definition
# note how "positive" skewness appears in a distribution:
# it points away from the heavy side, towards the light side
# fortunately, cv.moments() also calculates those "standardized moments"
# https://en.wikipedia.org/wiki/Standardized_moment#Standard_normalization
skew = np.array([nu30, nu03])
#print("skew:", skew)
# we'll have to *rotate* that so it *roughly* lies along the x axis
# then assess which end is the heavy/light end
# then use that information to maybe flip the axis,
# so it points in the direction of the arrow
skew_complex = to_complex(skew) # reinterpret two reals as one complex number
rotated_skew_complex = skew_complex * np.exp(1j * -theta) # rotation
rotated_skew = from_complex(rotated_skew_complex)
#print("rotated skew:", rotated_skew)
if rotated_skew[0] > 0: # pointing towards tail
theta = (theta + np.pi) % (2*np.pi) # flip direction 180 degrees
else: # pointing towards head
pass
print(f"angle: {theta / np.pi * 180:+6.1f} degrees")
# construct a vector that points like the arrow in the picture
direction = np.exp([1j * theta])
direction = from_complex(direction)
return (radius, centroid, direction)
def draw_a_picture(im, radius, centroid, direction):
height, width = im.shape[:2]
# take the source at half brightness
canvas = cv.cvtColor(im // 2, cv.COLOR_GRAY2BGR)
shift = 4 # prettier drawing
cv.circle(canvas,
center=ipt(centroid, shift),
radius=ipt(radius, shift),
thickness=iround(radius * 0.1),
color=(0,0,255),
lineType=cv.LINE_AA,
shift=shift)
# (-direction) meaning point the *opposite* of the arrow's direction, i.e. towards tail
cv.line(canvas,
pt1=ipt(centroid + direction * radius * -3.0, shift),
pt2=ipt(centroid + direction * radius * +3.0, shift),
thickness=iround(radius * 0.05),
color=(0,255,255),
lineType=cv.LINE_AA,
shift=shift)
cv.line(canvas,
pt1=ipt(centroid + (-direction) * radius * 3.5, shift),
pt2=ipt(centroid + (-direction) * radius * 4.5, shift),
thickness=iround(radius * 0.15),
color=(0,255,255),
lineType=cv.LINE_AA,
shift=shift)
return canvas
if __name__ == '__main__':
imfile = sys.argv[1] if len(sys.argv) >= 2 else "p7cmR.png"
src = cv.imread(imfile, cv.IMREAD_GRAYSCALE)
src = 255 - src # invert (white arrow on black background)
height, width = src.shape[:2]
diagonal = np.hypot(height, width)
outsize = int(np.ceil(diagonal * 1.3)) # fudge factor
cv.namedWindow("arrow", cv.WINDOW_NORMAL)
cv.resizeWindow("arrow", 5*outsize, 5*outsize)
angle = 0 # degrees
increment = +1
do_spin = True
while True:
print(f"{angle:+.0f} degrees")
M = translate(dx=+outsize/2, dy=+outsize/2) # rotate(degrees=angle) # translate(dx=-width/2, dy=-height/2)
im = cv.warpAffine(src, M=M[:2], dsize=(outsize, outsize), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE)
# resampling introduces blur... except when it's an even number like 0 degrees, 90 degrees, ...
# so at even rotations, things will jump a little.
# this rotation is only for demo purposes
(radius, centroid, direction) = calculate_direction(im)
canvas = draw_a_picture(im, radius, centroid, direction)
cv.imshow("arrow", canvas)
if do_spin:
angle = (angle + increment) % 360
print()
key = cv.waitKeyEx(30 if do_spin else -1)
if key == -1:
continue
elif key in (0x0D, 0x20): # ENTER (CR), SPACE
do_spin = not do_spin # toggle spinning
elif key == 27: # ESC
break # end program
elif key == 0x250000: # VK_LEFT
increment = -abs(increment)
angle += increment
elif key == 0x270000: # VK_RIGHT
increment = +abs(increment)
angle += increment
else:
print(f"key 0x{key:02x}")
cv.destroyAllWindows()
I'm training a YOLO network to detect a soccer ball, to learn more about and better understand this architecture. I can detect the ball in most cases. My problem is that the generated bounding box is too big, covering many more pixels than the actual ball.
I know the anchor-box parameter influences the bounding boxes. I've experimented with a few config values and also tried a script to generate the anchors. The anchors generated by the script are too small, and often we don't even see the box, only the title. I have also wondered whether this is the best approach for a one-class problem, since k-means is used to generate the anchors. When I tried changing the values, the covered region was completely wrong, but the box was really small, covering less than the original anchors.
Here is my code to generate the anchors. I removed some methods to keep the code cleaner.
def __init__(self, dir_images, dir_labels, width = 416, height = 416, anchors = 9):
# dir_images: Directory that contains all images that will be used or
# already has used to training the neural network
# Example: /home/enacom/Desktop/darknet_football_resized_images/
#dir_labels: Directory that contains all labels that will be used or]
# already has used to training the neural network
# Example: /home/enacom/Desktop/BBoxLabelTool/Images/labels6/
# anchors: the number of anchor that you desire
# default value is 9 on yoloV3, but you can add more if want detect more objects
self.dir_images = dir_images
self.dir_labels = dir_labels
self.grid_w = width/32
self.grid_h = height/32
self.anchors = anchors
self.annotation_dims = []
file_list = os.listdir(self.dir_images)
boxes = self.__read_labels()
for file in file_list:
actual_dir = self.dir_images + file
try:
image = cv2.imread(actual_dir)
width, height = image.shape[:2]
cell_w = width / self.grid_w
cell_h = height / self.grid_h
box = boxes[file.replace("jpg", "txt")]
relative_w = (float(box[0][2]) - float(box[0][0])) # / cell_w
relatice_h = (float(box[0][3]) - float(box[0][1])) # / cell_h
self.annotation_dims.append(tuple(map(float, (relative_w,relatice_h))))
except AttributeError:
print("Failure when reading image")
self.annotation_dims = np.array(self.annotation_dims)
print(self.annotation_dims / 32)
def IOU(self, ann, centroids):
w, h = ann
similarities = []
for centroid in centroids:
c_w, c_h = centroid
if c_w >= w and c_h >= h:
similarity = w * h / (c_w * c_h)
elif c_w >= w and c_h <= h:
similarity = w * c_h / (w * h + (c_w - w) * c_h)
elif c_w <= w and c_h >= h:
similarity = c_w * h / (w * h + c_w * (c_h - h))
else: # means both w,h are bigger than c_w and c_h respectively
similarity = (c_w * c_h) / (w * h)
similarities.append(similarity) # will become (k,) shape
return np.array(similarities)
def run_kmeans(self):
ann_num = self.annotation_dims.shape[0]
iterations = 0
prev_assignments = np.ones(ann_num) * (-1)
iteration = 0
old_distances = np.zeros((ann_num, self.anchors))
indices = [random.randrange(self.annotation_dims.shape[0]) for i in range(self.anchors)]
centroids = self.annotation_dims[indices]
anchor_dim = self.annotation_dims.shape[1]
while True:
distances = []
iteration += 1
for i in range(ann_num):
d = 1 - self.IOU(self.annotation_dims[i], centroids)
distances.append(d)
distances = np.array(distances) # distances.shape = (ann_num, anchor_num)
print("iteration {}: dists = {}".format(iteration, np.sum(np.abs(old_distances - distances))))
# assign samples to centroids
assignments = np.argmin(distances, axis=1)
if (assignments == prev_assignments).all():
return centroids
# calculate new centroids
centroid_sums = np.zeros((self.anchors, anchor_dim), np.float)
for i in range(ann_num):
centroid_sums[assignments[i]] += self.annotation_dims[i]
for j in range(self.anchors):
centroids[j] = centroid_sums[j] / (np.sum(assignments == j) + 1e-6)
prev_assignments = assignments.copy()
old_distances = distances.copy()
a = AnchorGenerator("/home/myDir/Desktop/BBoxLabelTool/Images/5_images_clean/",
"/home/myDir/Desktop/BBoxLabelTool/Labels/005/",
anchors = 9)
centroids = a.run_kmeans()
a.print_anchors(centroids)
Result with original anchors: https://imgur.com/a/5OhiTfl
Result with created my anchors: https://imgur.com/a/YCW2aam
In the case of the hand-made anchors, the result is close to right, but I don't know how to make it more precise. These are the anchors I used to generate that result.
anchors = 3,1, 7,4 5,5, 12,12, 15,15, 16,16, 18,17
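For reference, here is a rough sanity-check sketch (not part of my original code) for scoring a candidate anchor set against the labelled boxes by computing the mean best IoU, using the same width/height-only IoU idea as the generator above; box_dims is assumed to be an array of (w, h) pairs like annotation_dims:
import numpy as np

def wh_iou(box, anchor):
    # IoU of two boxes that share the same top-left corner (width/height only)
    w, h = box
    aw, ah = anchor
    inter = min(w, aw) * min(h, ah)
    union = w * h + aw * ah - inter
    return inter / union

def mean_best_iou(box_dims, anchors):
    # average, over all labelled boxes, of the IoU with their best-matching anchor
    best = [max(wh_iou(b, a) for a in anchors) for b in box_dims]
    return float(np.mean(best))

# example anchors as (w, h) pairs in the same units as the labels
# anchors = [(3, 1), (7, 4), (5, 5), (12, 12), (15, 15), (16, 16), (18, 17)]
# print(mean_best_iou(box_dims, anchors))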
I'm currently attempting to implement this algorithm for volume rendering in Python, and am conceptually confused about their method of generating the LH histogram (see section 3.1, page 4).
I have a 3D stack of DICOM images, and have calculated its gradient magnitude along with the two corresponding azimuth and elevation angles (which I found out about here), as well as the second derivative.
Now, the algorithm is asking me to iterate through a set of voxels, and "track a path by integrating the gradient field in both directions...using the second order Runge-Kutta method with an integration step of one voxel".
What I don't understand is how to use the 2 angles I calculated to integrate the gradient field in said direction. I understand that you can use trilinear interpolation to get intermediate voxel values, but I don't understand how to get the voxel coordinates I want using the angles I have.
In other words, I start at a given voxel position, and want to take a 1 voxel step in the direction of the 2 angles calculated for that voxel (one in the x-y direction, the other in the z-direction). How would I take this step at these 2 angles and retrieve the new (x, y, z) voxel coordinates?
Apologies in advance, as I have a very basic background in Calc II/III, so vector fields/visualization of 3D spaces is still a little rough for me.
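For concreteness, here is a rough sketch of what I imagine a single integration step looks like (this is my assumption, not taken from the paper): it treats the azimuth as the angle in the x-y plane and the elevation as the angle out of that plane (both in radians), samples the angle volumes with trilinear interpolation via scipy.ndimage.map_coordinates, and takes one midpoint (second-order Runge-Kutta) step of one voxel:
import numpy as np
from scipy.ndimage import map_coordinates

def direction_from_angles(azimuth, elevation):
    # unit step direction from azimuth (in the x-y plane) and elevation (out of the plane), radians
    dx = np.cos(elevation) * np.cos(azimuth)
    dy = np.cos(elevation) * np.sin(azimuth)
    dz = np.sin(elevation)
    return np.array([dx, dy, dz])

def sample(volume, point):
    # trilinear interpolation of a 3D volume at a fractional position (array-index order)
    i, j, k = point
    return map_coordinates(volume, [[i], [j], [k]], order=1)[0]

def rk2_step(point, azimuth_vol, elevation_vol, h=1.0):
    # one second-order Runge-Kutta (midpoint) step of length h voxels along the gradient field
    k1 = direction_from_angles(sample(azimuth_vol, point), sample(elevation_vol, point))
    mid = point + 0.5 * h * k1
    k2 = direction_from_angles(sample(azimuth_vol, mid), sample(elevation_vol, mid))
    return point + h * k2

# usage: p_next = rk2_step(np.array([x, y, z], dtype=float), azimuthal, elevation)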
Creating 3D stack of DICOM images:
def collect_data(data_path):
print "collecting data"
files = [] # create an empty list
for dirName, subdirList, fileList in os.walk(data_path):
for filename in fileList:
if ".dcm" in filename:
files.append(os.path.join(dirName,filename))
# Get reference file
ref = dicom.read_file(files[0])
# Load dimensions based on the number of rows, columns, and slices (along the Z axis)
pixel_dims = (int(ref.Rows), int(ref.Columns), len(files))
# Load spacing values (in mm)
pixel_spacings = (float(ref.PixelSpacing[0]), float(ref.PixelSpacing[1]), float(ref.SliceThickness))
x = np.arange(0.0, (pixel_dims[0]+1)*pixel_spacings[0], pixel_spacings[0])
y = np.arange(0.0, (pixel_dims[1]+1)*pixel_spacings[1], pixel_spacings[1])
z = np.arange(0.0, (pixel_dims[2]+1)*pixel_spacings[2], pixel_spacings[2])
# Row and column directional cosines
orientation = ref.ImageOrientationPatient
# This will become the intensity values
dcm = np.zeros(pixel_dims, dtype=ref.pixel_array.dtype)
origins = []
# loop through all the DICOM files
for filename in files:
# read the file
ds = dicom.read_file(filename)
#get pixel spacing and origin information
origins.append(ds.ImagePositionPatient) #[0,0,0] coordinates in real 3D space (in mm)
# store the raw image data
dcm[:, :, files.index(filename)] = ds.pixel_array
return dcm, origins, pixel_spacings, orientation
Calculating gradient magnitude:
def calculate_gradient_magnitude(dcm):
print "calculating gradient magnitude"
gradient_magnitude = []
gradient_direction = []
gradx = np.zeros(dcm.shape)
sobel(dcm,0,gradx)
grady = np.zeros(dcm.shape)
sobel(dcm,1,grady)
gradz = np.zeros(dcm.shape)
sobel(dcm,2,gradz)
gradient = np.sqrt(gradx**2 + grady**2 + gradz**2)
azimuthal = np.arctan2(grady, gradx)
    elevation = np.arctan2(gradz, gradient)
azimuthal = np.degrees(azimuthal)
elevation = np.degrees(elevation)
return gradient, azimuthal, elevation
Converting to patient coordinate system to get actual voxel position:
def get_patient_position(dcm, origins, pixel_spacing, orientation):
"""
Image Space --> Anatomical (Patient) Space is an affine transformation
using the Image Orientation (Patient), Image Position (Patient), and
Pixel Spacing properties from the DICOM header
"""
print "getting patient coordinates"
world_coordinates = np.empty((dcm.shape[0], dcm.shape[1],dcm.shape[2], 3))
affine_matrix = np.zeros((4,4), dtype=np.float32)
rows = dcm.shape[0]
cols = dcm.shape[1]
num_slices = dcm.shape[2]
image_orientation_x = np.array([ orientation[0], orientation[1], orientation[2] ]).reshape(3,1)
image_orientation_y = np.array([ orientation[3], orientation[4], orientation[5] ]).reshape(3,1)
pixel_spacing_x = pixel_spacing[0]
# Construct affine matrix
# Method from:
# http://nipy.org/nibabel/dicom/dicom_orientation.html
T_1 = origins[0]
T_n = origins[num_slices-1]
affine_matrix[0,0] = image_orientation_y[0] * pixel_spacing[0]
affine_matrix[0,1] = image_orientation_x[0] * pixel_spacing[1]
affine_matrix[0,3] = T_1[0]
affine_matrix[1,0] = image_orientation_y[1] * pixel_spacing[0]
affine_matrix[1,1] = image_orientation_x[1] * pixel_spacing[1]
affine_matrix[1,3] = T_1[1]
affine_matrix[2,0] = image_orientation_y[2] * pixel_spacing[0]
affine_matrix[2,1] = image_orientation_x[2] * pixel_spacing[1]
affine_matrix[2,3] = T_1[2]
affine_matrix[3,3] = 1
k1 = (T_1[0] - T_n[0])/ (1 - num_slices)
k2 = (T_1[1] - T_n[1])/ (1 - num_slices)
k3 = (T_1[2] - T_n[2])/ (1 - num_slices)
affine_matrix[:3, 2] = np.array([k1,k2,k3])
for z in range(num_slices):
for r in range(rows):
for c in range(cols):
vector = np.array([r, c, 0, 1]).reshape((4,1))
result = np.matmul(affine_matrix, vector)
result = np.delete(result, 3, axis=0)
result = np.transpose(result)
world_coordinates[r,c,z] = result
# print "Finished slice ", str(z)
# np.save('./data/saved/world_coordinates_3d.npy', str(world_coordinates))
return world_coordinates
Now I'm at the point where I want to write this function:
def create_lh_histogram(patient_positions, dcm, magnitude, azimuthal, elevation):
print "constructing LH histogram"
# Get 2nd derivative
second_derivative = gaussian_filter(magnitude, sigma=1, order=1)
# Determine if voxels lie on boundary or not (thresholding)
# Still have to code out: let's say the thresholded voxels are in
# a numpy array called voxels
#Iterate through all thresholded voxels and integrate gradient field in
# both directions using 2nd-order Runge-Kutta
vox_it = voxels.nditer(voxels, flags=['multi_index'])
while not vox_it.finished:
# ???
So my program is designed to manipulate certain coordinates in order to create this image:
So basically my program draws a bunch of random circles, and I have to manipulate the line equations to create the red sections. So far my image is the following:
I can't seem to figure out how to add another line equation to create the other red section. Any help would be greatly appreciated!
# using the SimpleGraphics library
from SimpleGraphics import *
# tell SimpleGraphics to only draw when I use the update() function
setAutoUpdate(False)
# use the random library to generate random numbers
import random
# size of the circles drawn
diameter = 15
resize(600, 400)
##
# returns a valid color based on the input coordinates
#
# #param x is an x-coordinate
# #param y is a y-coordinate
# #return a colour based on the input x,y values for the given flag
##
def define_colour(x,y):
##
#add your code to this method and change the return value
slopeOne = (200 - 300)/(0-150)
b = 0 - (slopeOne * 200)
slopeTwo = (0-300)/(200 - 800)
b = 150 - (slopeTwo * 40)
lineEquationOne = (slopeOne * x) + b
lineEquationTwo = (slopeTwo * x) + b
if y > lineEquationOne:
return "red"
elif y > lineEquationTwo:
return "red"
else:
return 'white'
######################################################################
#
# Do NOT change anything below this line
#
######################################################################
# repeat until window is closed
while not closed():
for i in range(500):
# generate random x and y values
x = random.randint(0, getWidth())
y = random.randint(0, getHeight())
# set colour for current circle
setFill( define_colour(x,y) )
# draw the current circle
ellipse(x, y, diameter, diameter)
update()
You're almost there. Add back in the commented lines for your second slope and equation and make sure your variable names match up. Then you just need to add an OR condition for your if statement to set the color based on each equation.
# using the SimpleGraphics library
from SimpleGraphics import *
# tell SimpleGraphics to only draw when I use the update() function
setAutoUpdate(False)
# use the random library to generate random numbers
import random
# size of the circles drawn
diameter = 15
resize(600, 400)
##
# returns a valid color based on the input coordinates
#
# #param x is an x-coordinate
# #param y is a y-coordinate
# #return a colour based on the input x,y values for the given flag
##
def define_colour(x, y):
slopeOne = (200 - 300) / (0 - 150)
b = 0 - (slopeOne * 200)
slopeTwo = (200 - 300) / (0 - 150)
b2 = -50 - (slopeTwo * 200)
lineEquationOne = (slopeOne * x) + b
lineEquationTwo = (slopeTwo * x) + b2
if (y > lineEquationOne) | (y < lineEquationTwo):
return "white"
else:
return 'red'
######################################################################
#
# Do NOT change anything below this line
#
######################################################################
# repeat until window is closed
while not closed():
for i in range(500):
# generate random x and y values
x = random.randint(0, getWidth())
y = random.randint(0, getHeight())
# set colour for current circle
setFill(define_colour(x, y))
# draw the current circle
ellipse(x, y, diameter, diameter)
update()
I am trying to view my local ridge orientation of a fingerprint as a flowchart. But I seem to fail miserably at doing so. My method consists of the following steps:
use the lro function
find the most dominant angle in a 16x16 block
create a line segment and rotate it by the dominant angle to display it
The problem is that while the angles that lro produces are good, displaying them in the flowchart does not work at all: I just get a lot of random angles going in all kinds of directions. Can anyone help me solve this problem?
Here is the code I'm using:
def lro(im_np):
eps = 2**(-52)
orientsmoothsigma = 4
# original
Gxx = cv2.Sobel(im_np, -1, 2, 0)
Gxy = cv2.Sobel(im_np, -1, 1, 1)
Gyy = cv2.Sobel(im_np, -1, 0, 2)
Gxx = scipy.ndimage.filters.gaussian_filter(Gxx, orientsmoothsigma)
Gxy = scipy.ndimage.filters.gaussian_filter(Gxy, orientsmoothsigma)
Gyy = scipy.ndimage.filters.gaussian_filter(Gyy, orientsmoothsigma)
angle = math.pi/2. + numpy.divide(numpy.arctan2(numpy.multiply(Gxy,2), numpy.subtract(Gxx,Gyy)),2)
return angle
def createLine(im_np):
#Assumes it is 17x17
#Takes in the block-direction
#returns a block-direction image as a numpy array
angle = numpy.max(im_np)
# print im_np.shape
im = Image.new('L', (im_np.shape[0], im_np.shape[1]), (0))
draw = ImageDraw.Draw(im)
draw.line([(0,im_np.shape[0]/2), (im_np.shape[0],im_np.shape[0]/2)], fill=255)
im = im.rotate(angle)
img_np2 = numpy.asarray(im)
# print img_np2
return img_np2
def findDomAngle(im_np):
mask = numpy.zeros((180,2))
for i in range(180):
mask[i][0] = i+1
for i in range(im_np.shape[0]):
for j in range(im_np.shape[0]):
mask[im_np[i][j]-1][1] += 1
max = 0
bestdir = 0
for i in range(180):
if mask[i][1] > max:
bestdir = i + 1
max = mask[i][1]
# print mask
# print max
return bestdir
def blkdir(angle_mat):
x = angle_mat.shape[0]
y = angle_mat.shape[1]
# print angle_mat
domAngle = findDomAngle(angle_mat)
# print domAngle
blkAngle = angle_mat
blkAngle.setflags(write=True)
for i in range(x):
for j in range(y):
blkAngle[i][j] = domAngle
return blkAngle
I am applying another function to process the image block by block, but this method has proven to work so I don't find it relevant to include.
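For reference, this is roughly how I expected the display step to work. Below is a rough sketch (my own assumption, not tested against my pipeline) that draws one short segment per 16x16 block with cv2.line, assuming the angles from lro are in radians and using the median angle of each block as a stand-in for my dominant-angle step:
import cv2
import numpy as np

def draw_orientation_field(im_np, angle_im, block=16, length=12):
    # draw one short segment per block, oriented by that block's angle (radians)
    canvas = cv2.cvtColor(im_np.astype(np.uint8), cv2.COLOR_GRAY2BGR)
    h, w = im_np.shape[:2]
    for by in range(0, h - block, block):
        for bx in range(0, w - block, block):
            theta = np.median(angle_im[by:by + block, bx:bx + block])
            cx, cy = bx + block // 2, by + block // 2
            dx = 0.5 * length * np.cos(theta)
            dy = 0.5 * length * np.sin(theta)
            cv2.line(canvas,
                     (int(cx - dx), int(cy - dy)),
                     (int(cx + dx), int(cy + dy)),
                     (0, 0, 255), 1)
    return canvas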