I'm trying to sort an image by luminosity using NumPy, which I'm new to. I've managed to create a random image and sort it.
def create_image(output, width, height, arr):
array = np.zeros([height, width, 3], dtype=np.uint8)
numOfSwatches = len(arr)
swatchWidth = int(width/ numOfSwatches)
for i in range (0, numOfSwatches):
m = i * swatchWidth
r = (i+1) * swatchWidth
array[:, m:r] = arr[i]
img = Image.fromarray(array)
img.save(output)
Which creates this image:
So far so good. Only now I want to switch from creating random images to loading them and then sorting them.
#!/usr/bin/python3
import numpy as np
from PIL import Image
# --------------------------------------------------------------
def load_image( infilename ) :
img = Image.open( infilename )
img.load()
data = np.asarray( img, dtype = "int32" )
return data
# --------------------------------------------------------------
def lum (r,g,b):
return math.sqrt( .241 * r + .691 * g + .068 * b )
myImageFile = "random_colours.png"
imageNP = load_image(myImageFile)
imageNP.sort(key=lambda rgb: lum(*rgb) )
The image should look like this:
The error I get is TypeError: 'key' is an invalid keyword argument for this function I may have created the NP array incorrectly as it worked when it was a random NP array.
Have not ever used PIL, but the following approach hopefully works (I'm not sure as I can't reproduce your exact examples), and of course there might be more efficient ways to do so.
I'm using your functions, having changed the math.sqrt function to np.sqrt in the lum function - as it is better for vector calculations. By the way, I believe this won't work with an int32 type array (as in your load_image function).
The key part is Numpy's argsort function (last line), which gives the indices that would sort the given array; this is applied to a row of the luminosity array (exploiting simmetry) and later used as indexer of img_array.
# Create random image
np.random.seed(4)
img = create_image('test.png', 75, 75, np.random.random((25,3))*255)
# Convert to Numpy array and calculate luminosity
img_array = np.array(img, dtype = np.uint8)
luminosity = lum(img_array[...,0], img_array[...,1], img_array[...,2])
# Sort by luminosity and convert to image again
img_sorted = Image.fromarray(img_array[:,luminosity[0].argsort()])
The original picture:
And the luminosity-sorted one:
Related
I'm new to Python, and I'm trying to deconstruct image bands as arrays of numbers by applying the Singular Value Decomposition (SVD) to them and then putting them back together with matplotlib.image and the Image module from PIL. An SVD may also be written as a sum of dyads s1u1v1T + ... + sKuKvKT, and the point in decomposing it in this way is that a near-perfect approximation of the image can be made from just a few of those dyads, so less data is required.
There must be something wrong with the calculation, though because result_r, result_g, and result_b look like this when converted to Images, and new_image looks like this.
For an example of what this should look like, here are the first dyads of the layers of this image. The image that I'm using (April23.jpg) is this.
import matplotlib.image as image
import numpy.linalg as la
import numpy as np
from PIL import Image
def getcolumn(j, m):
col = []
for i in range(len(m)):
col.append(m[i][j])
return col
def extractCols(U):
Ucols = []
for j in range(len(U[0])):
Ucols.append(getcolumn(j, U))
return np.asarray(Ucols)
def vectorMultiply(u, v):
matrix = []
for i in range(len(u)):
newVec = []
for j in range(len(v)):
newVec.append(u[i] * v[j])
matrix.append(newVec)
return np.asarray(matrix)
im = Image.open('C:/Users/<user>/Desktop/img/April23.jpg')
im.load()
sim = Image.Image.split(im)
rsim = sim[0].save("rsim.jpg") # image bands as images
gsim = sim[1].save("gsim.jpg")
bsim = sim[2].save("bsim.jpg")
# image bands as arrays of numbers
arsim = image.imread('C:/Users/<user>/Desktop/img/rsim.jpg')
agsim = image.imread('C:/Users/<user>/Desktop/img/gsim.jpg')
absim = image.imread('C:/Users/<user>/Desktop/img/bsim.jpg')
ur, sr, vhr = la.svd(arsim, False) # SVD on each band
ug, sg, vhg = la.svd(agsim, False)
ub, sb, vhb = la.svd(absim, False)
urcols = extractCols(ur)
ugcols = extractCols(ug)
ubcols = extractCols(ub)
# calculating the first dyads
result_r = np.multiply(sr[0], vectorMultiply(urcols[0], vhr[0]))
result_g = np.multiply(sg[0], vectorMultiply(ugcols[0], vhg[0]))
result_b = np.multiply(sb[0], vectorMultiply(ubcols[0], vhb[0]))
r = Image.fromarray(result_r, "L")
g = Image.fromarray(result_g, "L")
b = Image.fromarray(result_b, "L")
new_image = Image.merge("RGB", (r, g, b))
What am I missing, here? It seems to be something with the calculations. I figured for a matrix one would have to extract the columns, say the column [1, 2, 3] from a matrix [[1,...], [2,...], [3,...]], since each element of the matrix is a row. So, I wrote extractCols() for that. numpy's matrix add and multiply seem to be fine. I wrote vectorMultiply because np.dot(), np.multiply(), and np.matmul() didn't seem to realize that u was a column and kept saying the dimensions didn't match up. I tested it and it seemed to do what I wanted it to. I was also thinking that maybe the "rows" of U are actually the columns already and don't need to be extracted, but that didn't work either. I've also tried not using np.asarray() without any luck.
Any advice is appreciated.
I am a new programming Interface. I am using the PIL and Matplotlib libraries for the contract streaching.When I am using the Histogram Equalizer I am getting the error as name 'IMG_H' is not defined.I am also Converting my image to numpy array, calculate the histogram, cumulative sum, mapping and then apply the mapping to create a new image.
You can see my code below -
# HISTOGRAM EQUALIZATION
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
def make_histogram(img):
""" Take an image and create a historgram from it's luma values """
y_vals = img[:,:,0].flatten()
histogram = np.zeros(256, dtype=int)
for y_index in range(y_vals.size):
histogram[y_vals[y_index]] += 1
return histogram
def make_cumsum(histogram):
""" Create an array that represents the cumulative sum of the histogram """
cumsum = np.zeros(256, dtype=int)
cumsum[0] = histogram[0]
for i in range(1, histogram.size):
cumsum[i] = cumsum[i-1] + histogram[i]
return cumsum
def make_mapping(histogram, cumsum):
mapping = np.zeros(256, dtype=int)
luma_levels = 256
for i in range(histogram.size):
mapping[i] = max(0, round((luma_levels*cumsum[i])/(IMG_H*IMG_W))-1)
return mapping
def apply_mapping(img, mapping):
""" Apply the mapping to our image """
new_image = img.copy()
new_image[:,:,0] = list(map(lambda a : mapping[a], img[:,:,0]))
return new_image
# Load image
pillow_img = Image.open('pout.jpg')
# Convert our image to numpy array, calculate the histogram, cumulative sum,
# mapping and then apply the mapping to create a new image
img = np.array(pillow_img)
histogram = make_histogram(img)
cumsum = make_cumsum(histogram)
mapping = make_mapping(histogram, cumsum)
new_image = apply_mapping(img, mapping)
output_image = Image.fromarray(np.uint8(new_image))
imshow(output_image, cmap='gray')
# Display the old (black) and new (red) histograms next to eachother
x_axis = np.arange(256)
fig = plt.figure()
fig.add_subplot(1,2,1)
plt.bar(x_axis , histogram, color = "black")
fig.add_subplot(1,2,2)
plt.bar(x_axis , make_histogram(new_image), color = "red")
plt.show()
You have this variable here:
mapping[i] = max(0, round((luma_levels*cumsum[i])/(IMG_H*IMG_W))-1)
But you didn't define it (or import) before, therefore you get this error.
for i in range(histogram.size):
mapping[i] = max(0, round((luma_levels*cumsum[i])/(IMG_H*IMG_W))-1)
In above stated line. you are using 2 Variables, IMG_H and IMG_W.
where you defined these variables?
EDITED PART
for i in range(histogram.size):
mapping[i] = max(0, round((luma_levels*cumsum[i])/(IMG_H*IMG_W))-1)
in the above stated line you are using 2 variables try to do multiplication (IMG_H*IMG_W) but you did not define and import these variables in the whole code.
You can do like this.
you can define these variables on the top of the code.
your code shows that these variables are defined for Image width and height
IMG_W = 120 #Any value in integer for Image Width
IMG_H = 124 #Any value in integer for Image Height
I have an image of dimension 155 x 240. Like the following:
I want to extract certain shape of patchs (25 x 25).
I don't want to patch from the whole image.
I want to extract N number of patch from non-zero (not background) area of the image. How can I do that? Any idea or suggestion or implementation will be appreciated. You can try with either Matlab or Python.
Note:
I have generated a random image so that you can process it for patching. image_process variable is that image in this code.
import numpy as np
from scipy.ndimage.filters import convolve
import matplotlib.pyplot as plt
background = np.ones((155,240))
background[78,120] = 2
n_d = 50
y,x = np.ogrid[-n_d: n_d+1, -n_d: n_d+1]
mask = x**2+y**2 <= n_d**2
mask = 254*mask.astype(float)
image_process = convolve(background, mask)-sum(sum(mask))+1
image_process[image_process==1] = 0
image_process[image_process==255] = 1
plt.imshow(image_process)
Lets assume that the pixels values you want to omit is 0.
In this case what you could do, is first find the indices of the non-zero values, then slice the image in the min/max position to get only the desired area, and then simply apply extract_patches_2d with the desired window size and number of patches.
For example, given the dummy image you supplied:
import numpy as np
from scipy.ndimage.filters import convolve
import matplotlib.pyplot as plt
background = np.ones((155,240))
background[78,120] = 2
n_d = 50
y,x = np.ogrid[-n_d: n_d+1, -n_d: n_d+1]
mask = x**2+y**2 <= n_d**2
mask = 254*mask.astype(float)
image_process = convolve(background, mask)-sum(sum(mask))+1
image_process[image_process==1] = 0
image_process[image_process==255] = 1
plt.figure()
plt.imshow(image_process)
plt.show()
from sklearn.feature_extraction.image import extract_patches_2d
x, y = np.nonzero(image_process)
xl,xr = x.min(),x.max()
yl,yr = y.min(),y.max()
only_desired_area = image_process[xl:xr+1, yl:yr+1]
window_shape = (25, 25)
B = extract_patches_2d(only_desired_area, window_shape, max_patches=100) # B shape will be (100, 25, 25)
If you plot the only_desired_area you will get the following image:
This is the main logic if you wish an even tighter bound you should adjust the slicing properly.
I'm trying to convert an image from RGB to LMS -and vice versa- using OpenCV in Python. From what I understand, I am supposed to use a given 3x3 transformation matrix and multiply it to a 3x1 RGB/LMS matrix. The transformation matrices used can be found here.
I've explored previously asked questions on this site but unfortunately they're in C++, a language I have yet to be proficient in and I have difficulty in understanding how exactly they've solved their problems.
Here is my code so far: [Solved as of 2019-05-19]
import numpy as np
import cv2
#Transformation Matrix#
MsRGB = np.zeros((3,3), dtype='float')
MHPE = np.zeros((3,3), dtype='float')
MsRGB = np.array([[0.4124564, 0.3575761, 0.1804375],
[0.2126729, 0.7151522, 0.0721750],
[0.0193339, 0.1191920, 0.9503041]])
MHPE = np.array([[ 0.4002, 0.7076, -0.0808],
[-0.2263, 1.1653, 0.0457],
[ 0, 0, 0.9182]])
Trgb2lms = MHPE # MsRGB
Tlms2rgb = np.linalg.inv(Trgb2lms)
imgpath = "(insert file directory here)"
imgIN = cv2.imread(imgpath,cv2.IMREAD_UNCHANGED)
imgINrgb = cv2.cvtColor(imgIN, cv2.COLOR_BGR2RGB)
x,y,z = imgINrgb.shape
imgLMS = np.zeros((x,y,z), dtype='float')
imgReshaped = imgINrgb.transpose(2, 0, 1).reshape(3,-1)
imgLMS = Trgb2lms # imgReshaped #Convert to LMS
imgOUT = Tlms2rgb # imgLMS #Convert back to RGB
imgLMS = imgLMS.reshape(z, x, y).transpose(1, 2, 0).astype(np.uint8)
imgOUT = imgOUT.reshape(z, x, y).transpose(1, 2, 0).astype(np.uint8)
imgOUT = cv2.cvtColor(imgOUT, cv2.COLOR_RGB2BGR)
cv2.imshow('Input', imgIN)
cv2.imshow('LMS', imgLMS)
cv2.imshow('Output', imgOUT)
cv2.waitKey(0)
cv2.destroyAllWindows()
The code is now able to perform linear transformation on a given RGB image using a given transformation matrix. Results can be found here.
There are a few errors given the context of your question:
T is not defined. Judging from the context of your code, this should be Trgb2lms instead so we need to change those.
From what I can gather from the question, you are applying a linear transformation to all pixels in the image. To do this, you want to reshape the matrix so that we have three rows where each row corresponds to a single pixel followed by an unravelling of all pixels along the columns. In that case, the reshape method is incorrect. You need not only shuffle the dimensions so that the last dimension is first but you'll also need to set the last dimension of the reshape so that it's -1. This means that we will automatically fill up the columns so that it contains the total number of pixels in the image.
Finally, once you do the linear transformation, you need to reshape the matrix back to the original image size. You can use a final reshape call and use x, y and z from the original call you made to infer the image dimensions. Remember that when we reshape, the channels come first so we'll have to permute the dimensions again. You'll also want to go back to unsigned 8-bit precision after we do the transformation.
Also to compare, let's run this through the inverse transformation to make sure we have the original.
Therefore:
import numpy as np
import cv2
#Transformation Matrix#
MsRGB = np.zeros((3,3), dtype='float')
MHPE = np.zeros((3,3), dtype='float')
MsRGB = np.array([[0.4124564, 0.3575761, 0.1804375],
[0.2126729, 0.7151522, 0.0721750],
[0.0193339, 0.1191920, 0.9503041]])
MHPE = np.array([[ 0.4002, 0.7076, -0.0808],
[-0.2263, 1.1653, 0.0457],
[ 0, 0, 0.9182]])
Trgb2lms = MHPE # MsRGB
# Change
Tlms2rgb = np.linalg.inv(Trgb2lms)
imgpath = "(insert filename here)"
imgIN = cv2.imread(imgpath,cv2.IMREAD_UNCHANGED)
imgINrgb = cv2.cvtColor(imgIN, cv2.COLOR_BGR2RGB)
x,y,z = imgINrgb.shape
imgLMS = np.zeros((x,y,z), dtype='float')
#imgFlatten = imgINrgb.flatten()
# Change
imgReshaped = imgINrgb.transpose(2, 0, 1).reshape(3,-1)
# Change
imgLMS = Trgb2lms # imgReshaped
imgOUT = Tlms2rgb # imgLMS
# New
imgLMS = imgLMS.transpose(z, x, y).permute(1, 2, 0).astype(np.uint8)
imgOUT = imgOUT.transpose(z, x, y).permute(1, 2, 0).astype(np.uint8)
I am attempting to take 2D images of either one or three channels and display them in VTK using vtkImageActor. As I understand it, the current frame to be displayed can be updated by invoking SetImageData on vtkImageActor and providing an instance of vtkImageData.
I have set up my visualiser as shown below. However, I am unsure how to build the vtkImageData object from the numpy arrays(this would go in the updateFrames method). The type of my numpy arrays is np.uint8_t.
I am using VTK8.0, Python 3.6 and Numpy 1.13.1
class VTKStreamVisualiser:
def __init__(self, displayRGB):
self.__displayRGB = displayRGB
self.__started = False
#Setup window.
self.__renderWindow = vtk.vtkRenderWindow()
self.__renderWindowInteractor = vtk.vtkRenderWindowInteractor()
self.__renderWindowInteractor.SetRenderWindow(self.__renderWindow)
#To store renderers and actors.
self.__renderers = []
self.__actors = []
#Initialise to None to check if ready when invoking start()
self.__depthImageData = None
self.__rgbImageData = None
#Determine viewport ranges for depth and setup renderer.
xMinDepth = 0.0
xMaxDepth = 0.5 if displayRGB else 1.0
yMin = 0.0
yMax = 1.0
self.__setupRenderer(xMinDepth, yMin, xMaxDepth, yMax)
#Determine viewport ranges for rgb and setup renderer.
if self.__displayRGB:
xMinRGB = xMaxDepth
xMaxRGB = 2.0 * xMinRGB
self.__setupRenderer(xMinRGB, yMin, xMaxRGB, yMax)
def __setupRenderer(self, xMin, yMin, xMax, yMax):
#Setup renderer.
self.__renderers.append(vtk.vtkRenderer())
idx = len(self.__renderers) - 1
self.__renderWindow.AddRenderer(self.__renderers[idx])
self.__renderers[idx].SetViewport(xMin, yMin, xMax, yMax)
self.__actors.append(vtk.vtkImageActor())
self.__renderers[idx].AddActor(self.__actors[idx])
self.__renderers[idx].ResetCamera()
def start(self):
self.__depthImageData is None or (self.__rgbImageData is None and self.__displayRGB):
return None
if self.__started:
return
self.__renderWindowInteractor.Initialize()
self.__renderWindow.Render()
self.__renderWindowInteractor.Start()
self.__started = True
def stop(self):
if not self.__started:
return
self.__renderWindowInteractor.Stop()
self.__renderWindow.Finalize()
self.__renderWindowInteractor.TerminateApp()
self.__started = False
def updateFrames(self, depthFrame, rgbFrame=None):
#Build vtkImageData here from the given numpy uint8_t arrays.
pass
EDIT: I realise that I can manually copy the data over as demonstrated here, which wouldn't be too bad with Cython(assuming I am able to work with vtkImageData in Cython), however it would be preferable to use the numpy arrays directly.
A slightly more complete answer (generalizing to 1-3 channels, different datatypes).
import vtk
import numpy as np
from vtk.util import numpy_support
def numpy_array_as_vtk_image_data(source_numpy_array):
"""
:param source_numpy_array: source array with 2-3 dimensions. If used, the third dimension represents the channel count.
Note: Channels are flipped, i.e. source is assumed to be BGR instead of RGB (which works if you're using cv2.imread function to read three-channel images)
Note: Assumes array value at [0,0] represents the upper-left pixel.
:type source_numpy_array: np.ndarray
:return: vtk-compatible image, if conversion is successful. Raises exception otherwise
:rtype vtk.vtkImageData
"""
if len(source_numpy_array.shape) > 2:
channel_count = source_numpy_array.shape[2]
else:
channel_count = 1
output_vtk_image = vtk.vtkImageData()
output_vtk_image.SetDimensions(source_numpy_array.shape[1], source_numpy_array.shape[0], channel_count)
vtk_type_by_numpy_type = {
np.uint8: vtk.VTK_UNSIGNED_CHAR,
np.uint16: vtk.VTK_UNSIGNED_SHORT,
np.uint32: vtk.VTK_UNSIGNED_INT,
np.uint64: vtk.VTK_UNSIGNED_LONG if vtk.VTK_SIZEOF_LONG == 64 else vtk.VTK_UNSIGNED_LONG_LONG,
np.int8: vtk.VTK_CHAR,
np.int16: vtk.VTK_SHORT,
np.int32: vtk.VTK_INT,
np.int64: vtk.VTK_LONG if vtk.VTK_SIZEOF_LONG == 64 else vtk.VTK_LONG_LONG,
np.float32: vtk.VTK_FLOAT,
np.float64: vtk.VTK_DOUBLE
}
vtk_datatype = vtk_type_by_numpy_type[source_numpy_array.dtype.type]
source_numpy_array = np.flipud(source_numpy_array)
# Note: don't flip (take out next two lines) if input is RGB.
# Likewise, BGRA->RGBA would require a different reordering here.
if channel_count > 1:
source_numpy_array = np.flip(source_numpy_array, 2)
depth_array = numpy_support.numpy_to_vtk(source_numpy_array.ravel(), deep=True, array_type = vtk_datatype)
depth_array.SetNumberOfComponents(channel_count)
output_vtk_image.SetSpacing([1, 1, 1])
output_vtk_image.SetOrigin([-1, -1, -1])
output_vtk_image.GetPointData().SetScalars(depth_array)
output_vtk_image.Modified()
return output_vtk_image
Using the numpy_support library one can convert numpy arrays into a vtk data arrays
from vtk.util import numpy_support
def updateFrames(self, depthFrame, rgbFrame=None):
#Build vtkImageData here from the given numpy uint8_t arrays.
self.__depthImageData = vtk.vtkImageData()
depthArray = numpy_support.numpy_to_vtk(depthFrame.ravel(), deep=True, array_type=vtk.VTK_UNSIGNED_CHAR)
# .transpose(2, 0, 1) may be required depending on numpy array order see - https://github.com/quentan/Test_ImageData/blob/master/TestImageData.py
__depthImageData.SetDimensions(depthFrame.shape)
#assume 0,0 origin and 1,1 spacing.
__depthImageData.SetSpacing([1,1])
__depthImageData.SetOrigin([0,0])
__depthImageData.GetPointData().SetScalars(depthArray)
Should provide a working example of how to generate the depthFrame as a starting point
In case you came looking for code compatible with VTK version 9 and up. Here is this answer modified:
import vtk
import numpy as np
from vtkmodules.vtkCommonDataModel import vtkImageData
from vtkmodules.util import numpy_support
def numpy_array_as_vtk_image_data(source_numpy_array):
"""
:param source_numpy_array: source array with 2-3 dimensions. If used, the third dimension represents the channel count.
Note: Channels are flipped, i.e. source is assumed to be BGR instead of RGB (which works if you're using cv2.imread function to read three-channel images)
Note: Assumes array value at [0,0] represents the upper-left pixel.
:type source_numpy_array: np.ndarray
:return: vtk-compatible image, if conversion is successful. Raises exception otherwise
:rtype vtk.vtkImageData
"""
if len(source_numpy_array.shape) > 2:
channel_count = source_numpy_array.shape[2]
else:
channel_count = 1
output_vtk_image = vtkImageData()
output_vtk_image.SetDimensions(source_numpy_array.shape[1], source_numpy_array.shape[0], channel_count)
vtk_type_by_numpy_type = {
np.uint8: vtk.VTK_UNSIGNED_CHAR,
np.uint16: vtk.VTK_UNSIGNED_SHORT,
np.uint32: vtk.VTK_UNSIGNED_INT,
np.uint64: vtk.VTK_UNSIGNED_LONG if vtk.VTK_SIZEOF_LONG == 64 else vtk.VTK_UNSIGNED_LONG_LONG,
np.int8: vtk.VTK_CHAR,
np.int16: vtk.VTK_SHORT,
np.int32: vtk.VTK_INT,
np.int64: vtk.VTK_LONG if vtk.VTK_SIZEOF_LONG == 64 else vtk.VTK_LONG_LONG,
np.float32: vtk.VTK_FLOAT,
np.float64: vtk.VTK_DOUBLE
}
vtk_datatype = vtk_type_by_numpy_type[source_numpy_array.dtype.type]
source_numpy_array = np.flipud(source_numpy_array)
# Note: don't flip (take out next two lines) if input is RGB.
# Likewise, BGRA->RGBA would require a different reordering here.
if channel_count > 1:
source_numpy_array = np.flip(source_numpy_array, 2)
depth_array = numpy_support.numpy_to_vtk(source_numpy_array.ravel(), deep=True, array_type=vtk_datatype)
depth_array.SetNumberOfComponents(channel_count)
output_vtk_image.SetSpacing([1, 1, 1])
output_vtk_image.SetOrigin([-1, -1, -1])
output_vtk_image.GetPointData().SetScalars(depth_array)
output_vtk_image.Modified()
return output_vtk_image