Write all numpy arrays to binary file in a loop - python

I have this code:
from osgeo import gdal
import numpy as np
ds = gdal.Open('image.tif')
# loop through each band
for bi in range(ds.RasterCount):
band = ds.GetRasterBand(bi + 1)
# Read this band into a 2D NumPy array
ar = band.ReadAsArray()
print('Band %d has type %s'%(bi + 1, ar.dtype))
ar.astype('uint16').tofile("converted.raw")
As a result, I get the converted.raw file, but it only contains data from the last iteration of the for loop. How to make a file that will contain data from all iterations together.

Use np.save
Ex:
ds = gdal.Open('image.tif')
# loop through each band
with open("converted.raw", "wb") as outfile:
for bi in range(ds.RasterCount):
band = ds.GetRasterBand(bi + 1)
# Read this band into a 2D NumPy array
ar = band.ReadAsArray()
print('Band %d has type %s'%(bi + 1, ar.dtype))
np.save(outfile, ar.astype('uint16'))

Related

Determine coordinates at highest point of raster

I have a raster from which I want to derive the coordinates of the highest point (elevation) in the raster.
Getting the highest elevation is easy, but I don't know how to get its coordinates.
What I have so far:
# required modules
from osgeo import gdal
from osgeo import osr
import numpy as np
import rasterio
# allow GDAL to use python exceptions
gdal.UseExceptions()
# save paths to the files needed
input_raster = 'data/dem.tif'
# open input raster file with errorcatching
try:
ds = gdal.Open(input_raster)
except RuntimeError as err:
print (err)
exit(keep_kernel=True)
if ds is None:
print ('Unable to open %s' % input_raster)
exit(keep_kernel=True)
#access size of file
cols = ds.RasterXSize
rows = ds.RasterYSize
#access band and data as numpy arrays
band = ds.GetRasterBand(1)
data1 = band.ReadAsArray(0, 0, cols, rows).astype(float)
#set nodata values to Nan
nodata_val = band.GetNoDataValue()
print(nodata_val)
data_masked = np.ma.masked_equal(data1,nodata_val)
#determine highest elevation value und its coordinates
highest_val = data_masked.max()
geotransform = ds.GetGeoTransform()
originX = geotransform[0]
originY = geotransform[3]
pixelWidth = geotransform[1]
pixelHeight = geotransform[5]
I'm stuck and thankful for any advice.
Get the indices of the max value(s):
indices = np.where(data_masked == data_masked.max())
Also decide what to do when there are multiple cells with the maximum value.
Compute the coordinates with the transforms:
x = indices[0][0] * pixelWidth + originX

dumping a numpy array into a CSV file: table elements look like printed arrays

I'm trying to use the OpenCV Python wrapper to calculate the Discrete wavelet Haar transform for an image. Supposedly, images == numpy arrays == CV matrices, so I thought this should work:
import cv2
import numpy as np
import pywt
from pywt import wavedec
import csv
imgcv1 = cv2.split(resize)[0]
cv2.boxFilter(imgcv1, 0, (7,7), imgcv1, (-1,-1), False, cv2.BORDER_DEFAULT)
cv2.resize( imgcv1, (32, 32 ) ,imgcv1)
imf = np.float32(imgcv1)/255.0 # float conversion/scale
coeffs = pywt.dwt(imf, "haar") # the dwt
cA = coeffs
#imgout = pywt.dwt(coeffs, "haar")
print(cA)
def zigzag(matrix):
rows=len(matrix)
columns=len(matrix[0])
solution=[[] for i in range(rows+columns-1)]
for i in range(rows):
for j in range(columns):
sum=i+j
if(sum%2 ==0):
solution[sum].insert(0,matrix[i][j])
else:
solution[sum].append(matrix[i][j])
data = (rows*columns)/3
data = round(data)
mtx_zigzag = []
for i in solution:
for j in i:
mtx_zigzag.append(j)
mtxawal = mtx_zigzag[2:26]
# mtxtengah = mtx_zigzag[83:107]
# mtxakhir = mtx_zigzag[191:215]
mtx = mtxawal
# +mtxtengah+mtxakhir
return mtx
hasil_zigzag= zigzag(cA)
print(hasil_zigzag)
zg=[]
zg.append(hasil_zigzag)
citra="tangan {}".format(counter)
file=open('database.csv','a+',newline='')
with file:
write=csv.writer(file)
write.writerows(zg)
the excel /csv data output is like this:
I want the matrix or csv/excel output to be like this:
can you guys help me

Convert mha 3d images into 2d images in Python (2015 brats challenge dataset)

I want to use SimpleITK or wedpy to convert the 3d images into 2d images.
Or i want to get a three-dimensional matrix, and then i divide the three-dimensional matrix into some two-dimensional matrices.
import SimpleITK as ITK
import numpy as np
#from medpy.io import load
url=r'G:\path\to\my.mha'
image = ITK.ReadImage(url)
frame_num, width, height = image_array.shape
print(frame_num,width,height)
Then only get it:155 240 240
but i want to get [[1,5,2,3,1...],[54,1,3,5...],[5,8,9,6....]]
Just to add to Dave Chen's answer, as it is unclear if you want to get a set of 2D SimpleITK images or numpy arrays. The following code covers all three available options:
import SimpleITK as sitk
import numpy as np
url = "my_file.mha"
image = sitk.ReadImage(url)
max_index = image.GetDepth() # or image.GetWidth() or image.GetHeight() depending on the axis along which you want to extract
# As list of 2D SimpleITK images
list_of_2D_images = [image[:,:,i] for i in range(max_index)]
# As list of 2D numpy arrays which cannot be modified (no data copied)
list_of_2D_images_np_view = [sitk.GetArrayViewFromImage(image[:,:,i]) for i in range(max_index)]
# As list of 2D numpy arrays (data copied to numpy array)
list_of_2D_images_np = [sitk.GetArrayFromImage(image[:,:,i]) for i in range(max_index)]
Also, if you really want to work with URLs and not local files I would suggest looking at the remote download approach used in the SimpleITK notebooks repository, the relevant file is downloaddata.py.
That's not a big deal.
CT images have originally all numbers in int16 type so you don't need to handle float numbers.. In this case, we can think that we can easily change from int16 to uint16 only removing negative values in the image (CT images have some negative numbers as pixel values). Note that we really need uint16, or uint8 type so that OpenCV can handle it... as we have a lot of values in the CT image array, the best choice is uint16, so that we don't lose too much precision.
Ok, now you just need to do as follows:
import SimpleITK as sitk
import numpy as np
import cv2
mha = sitk.ReadImage('/mha/directory') #Importing mha file
array = sitk.GetArrayFromImage(mha) #Converting to array int16 (default)
#Translating each slice to the positive side
for m in range(array.shape[0]):
array[m] = array[m] + abs(np.min(array[m]))
array = np.around(array, decimals=0) #remove any float numbers if exists.. probably not
array = np.asarray(array, dtype='uint16') #From int16 to uint16
After these steps the array is just ready to be saved as png images using opencv.imwrite module:
for image in array:
cv2.imwrite('/dir/to/save/'+'name_image.png', image)
Note that by default SimpleITK handles .mha files by the axial view. I really don't know how to change it because I've never needed it before. Anyway, in this case with some searches you can find something.
I'm not sure exactly what you want to get. But it's easy to extract a 2d slice from a 3d image in SimpleITK.
To get a Z slice where Z=100 you can do this:
zslice = image[100]
To get a Y slice for Y=100:
yslice = image[:, 100]
And a X slice for X=100:
xslice = image[:, :, 100]
#zivy#Dave Chen
I've solved my problem.In fact, running this code will give you 150 240*240 PNG pictures.It's i want to get.
# -*- coding:utf-8 -*-
import numpy as np
import subprocess
import random
import progressbar
from glob import glob
from skimage import io
np.random.seed(5) # for reproducibility
progress = progressbar.ProgressBar(widgets=[progressbar.Bar('*', '[', ']'), progressbar.Percentage(), ' '])
class BrainPipeline(object):
'''
A class for processing brain scans for one patient
INPUT: (1) filepath 'path': path to directory of one patient. Contains following mha files:
flair, t1, t1c, t2, ground truth (gt)
(2) bool 'n4itk': True to use n4itk normed t1 scans (defaults to True)
(3) bool 'n4itk_apply': True to apply and save n4itk filter to t1 and t1c scans for given patient. This will only work if the
'''
def __init__(self, path, n4itk = True, n4itk_apply = False):
self.path = path
self.n4itk = n4itk
self.n4itk_apply = n4itk_apply
self.modes = ['flair', 't1', 't1c', 't2', 'gt']
# slices=[[flair x 155], [t1], [t1c], [t2], [gt]], 155 per modality
self.slices_by_mode, n = self.read_scans()
# [ [slice1 x 5], [slice2 x 5], ..., [slice155 x 5]]
self.slices_by_slice = n
self.normed_slices = self.norm_slices()
def read_scans(self):
'''
goes into each modality in patient directory and loads individual scans.
transforms scans of same slice into strip of 5 images
'''
print('Loading scans...')
slices_by_mode = np.zeros((5, 155, 240, 240))
slices_by_slice = np.zeros((155, 5, 240, 240))
flair = glob(self.path + '/*Flair*/*.mha')
t2 = glob(self.path + '/*_T2*/*.mha')
gt = glob(self.path + '/*more*/*.mha')
t1s = glob(self.path + '/**/*T1*.mha')
t1_n4 = glob(self.path + '/*T1*/*_n.mha')
t1 = [scan for scan in t1s if scan not in t1_n4]
scans = [flair[0], t1[0], t1[1], t2[0], gt[0]] # directories to each image (5 total)
if self.n4itk_apply:
print('-> Applyling bias correction...')
for t1_path in t1:
self.n4itk_norm(t1_path) # normalize files
scans = [flair[0], t1_n4[0], t1_n4[1], t2[0], gt[0]]
elif self.n4itk:
scans = [flair[0], t1_n4[0], t1_n4[1], t2[0], gt[0]]
for scan_idx in xrange(5):
# read each image directory, save to self.slices
slices_by_mode[scan_idx] = io.imread(scans[scan_idx], plugin='simpleitk').astype(float)
for mode_ix in xrange(slices_by_mode.shape[0]): # modes 1 thru 5
for slice_ix in xrange(slices_by_mode.shape[1]): # slices 1 thru 155
slices_by_slice[slice_ix][mode_ix] = slices_by_mode[mode_ix][slice_ix] # reshape by slice
return slices_by_mode, slices_by_slice
def norm_slices(self):
'''
normalizes each slice in self.slices_by_slice, excluding gt
subtracts mean and div by std dev for each slice
clips top and bottom one percent of pixel intensities
if n4itk == True, will apply n4itk bias correction to T1 and T1c images
'''
print('Normalizing slices...')
normed_slices = np.zeros((155, 5, 240, 240))
for slice_ix in xrange(155):
normed_slices[slice_ix][-1] = self.slices_by_slice[slice_ix][-1]
for mode_ix in xrange(4):
normed_slices[slice_ix][mode_ix] = self._normalize(self.slices_by_slice[slice_ix][mode_ix])
print('Done.')
return normed_slices
def _normalize(self, slice):
'''
INPUT: (1) a single slice of any given modality (excluding gt)
(2) index of modality assoc with slice (0=flair, 1=t1, 2=t1c, 3=t2)
OUTPUT: normalized slice
'''
b, t = np.percentile(slice, (0.5,99.5))
slice = np.clip(slice, b, t)
if np.std(slice) == 0:
return slice
else:
return (slice - np.mean(slice)) / np.std(slice)
def save_patient(self, reg_norm_n4, patient_num):
'''
INPUT: (1) int 'patient_num': unique identifier for each patient
(2) string 'reg_norm_n4': 'reg' for original images, 'norm' normalized images, 'n4' for n4 normalized images
OUTPUT: saves png in Norm_PNG directory for normed, Training_PNG for reg
'''
print('Saving scans for patient {}...'.format(patient_num))
progress.currval = 0
if reg_norm_n4 == 'norm': #saved normed slices
for slice_ix in progress(xrange(155)): # reshape to strip
strip = self.normed_slices[slice_ix].reshape(1200, 240)
if np.max(strip) != 0: # set values < 1
strip /= np.max(strip)
if np.min(strip) <= -1: # set values > -1
strip /= abs(np.min(strip))
# save as patient_slice.png
io.imsave('Norm_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
elif reg_norm_n4 == 'reg':
for slice_ix in progress(xrange(155)):
strip = self.slices_by_slice[slice_ix].reshape(1200, 240)
if np.max(strip) != 0:
strip /= np.max(strip)
io.imsave('Training_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
else:
for slice_ix in progress(xrange(155)): # reshape to strip
strip = self.normed_slices[slice_ix].reshape(1200, 240)
if np.max(strip) != 0: # set values < 1
strip /= np.max(strip)
if np.min(strip) <= -1: # set values > -1
strip /= abs(np.min(strip))
# save as patient_slice.png
io.imsave('n4_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
def n4itk_norm(self, path, n_dims=3, n_iters='[20,20,10,5]'):
'''
INPUT: (1) filepath 'path': path to mha T1 or T1c file
(2) directory 'parent_dir': parent directory to mha file
OUTPUT: writes n4itk normalized image to parent_dir under orig_filename_n.mha
'''
output_fn = path[:-4] + '_n.mha'
# run n4_bias_correction.py path n_dim n_iters output_fn
subprocess.call('python n4_bias_correction.py ' + path + ' ' + str(n_dims) + ' ' + n_iters + ' ' + output_fn, shell = True)
def save_patient_slices(patients, type):
'''
INPUT (1) list 'patients': paths to any directories of patients to save. for example- glob("Training/HGG/**")
(2) string 'type': options = reg (non-normalized), norm (normalized, but no bias correction), n4 (bias corrected and normalized)
saves strips of patient slices to approriate directory (Training_PNG/, Norm_PNG/ or n4_PNG/) as patient-num_slice-num
'''
for patient_num, path in enumerate(patients):
a = BrainPipeline(path)
a.save_patient(type, patient_num)
def s3_dump(directory, bucket):
'''
dump files from a given directory to an s3 bucket
INPUT (1) string 'directory': directory containing files to save
(2) string 'bucket': name od s3 bucket to dump files
'''
subprocess.call('aws s3 cp' + ' ' + directory + ' ' + 's3://' + bucket + ' ' + '--recursive')
def save_labels(fns):
'''
INPUT list 'fns': filepaths to all labels
'''
progress.currval = 0
for label_idx in progress(range(len(labels))):
slices = io.imread(labels[label_idx], plugin = 'simpleitk')
for slice_idx in range(len(slices)):
io.imsave(r'{}_{}L.png'.format(label_idx, slice_idx), slices[slice_idx])
if __name__ == '__main__':
url = r'G:\work\deeplearning\BRATS2015_Training\HGG\brats_2013_pat0005_1\VSD.Brain.XX.O.MR_T1.54537\VSD.Brain.XX.O.MR_T1.54537.mha'
labels = glob(url)
save_labels(labels)
# patients = glob('Training/HGG/**')
# save_patient_slices(patients, 'reg')
# save_patient_slices(patients, 'norm')
# save_patient_slices(patients, 'n4')
# s3_dump('Graveyard/Training_PNG/', 'orig-training-png')

different result loading binary files matlab and python

I am trying to read a binary file from a readout board that will be converted to an image. In Matlab, all the bytes are correctly read and the image is completelly populated. But in python (ver2.7 using anaconda) there is a line of zeros every 127 columns.
The Matlab code is:
fid = fopen(filename);
Rawdata = fread(fid,'uint8');
Data1d = Rawdata(2:2:end).* 256+ Rawdata(1:2:end) ;
% converts Data1 to a 2D vector, adding a row of zeros to make the reshape
% possible to 3D
Data2d = [reshape(Data1d,4127,1792); zeros(1,1792)];
% reshapes again, but adding a new dimension
Data3d = reshape(Data2d(:),129,32,1792);
% selects the first 128 values in the first dimension
Data3d = Data3d(1:128,:,:);
Data2d = reshape(Data3d(:),4096,1792);
Data2d = Data2d';
CMVimage = Data2d;
fclose(fid); %VGM 2017-01-14 the file should be closed.
In python I tried np.fromfile() and directly reading from python using f.read()
with the same result.
import numpy as np
import matplotlib.pyplot as plt
"""
reads the input .dat file and converts it to an image
Problem: line of zeros every 127 columns in columns: 127,257,368...
curiosly, the columns are in the position of the new byte.
In matlab it works very well.
"""
def readDatFile(filename):
""" reads the binary file in python not in numpy
the data is byte type and it is converted to integer.
"""
import binascii
f = open(filename, 'rb')
data = f.read()
#dataByte = bytearray(data)
f.close()
data_out = []
for num in data:
aux = int(binascii.hexlify(num), 16)
data_out.append(aux)
#print aux
myarray = np.asarray(data_out)
return myarray
def rawConversionNew(filename):
# reads data from a binary file with tupe uint
# f = open(filename, 'rb')
# Rawdata = np.fromfile(f, dtype=np.uint8)
# f.close()
Rawdata = readDatFile(filename)
## gets the image
Data1d = 256*Rawdata[1::2] + Rawdata[0::2]
Data2d = Data1d.reshape(1792,4127)
Data2d = Data2d.T
Data2d = np.vstack([Data2d,np.zeros((1,1792),dtype=np.uint16)] )
Data3d = Data2d.reshape(129,32,1792)
Data3d = Data3d[0:128,:,:]
#plt.figure()
#plt.plot(np.arange(Data3d.shape[0]),Data3d[:,1,1])
#print (Data3d[:,0,0])
CMVimage = Data3d.reshape(4096,1792).T
return CMVimage
There were in fact two errors, not labeling the file as binary ("rb") and the reshape, which is done in a different way in Matlab and numpy.
If the reshape is done using reshape(dim1,dim2,order='F') the results are the same. Check: Matlab vs Python: Reshape

Cannot load an array with numpy load()

I cannot load an array from a binary file. What am I doing wrong?
pic = imread('headey-640.bmp')
save('test.in.npy', pic)
f = open('test.in.npy','r')
A = load(f)
---------------------------------------------------------------------------
ValueError: total size of new array must be unchanged
You have to open your file in binary mode:
import numpy as np
x = np.array([1,2,3])
np.save("test.npy", x)
with open("test.npy", "rb") as npy:
a = np.load(npy)

Categories