Convert .mat file to image in google colab - python

I'm currently trying to converting the images from a .mat file to .jpg file downloaded from this site- BrainTumorDataset. All the files contained in the directory are .mat files, now I want to convert all the files in .jpg format via python for making a project: Brain Tumor Classification via CNN. I searched in StackOverflow and got a answer. But I cannot do using their code in the answer part. Here I used the code-
from os import path
import os
from matplotlib import pyplot as plt
import numpy as np
import h5py
from PIL import Image
import re
import sys
from glob import glob
dir_path = path.dirname(path.abspath('/content/drive/MyDrive/Colab Notebooks/dataset/brain tumor/New folder'))
path_to_mat_files = path.join(dir_path, "*.mat")
found_files = glob(path_to_mat_files, recursive=True)
total_files = 0
def convert_to_png(file: str, number: int):
global total_files
if path.exists(file):
print(file, "already exist\nSkipping...")
else:
h5_file = h5py.File(file, 'r')
png = file[:-3] + "png"
cjdata = h5_file['cjdata']
image = np.array(cjdata.get('image')).astype(np.float64)
label = cjdata.get('label')[0,0]
PID = cjdata.get('PID')
PID = ''.join(chr(c) for c in PID)
tumorBorder = np.array(cjdata.get('tumorBorder'))[0]
tumorMask = np.array(cjdata.get('tumorMask'))
h5_file.close()
hi = np.max(image)
lo = np.min(image)
image = (((image - lo)/(hi-lo))*255).astype(np.uint8)
im = Image.fromarray(image)
im.save(png)
os.system(f"mv {png} {dir_path}\\png_images")#make sure folder png_images exist
total_files += 1
print("saving", png, "File No: ", number)
for file in found_files:
if "cvind.mat" in file:
continue
convert_to_png(file, total_files)
print("Finished converting all files: ", total_files)
and I got the msg :
/content/drive/MyDrive/Colab Notebooks/dataset/brain tumor/cvind (1).mat already exist
Skipping...
Finished converting all files: 0
Here is my colab notebook link. and here is the dataset link in my google drive. I want to convert .mat images in .jpg format. How can I do this? What should I do to convert .mat images in .jpg format?

Related

Python, converting *.dcm files to *.png (dicom)

I am trying to convert *.dcm file to *.jpg file.
while I do that
I am using this code :
import os
import numpy as np
import pydicom
from PIL import Image
def get_names_of_imgs_inside_folder(directory):
names = []
for root, dirnames, filenames in os.walk(directory):
for filename in filenames:
_, ext = os.path.splitext(filename)
if ext in [".dcm"]:
names.append(filename)
return names
def convert(directory):
im = pydicom.dcmread('Database/'+directory)
im = im.pixel_array.astype(float)
rescaled_image = (np.maximum(im,0)/im.max())*255 # float pixels
final_image = np.uint8(rescaled_image) # integers pixels
final_image = Image.fromarray(final_image)
return final_image
names = get_names_of_imgs_inside_folder('Database')
for name in names:
image = convert(name)
image.save(name+'.jpg')
but at line 20 I get this error :
File is missing DICOM File Meta Information header or the 'DICM'
prefix is missing from the header. Use force=True to force reading.
How can I fix this problem .Can anybody help me please

How would I read a folder of images on my computer into a dataframe on Jupyter Notebooks?

This is my code right now, I have used a for loop to go through all the images in the folder and use the PIL library to read it into an array.
import cv2
import os
folder = '/Users/x/x/x/x'
def load_images_from_folder(folder):
images = []
for filename in os.listdir(folder):
img = cv2.imread(os.path.join(folder,filename))
if img is not None:
images.append(img)
return images
img = Image.fromarray(images_array,'RGB', dtype = object)

How to continuously update video from images that keep coming in python?

I am working on a robotics project in which a robot creates images in .pgm format (not camera images) continuously which I'm converting to .jpeg and saving to a directory. However, this directory keeps updating as more images keep getting added while I want to convert these images continuously in a video. I can already create a video from the images saved in one instance but what I want is to update the video continuously as a single file (streaming onto a URL) while images keep getting saved. Here is my code but it doesn't work in parallel for images as well as for videos. Anyone can help?
from PIL import Image
import os, glob
import cv2
import numpy as np
from os.path import isfile, join
# remove .yaml files
directory = "/home/user/catkin_ws/src/ros_map/map_pgms"
files_in_directory = os.listdir(directory)
filtered_files = [file for file in files_in_directory if file.endswith(".yaml")]
for file in filtered_files:
path_to_file = os.path.join(directory, file)
os.remove(path_to_file)
def batch_image(in_dir, out_dir):
if not os.path.exists(out_dir):
print(out_dir, 'is not existed.')
os.mkdir(out_dir)
if not os.path.exists(in_dir):
print(in_dir, 'is not existed.')
return -1
count = 0
for files in glob.glob(in_dir + '/*'):
filepath, filename = os.path.split(files)
out_file = filename[0:9] + '.jpg'
# print(filepath,',',filename, ',', out_file)
im = Image.open(files)
new_path = os.path.join(out_dir, out_file)
print(count, ',', new_path)
count = count + 1
im.save(os.path.join(out_dir, out_file))
if __name__ == '__main__':
batch_image('/home/user/catkin_ws/src/ros_map/map_pgms', './batch')
# convert the .jpgs to video
img_array = []
for filename in glob.glob('./batch/*.jpg'):
img = cv2.imread(filename)
height, width, layers = img.shape
size = (width, height)
img_array.append(img)
out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
for i in range(len(img_array)):
out.write(img_array[i])
out.release()

PIL Image Open is not able to open some files in a zip folder

I have about 300000 image files in a zip folder. Some of those files have path starting with '__'. PIL function Image.Open() is not able to open these files. Please suggest a way to open them. My code below:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from io import BytesIO
from PIL import Image
from PIL import UnidentifiedImageError
problem_files = []
file_paths = []
img_list = []
img_size = (128,128)
with ZipFile('/XXX/YYY/ZZZ/AI_ML/Project2/words.zip') as myzip:
contents = myzip.namelist()
for i in range(0,len(contents)-1):
text = str(contents[i])
if '.png' in text:
file_paths.append(contents[i])
for path in file_paths:
img = myzip.read(path)
try:
img_data = Image.open(BytesIO(img))
except UnidentifiedImageError:
problem_files.append(path)
img_data = img_data.convert('L')
img_data = img_data.resize(img_size)
image_as_array = np.array(img_data, np.uint8)
image_as_array = np.reshape(image_as_array,(1,-1))
img_list.append(image_as_array)
This puts all the files with path starting with '__' into problem_files list
problem_files[-10:]
['__MACOSX/words/j04/j04-070/._j04-070-08-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-03.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-01.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-05.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-05.png']
There are about 100000 images in problem_files list

Cache error while doing OCR on a directory of pdf's in python

I am trying to OCR an entire directory of pdf files using pytesseract and imagemagick but the issue is that imagemagick is consuming all my Temp folder space and finally I'm getting a cache error i.e "CacheError: unable to extend cache 'C:/Users/Azu/AppData/Local/Temp/magick-18244WfgPyAToCsau11': No space left on device # error/cache.c/OpenPixelCache/3883" I have also written a code to delete the temp folder content once OCR'd but still facing the same issue.
Here's the code till now:
import io
import os
import glob
from PIL import Image
import pytesseract
from wand.image import Image as wi
files = glob.glob(r"D:\files\**")
tempdir = r"C:\Users\Azu\AppData\Local\Temp"
filesall = os.listdir(tempdir)
for file in files:
name = os.path.basename(file).split('.')[0]
#print(file)
pdf = wi(filename = file, resolution = 300)
pdfImg = pdf.convert('jpeg')
imgBlobs = []
for img in pdfImg.sequence:
page = wi(image = img)
imgBlobs.append(page.make_blob('jpeg'))
extracted_texts = []
for imgBlob in imgBlobs:
im = Image.open(io.BytesIO(imgBlob))
text = pytesseract.image_to_string(im, lang = 'eng')
extracted_texts.append(text)
with open("D:\\extracted_text\\"+ name + ".txt", 'w') as f:
f.write(str(extracted_texts))
for ifile in filesall:
if "magick" in ifile:
os.remove(os.path.join(tempdir,ifile))

Categories