I am trying to convert *.dcm file to *.jpg file.
while I do that
I am using this code :
import os
import numpy as np
import pydicom
from PIL import Image
def get_names_of_imgs_inside_folder(directory):
names = []
for root, dirnames, filenames in os.walk(directory):
for filename in filenames:
_, ext = os.path.splitext(filename)
if ext in [".dcm"]:
names.append(filename)
return names
def convert(directory):
im = pydicom.dcmread('Database/'+directory)
im = im.pixel_array.astype(float)
rescaled_image = (np.maximum(im,0)/im.max())*255 # float pixels
final_image = np.uint8(rescaled_image) # integers pixels
final_image = Image.fromarray(final_image)
return final_image
names = get_names_of_imgs_inside_folder('Database')
for name in names:
image = convert(name)
image.save(name+'.jpg')
but at line 20 I get this error :
File is missing DICOM File Meta Information header or the 'DICM'
prefix is missing from the header. Use force=True to force reading.
How can I fix this problem .Can anybody help me please
Related
I have about 300000 image files in a zip folder. Some of those files have path starting with '__'. PIL function Image.Open() is not able to open these files. Please suggest a way to open them. My code below:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from io import BytesIO
from PIL import Image
from PIL import UnidentifiedImageError
problem_files = []
file_paths = []
img_list = []
img_size = (128,128)
with ZipFile('/XXX/YYY/ZZZ/AI_ML/Project2/words.zip') as myzip:
contents = myzip.namelist()
for i in range(0,len(contents)-1):
text = str(contents[i])
if '.png' in text:
file_paths.append(contents[i])
for path in file_paths:
img = myzip.read(path)
try:
img_data = Image.open(BytesIO(img))
except UnidentifiedImageError:
problem_files.append(path)
img_data = img_data.convert('L')
img_data = img_data.resize(img_size)
image_as_array = np.array(img_data, np.uint8)
image_as_array = np.reshape(image_as_array,(1,-1))
img_list.append(image_as_array)
This puts all the files with path starting with '__' into problem_files list
problem_files[-10:]
['__MACOSX/words/j04/j04-070/._j04-070-08-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-03.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-01.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-05.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-05.png']
There are about 100000 images in problem_files list
I'm in an introductory neural networking class so mind my ignorance. Also my first SO post.
I'm trying to resize some very highly resolved images within a dataset into 80x80p grayscale images in a new dataset. However, when I do this, I'd like to keep the filenames of each new image the same as the original image. The only way I know how to resave images into a new file is through a str(count) which isn't what I want. The filenames are important in creating a .csv file for my dataset later.
The only SO post I can find that is related is this:
Use original file name to save image
But the code suggested there didn't work - wasn't sure if I was going about it the wrong way.
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
img_resized.save(path2 + str(count) + '.png')
count +=1
pass
pass
Reuse the original filename that you get from the for loop i.e. file
and, split it into filename and extension using os.path.splitext() like below:
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
# splitting the original filename to remove extension
img_filename = os.path.splitext(file)[0]
img_resized.save(path2 + img_filename + '.png')
count +=1
pass
Another option, we can use python str's built-in split method to split the original filename by . and discard the extension.
import os
from PIL import Image
import imghdr
count=0
path1 = "/Users/..."
path2 = "/Users/..."
listing = os.listdir(path1)
for file in listing:
type = imghdr.what((path1 + file))
if type == "jpeg":
img = Image.open("/Users/..." +file).convert('LA')
img_resized = img.resize((80,80))
# splitting the original filename to remove extension
img_filename = file.split(".")[0]
img_resized.save(path2 + img_filename + '.png')
count +=1
pass
So, if an image has a name such as some_image.jpeg then, the img_filename will have a value some_image as we splitted by . and discarded .jpeg part of it.
NOTE: This option assumes the original_filename will not contain any . other than the extension.
I assume that image name is on path1. If so you can grap image name from there in this way:
x=path1.rsplit('/',1)[1]
We are splitting path1 on last slash and taking image name string via indexing.
In my file, I have a large number of images in jpg format and they are named [fruit type].[index].jpg.
Instead of manually making three new sub folders to copy and paste the images into each sub folder, is there some python code that can parse through the name of the images and choose where to redirect the image to based on the fruit type in the name, at the same time create a new sub folder when a new fruit type is parsed?
Before
TrainingSet (file)
apple.100.jpg
apple.101.jpg
apple.102.jpg
apple.103.jpg
peach.100.jpg
peach.101.jpg
peach.102.jpg
orange.100.jpg
orange.101.jpg
After
TrainingSet (file)
apple(file)
apple.100.jpg
apple.101.jpg
apple.102.jpg
apple.103.jpg
peach(file)
peach.100.jpg
peach.101.jpg
peach.102.jpg
orange(file)
orange.100.jpg
orange.101.jpg
Here’s the code to do just that, if you need help merging this into your codebase let me know:
import os, os.path, shutil
folder_path = "test"
images = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
for image in images:
folder_name = image.split('.')[0]
new_path = os.path.join(folder_path, folder_name)
if not os.path.exists(new_path):
os.makedirs(new_path)
old_image_path = os.path.join(folder_path, image)
new_image_path = os.path.join(new_path, image)
shutil.move(old_image_path, new_image_path)
If they are all formatted similarly to the three fruit example you gave, you can simply do a string.split(".")[0] on each filename you encounter:
import os
for image in images:
fruit = image.split(".")[0]
if not os.path.isdir(fruit):
os.mkdir(fruit)
os.rename(os.path.join(fruit, image))
As an idea, hope it helps
import os
from pathlib import Path
import shutil
folder_path = "images/"
nameList=[]
for image in os.listdir(folder_paths):
folder_name = image.split('.')[0]
nameList.append(folder_name)
for f in os.listdir(folder_paths):
Path(folder_name).mkdir(parents=True, exist_ok=True,mode=0o755)
des = folder_name +"/"+str(f)
old_path = folder_paths+str(f)
for path in nameList:
if f.endswith('.jpg'):
print(f)
if path == folder_name:
shutil.move(old_path, str(des))
I have converted my image into a csv file and it's like a matrix but I want it to be a single row.
How can I convert all of the images in dataset into a csv file (each image into one line).
Here's the code I've used:
from PIL import Image
import numpy as np
import os, os.path, time
format='.jpg'
myDir = "Lotus1"
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
fileList = createFileList(myDir)
fileFormat='.jpg'
for fileFormat in fileList:
format = '.jpg'
# get original image parameters...
width, height = fileList.size
format = fileList.format
mode = fileList.mode
# Make image Greyscale
img_grey = fileList.convert('L')
# Save Greyscale values
value = np.asarray(fileList.getdata(),dtype=np.float64).reshape((fileList.size[1],fileList.size[0]))
np.savetxt("img_pixels.csv", value, delimiter=',')
input :
http://uupload.ir/files/pto0_lotus1_1.jpg
output:http://uupload.ir/files/huwh_output.png
From your question, I think you want to know about numpy.flatten(). You want to add
value = value.flatten()
right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.
The rest of your question is unclear bit it implies you have a directory full of jpeg images and you want a way to read through them all. So first, get a file list:
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
The surround your code with a for fileName in fileList:
Edited to add complete example
Note that I've used csv writer and changed your float64 to ints (which should be ok as pixel data is 0-255
from PIL import Image
import numpy as np
import sys
import os
import csv
#Useful function
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
# load the original image
myFileList = createFileList('path/to/directory/')
for file in myFileList:
print(file)
img_file = Image.open(file)
# img_file.show()
# get original image parameters...
width, height = img_file.size
format = img_file.format
mode = img_file.mode
# Make image Greyscale
img_grey = img_file.convert('L')
#img_grey.save('result.png')
#img_grey.show()
# Save Greyscale values
value = np.asarray(img_grey.getdata(), dtype=np.int).reshape((img_grey.size[1], img_grey.size[0]))
value = value.flatten()
print(value)
with open("img_pixels.csv", 'a') as f:
writer = csv.writer(f)
writer.writerow(value)
How about you convert your images to 2D numpy arrays and then write them as txt files with .csv extensions and , as delimiters?
Maybe you could use a code like following:
np.savetxt('np.csv', image, delimiter=',')
import numpy as np
import cv2
import os
IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'
for img in os.listdir(IMG_DIR):
img_array = cv2.imread(os.path.join(IMG_DIR,img), cv2.IMREAD_GRAYSCALE)
img_array = (img_array.flatten())
img_array = img_array.reshape(-1, 1).T
print(img_array)
with open('output.csv', 'ab') as f:
np.savetxt(f, img_array, delimiter=",")
import os
import pandas as pd
path = 'path-to-the-folder'
os.chdir(path)
lists = os.listdir(path)
labels = []
file_lst = []
for folder in lists:
files = os.listdir(path +"/"+folder)
for file in files:
path_file = path + "/" + folder + "/" + file
file_lst.append(path_file)
labels.append(folder)
dictP_n = {"path": file_lst,
"label_name": labels,
"label": labels}
data = pd.DataFrame(dictP_n, index = None)
data = data.sample(frac=1)
data['label'] = data['label'].replace({"class1": 0, "class2": 1 })
data.to_csv("path-to-save-location//file_name.csv", index =None)
files2 = [f for f in listdir(dstpath) if isfile(join(dstpath,f))]
for image in files2:
img = cv2.imread(os.path.join(dstpath,image))
equ = cv2.equalizeHist(img)
dstPath2 = join(dstpath,image)
cv2.imwrite(dstPath2,equ)
I have a folder consisting of grayscale images in jpg format but when I run my above code for Histogram equalization it gives me the above mentioned error. Pls help
imread load image in color mode by default. Try to use img = cv2.imread(your_image_path,cv2.IMREAD_GRAYSCALE) instead
#author: Quantum
"""
import cv2
import os
from os import listdir,makedirs
from os.path import isfile,join
path = r'' # Source Folder
dstpath = r'' # Destination Folder
try:
makedirs(dstpath)
except:
print ("Directory already exist, images will be written in asme folder")
# Folder won't used
files = [f for f in listdir(path) if isfile(join(path,f))]
for image in files:
try:
img = cv2.imread(os.path.join(path,image),cv2.IMREAD_GRAYSCALE)
**imgnew=cv2.equalizeHist(img)**
dstPath = join(dstpath,image)
cv2.imwrite(dstPath,imgnew)
except:
print ("{} is not converted".format(image))
All I did was added the histeq function while my files are converted to grayscale