How to convert Image files to CSV with label - python

Test folder has folders named from 0 to 9. The 0-9 folders include respective handwritten digit images. I want to convert the images to a single test.csv file such that the first column gives the label of the digit (i.e 0-9) and the rest columns give the pixel value if image.
I created the csv but the first column for the label is being shown empty.
from scipy.misc import imread
import numpy as np
import pandas as pd
import os
import imageio
import glob
root = './test'
# go through each directory in the root folder given above
for directory, subdirectories, files in os.walk(root):
# go through each file in that directory
for file in files:
# read the image file and extract its pixels
im = imread(os.path.join(directory,file))
value = im.flatten()
value = np.hstack((directory[8:],value))
df = pd.DataFrame(value).T
df = df.sample(frac=1) # shuffle the dataset
with open('test.csv', 'a') as dataset:
df.to_csv(dataset, header=False, index=False)

from scipy.misc import imread
import numpy as np
import pandas as pd
import os
import imageio
import glob
import pathlib
v = []
for i,files in enumerate(pathlib.Path('./Train').glob('*/**/*.png')):
im = imread(files.as_posix())
value = im.flatten()
value = np.hstack((int(files.parent.name),value))
v.append(value)
df = pd.DataFrame(v)
df = df.sample(frac=1)
df.to_csv('train.csv',header=False,index=False)
This is how I corrected my code.

Related

Python : Get features of several images

I would like to get the feature of a several images located in the same folder.
My codes are as follow - Prerequisites (librairies needed):
import numpy as np
from PIL import Image
import glob
import cv2
import os
Definition of folder where are located the images (around 6000)
images_dir = "TrainImages"
Creation of a function that defines the different variables et compute them
def get_data_from_image(image_path):
cv_img = cv2.imread(image_path)
(means, stds) = cv2.meanStdDev(cv_img)
stats = np.concatenate([means, stds]).flatten()
image_features_list = [stats.tolist()]
return image_features_list
Creation of a variable that scans and analyses the images
image_files = [x.path for x in os.scandir(images_dir)]
Creation of a loop function
i = 0
mylist =[]
for i in range (4): # I test only 4 images, could be more
mylist.append((get_data_from_image(image_files[i])))
Running the stuff
image_features_list = get_data_from_image(image_files[i])
Look at the output
image_features_list
The output provides only the feature of one image, instead of all images located in the folder
[Out]:
[[114.31548828125001,
139.148388671875,
139.57832682291667,
50.54138521536725,
53.82290182999255,
51.946187641459595]]
I would be grateful if I could have a solution on how to have the features of all images (not only one). At this effect, do not hesitate to correct the code.
Thanks and kindest regards
After some commments from friendly persons, here is an additional information for those who would be interested by the response : The output to look at is mylist.
mylist
[Out]:
[[[144.28788548752834,
151.28145691609978,
148.6195351473923,
51.50620316379085,
53.36979275398226,
52.2493589172815]],
[[56.220865079365076,
59.99653968253968,
60.28386507936508,
66.72797279655177,
65.24673515467009,
64.93141350917332]],
[[125.2066064453125,
118.1168994140625,
145.0827685546875,
68.95463582009148,
52.65138276425348,
56.68269683130363]],
[[114.31548828125001,
139.148388671875,
139.57832682291667,
50.54138521536725,
53.82290182999255,
51.946187641459595]]]
Thanks for your help. It is a great forum here !
Try this approach and tell me if its successful
import os, os.path
import numpy as np
from PIL import Image
import cv2
def get_data_from_image(image_path):
cv_img = cv2.imread(image_path)
(means, stds) = cv2.meanStdDev(cv_img)
stats = np.concatenate([means, stds]).flatten()
image_features_list = [stats.tolist()]
return image_features_list
images_dir = 'C:\\Users\\User\\Directory\\TrainImages\\'
images_names = []
with os.scandir(images_dir) as dirs:
for entry in dirs:
images_names.append(entry.name)
for image in images_names:
path = images_dir + image
image_features_list = get_data_from_image(path))
print(image_features_list)

From numpy array to DICOM

My code reads a DICOM file, takes the pixel information to a numpy array then it modifies the numpy array. It uses lists because im trying to operate with multiple DICOM files at the same time.
I havent found any information on how to take my modified numpy array and make it a DICOM file again so i can use it outside Python.
#IMPORT
import cv2
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
import SimpleITK as sitk
from glob import glob
import pydicom as dicom
data_path = "C:\\Users\\oliva\\Desktop\\Py tesis\\dicom\\"
output_path = working_path = "C:\\Users\\oliva\\Desktop\\Py tesis\\dicom1\\"
path = glob(data_path + '/*.dcm')
#Checks if we are in the correct path
print ("Total of %d DICOM images.\nFirst 5 filenames:" % len(path))
print ('\n'.join(path[:14]))
data_set = []
for element in path:
imagen=sitk.ReadImage(element)
#imagen = cv2.imread(element)
array_imagen = sitk.GetArrayViewFromImage(imagen)
array2_imagen=array_imagen[0]
imagen_array_norm = np.uint8(cv2.normalize(array2_imagen, None, 0, 255, cv2.NORM_MINMAX))
data_set.append(imagen_array_norm)
#Check
print(len(data_set))
print(type(data_set[1]))
plt.imshow(data_set[4], cmap=plt.cm.gray)
#Equalization
data_set_eq = equal(data_set)
print(len(data_set_eq))
print(type(data_set_eq[6]))
plt.imshow(data_set_eq[7], cmap=plt.cm.gray)
#Filtering
data_set_m = median(data_set)
print(len(data_set_m))
print(type(data_set_m[6]))
plt.imshow(data_set_m[8], cmap=plt.cm.gray)
#Functions
def equal(data):
data_set_eq = []
for element in data_set:
imagen_array_eq = cv2.equalizeHist(element)
data_set_eq.append(imagen_array_eq)
return data_set_eq
def median(data):
data_set_m = []
for element in data_set:
imagen_array_m =cv2.medianBlur(element,5)
data_set_m.append(imagen_array_m)
return data_set_m
I would like some enlightenment on how to produce a DICOM file from my modified numpy array.
You can convert the numpy array back to a SimpleITK image, and then write it out as Dicom. The code would look something like this:
for x in data_set:
img = sitk.GetImageFromArray(x)
sitk.WriteImage(img, "your_image_name_here.dcm")
From the file name suffix, SimpleITK knows to write Dicom.
Note that the filtering you are doing can be accomplished within SimpleITK. You don't really need to use OpenCV. Check out the following filters in SimpleITK: IntensityWindowingImageFilter, AdaptiveHistogramEqualizationFilter, and MedianImageFilter.
https://itk.org/SimpleITKDoxygen/html/classitk_1_1simple_1_1IntensityWindowingImageFilter.html
https://itk.org/SimpleITKDoxygen/html/classitk_1_1simple_1_1AdaptiveHistogramEqualizationImageFilter.html
https://itk.org/SimpleITKDoxygen/html/classitk_1_1simple_1_1MedianImageFilter.html

Pytorch——TypeError: Cannot handle this data type

I'm doing an image processing task and I want to use torch.cat to concat pictures belonging to two different folders. The size of the images in folder 1 is 224*224*3, and the size of the images in folder 2 is 224*224*1.After that, I want to save the generated image.I used the save function, but I got an error, the error code is "TypeError: Cannot handle this data type".Please help me find the solution.
Here is my code:
import glob
import os
import torch
import torchvision.transforms.functional as TF
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import scipy.misc
f1_folder = 'F:\\picture1'
f2_folder = 'F:\\picture2'
f1_images = glob.glob(os.path.join(f1_folder, '*.jpg'))
f2_images = glob.glob(os.path.join(f2_folder, '*.jpg'))
for f1_img, f2_img in zip(f1_images, f2_images):
img1 = Image.open(f1_img)
img2 = Image.open(f2_img)
cat_img = torch.cat((TF.to_tensor(img1), TF.to_tensor(img2)), dim=1)# for 'CHW'
cat_img = cat_img.numpy()
cat_img = np.transpose(cat_img, (1, 2, 0))
im = Image.fromarray(cat_img)
im.save("file.jpeg")

Convert .png file to pandas DataFrame

I have a 512x512 .png radar image name png_image
I also have a csv rainfall_colour_table with the following headers B, G, R, rainfall_mm, that I can use to link colour values to rainfall intensity values.
What is the best way to read png_image and convert it to a 512x512 Pandas DataFrame where the values are rainfall_mm from rainfall_colour_table.
I have used the following approach.
from urllib.request import urlopen, Request
import cv2
import os
import numpy as np
import pandas as pd
# NOTE: the ftp sourve of the png changes every 30 min
# get any current .png file name from the website ftp://ftp.bom.gov.au/anon/gen/radar//
# and set the below variable
file_name =
# get connection to file
input_url = "ftp://ftp.bom.gov.au/anon/gen/radar//" + file_name
req = Request(input_url)
req_html = urlopen(req).read()
# read file
radar_image = np.fromstring(req_html, np.uint8) # read byte image
radar_image = cv2.imdecode(radar_image, cv2.IMREAD_COLOR) # convert to numppy array
# OS agnostic relative file path
# get the current directory path
base_dir = os.path.dirname(__file__)
# OS agnostic relative file path
# load colour to mm/hr concurrency table
rainfall_colour_table = os.path.join(os.sep, base_dir, 'sample_data', 'radar_colours.csv')
rainfall_colour_df = pd.read_csv(rainfall_colour_table)
rainfall_colour_df.set_index(['B', 'G', 'R'], inplace=True)
# switch colours with rain intensity
radar_df = pd.DataFrame(rainfall_colour_df.loc[list(map(tuple, pin))].rainfall.values for pin in radar_image)
radar_df.columns = ['pixel_col_' + str(col) for col in radar_df.columns]
The rainfall_colour_table.csv
colour_id,rainfall,B,G,R
2,1.5,255,180,180
3,2.5,255,120,120
4,4,255,20,20
5,6,195,216,0
6,10,144,150,0
7,15,102,102,0
8,20,0,255,255
9,35,0,200,255
10,50,0,150,255
11,80,0,100,255
12,120,0,0,255
13,200,0,0,200
14,300,0,0,120
15,360,0,0,40

pandas.DataFrame returns Series not a Dataframe

I am working with a series of images. I read them first and store in the list then I convert them to dataframe and finally I would like to implement Isomap. When I read images (I have 84 of them) I get 84x2303 dataframe of objects. Now each object by itself also looks like a dataframe. I am wondering how to convert all of it to_numeric so I can use Isomap on it and then plot it.
Here is my code:
import pandas as pd
from scipy import misc
from mpl_toolkits.mplot3d import Axes3D
import matplotlib
import matplotlib.pyplot as plt
import glob
from sklearn import manifold
samples = []
path = 'Datasets/ALOI/32/*.png'
files = glob.glob(path)
for name in files:
img = misc.imread(name)
img = img[::2, ::2]
x = (img/255.0).reshape(-1,3)
samples.append(x)
df = pd.DataFrame.from_records(samples)
print df.dtypes
print df.shape
Thanks!

Categories