Python dict to csv - python

I have written a script to find image size and aspect ratio of all images in a directory along with their corresponding filepaths, I want to print dict values to csv file with following headers width,height,aspect-ratio and filepath
import os
import json
from PIL import Image
folder_images = "/home/user/Desktop/images"
size_images = dict()
def yocd(a,b):
if(b==0):
return a
else:
return yocd(b,a%b)
for dirpath, _, filenames in os.walk(folder_images):
for path_image in filenames:
if path_image.endswith(".png") or path_image.endswith('.jpg') or path_image.endswith('.JPG') or path_image.endswith('.jpeg'):
image = os.path.abspath(os.path.join(dirpath, path_image))
""" ImageFile.LOAD_TRUNCATED_IMAGES = True """
try:
with Image.open(image) as img:
img.LOAD_TRUNCATED_IMAGES = True
img.verify()
print('Valid image')
except Exception:
print('Invalid image')
img = False
if img is not False:
width, heigth = img.size
divisor = yocd(width, heigth)
w = str(int(width / divisor))
h = str(int(heigth / divisor))
aspectratio = w+':'+h
size_images[image] = {'width': width, 'heigth': heigth,'aspect-ratio':aspectratio,'filepath': image}
for k, v in size_images.items():
print(k, '-->', v)
with open('/home/user/Documents/imagesize.txt', 'w') as file:
file.write(json.dumps(size_images))```

You can add a (properly constructed) dict directly to a pandas.DataFrame. Then, DataFrames have a .to_csv() function.
Here are the docs:
Pandas: Create a DataFrame
Pandas: Write to CSV

Without dependencies (but you may have to tweak the formatting)
csv_sep = ';' # choose here wich field separatar you want
with open('your_csv', 'w') as f:
# header
f.write("width"+csv_sep"+height"+csv_sep"+aspect-ratio"+csv_sep+"filepath\n")
# data
for img in size_images:
fields = [img['width'], img['height'], img['aspect-ratio'], img['filepath']]
f.write(csv_sep.join(fields)+'\n')

Related

Want to get image size in kilobytes using python [duplicate]

This question already has answers here:
How to get image size (bytes) using PIL
(5 answers)
Closed 8 months ago.
I am trying to get the image name, resolution, and size of the image from the directory. I got image name, resolution and size but image size is not in kilobytes it is getting like pixels. so please suggest me how to get the image size with the python script.
# Required Libraries
from os import listdir
from os.path import isfile, join
from pathlib import Path
import numpy
import cv2
import argparse
import numpy
import csv
from PIL import Image
# Check whether the CSV
# exists or not if not then create one.
my_file = Path("csv/details.csv")
if my_file.is_file():
f = open(my_file, "w+")
with open('csv/details.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow(["S.No.", "Name", "Resolution", "Size"
])
f.close()
pass
else:
with open('csv/details.csv', 'w', newline = '') as file:
writer = csv.writer(file)
writer.writerow(["S.No.", "Name", "Resolution", "Size"
])
# Argparse function to get
# the path of the image directory
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image",
required = True,
help = "Path to folder")
args = vars(ap.parse_args())
# Program to find the
# colors and embed in the CSV
mypath = args["image"]
onlyfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
images = numpy.empty(len(onlyfiles), dtype = object)
for n in range(0, len(onlyfiles)):
path = join(mypath,onlyfiles[n])
images[n] = cv2.imread(join(mypath,onlyfiles[n]),
cv2.IMREAD_UNCHANGED)
img = cv2.imread(path)
h,w,c = img.shape
resolution = f"{h} X {w}"
size = img.size
print(h, w)
avg_color_per_row = numpy.average(img, axis = 0)
avg_color = numpy.average(avg_color_per_row, axis = 0)
with open('csv/details.csv', 'a', newline = '') as file:
writer = csv.writer(file)
writer.writerow([n+1, onlyfiles[n], resolution, size
])
file.close()
and also please find below is the screenshot for your reference.
Hi you can use this inside your for loop
import os
file_size = os.path.getsize('d:/file.jpg')
print("File Size is :", file_size, "bytes")

CSV Class reader not getting actual image

I have a CSV file reader which reads a csv file with one column and obtains data about the image such as its label. After much debugging I have found my all data about the image is read but I'm missing the actual get image part, so currently, cv2 shows the input as a black box.
Im not sure what to do to achieve this, and need some assistance
My CSV Reader Below
def __init__(self, csvPath, imageHeight, imageWidth, transform = None):
"""
Arguments:
A CSV File Path
Path to Image Foldr
Extension of Images
PIL Transforms
"""
self.dataFromCSV = pD.read_csv(csvPath)
self.dataLabels = nP.asarray(self.dataFromCSV.iloc[:, 0])
self.imageHeight = imageHeight
self.imageWidth = imageWidth
self.transform = transform
def __getitem__(self, index):
singleImageLabel = self.dataLabels[index]
imagePath = singleImageLabel.split(";")[0]
print ('Path: ' + str(imagePath))
originalImage = cv2.imread(imagePath)
cv2.imshow('IMAGE', originalImage)
# Create an Empty Numpy Array to Fill
imageAsNumpy = nP.ones((32, 32), dtype = 'uint8')
# Fill the Numpy Array with Data from Pandas DF
for i in range(1):
rowPosition = (i-1) // self.imageHeight
columnPosition = (i-1) % self.imageWidth
indexFirst = self.dataFromCSV.iloc[index][i].split(";")[3]
indexLast = self.dataFromCSV.iloc[index][i].split(";")[6]
imageAsNumpy[rowPosition][columnPosition] = indexFirst + indexLast
print ('LABEL: ' + str(singleImageLabel))
cv2.imshow('INPUT',imageAsNumpy)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Convert Image from Numpy Array to PIL Image, Mode 'L' is for Grayscale
imageAsImage = Image.fromarray(imageAsNumpy)
imageAsImage = imageAsImage.convert('1')
# Transform Image to Tensor
if self.transform is not None:
imageAsTensor = self.transform(imageAsImage)
# Transform Label to Tensor
labelAsLabel = int(singleImageLabel.split(";")[7])
labelAsTensor = torch.from_numpy(nP.array(labelAsLabel))
# print ('Target: ' + str(labelAsTensor))
# Return Image & the Label
return (imageAsTensor, labelAsLabel)
def __len__(self):
return len(self.dataFromCSV.index)

Converting images to csv file in python

I have converted my image into a csv file and it's like a matrix but I want it to be a single row.
How can I convert all of the images in dataset into a csv file (each image into one line).
Here's the code I've used:
from PIL import Image
import numpy as np
import os, os.path, time
format='.jpg'
myDir = "Lotus1"
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
fileList = createFileList(myDir)
fileFormat='.jpg'
for fileFormat in fileList:
format = '.jpg'
# get original image parameters...
width, height = fileList.size
format = fileList.format
mode = fileList.mode
# Make image Greyscale
img_grey = fileList.convert('L')
# Save Greyscale values
value = np.asarray(fileList.getdata(),dtype=np.float64).reshape((fileList.size[1],fileList.size[0]))
np.savetxt("img_pixels.csv", value, delimiter=',')
input :
http://uupload.ir/files/pto0_lotus1_1.jpg
output:http://uupload.ir/files/huwh_output.png
From your question, I think you want to know about numpy.flatten(). You want to add
value = value.flatten()
right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.
The rest of your question is unclear bit it implies you have a directory full of jpeg images and you want a way to read through them all. So first, get a file list:
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
The surround your code with a for fileName in fileList:
Edited to add complete example
Note that I've used csv writer and changed your float64 to ints (which should be ok as pixel data is 0-255
from PIL import Image
import numpy as np
import sys
import os
import csv
#Useful function
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
# load the original image
myFileList = createFileList('path/to/directory/')
for file in myFileList:
print(file)
img_file = Image.open(file)
# img_file.show()
# get original image parameters...
width, height = img_file.size
format = img_file.format
mode = img_file.mode
# Make image Greyscale
img_grey = img_file.convert('L')
#img_grey.save('result.png')
#img_grey.show()
# Save Greyscale values
value = np.asarray(img_grey.getdata(), dtype=np.int).reshape((img_grey.size[1], img_grey.size[0]))
value = value.flatten()
print(value)
with open("img_pixels.csv", 'a') as f:
writer = csv.writer(f)
writer.writerow(value)
How about you convert your images to 2D numpy arrays and then write them as txt files with .csv extensions and , as delimiters?
Maybe you could use a code like following:
np.savetxt('np.csv', image, delimiter=',')
import numpy as np
import cv2
import os
IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'
for img in os.listdir(IMG_DIR):
img_array = cv2.imread(os.path.join(IMG_DIR,img), cv2.IMREAD_GRAYSCALE)
img_array = (img_array.flatten())
img_array = img_array.reshape(-1, 1).T
print(img_array)
with open('output.csv', 'ab') as f:
np.savetxt(f, img_array, delimiter=",")
import os
import pandas as pd
path = 'path-to-the-folder'
os.chdir(path)
lists = os.listdir(path)
labels = []
file_lst = []
for folder in lists:
files = os.listdir(path +"/"+folder)
for file in files:
path_file = path + "/" + folder + "/" + file
file_lst.append(path_file)
labels.append(folder)
dictP_n = {"path": file_lst,
"label_name": labels,
"label": labels}
data = pd.DataFrame(dictP_n, index = None)
data = data.sample(frac=1)
data['label'] = data['label'].replace({"class1": 0, "class2": 1 })
data.to_csv("path-to-save-location//file_name.csv", index =None)

How to encrypt images with python

Preface: this is required for a class, I know ECB should not be used.
I am trying to encrypt images using AES and then display the images
Steps needed:
Read the image,
Convert to byte object,
Pad the bytes,
Encrypt the bytes,
Convert back to image object,
Save as image file
This is my code right now:
from PIL import Image
from Crypto.Cipher import AES
from Crypto import Random
img = Image.open("photo.jpg")
img.tobytes()
key = '0123456789abcdef'
mode = AES.MODE_ECB
encryptor = AES.new(key, mode)
img.frombytes("RGB")
At this point I am stuck. I am getting a "not enough image data" error on the line "img.frombytes("RGB"), and am also stuck at the part to pad the bytes
So I needed a way to transfer files in the form of images (don't ask me why), if you just want to transfer text you can maybe create a txt file.
This is probably not exactly the best solution to the question as you probably want a way to hide data inside an existing image but I would like anyway to share the code in case it will help someone sometime somewhere.
Basically this will create an image with a size dependent on the file size and will put a sequence of 3 bytes in one pixel (RGB)
So I wrote a small folder2ImageEncoder.py
(it will encrypt all the data that is located in a folder named "files" by default)
from PIL import Image
from pathlib import Path
encryptedImagesFolder = 'encryptedImagesFolder'
Path(f"./{encryptedImagesFolder}").mkdir(parents=True, exist_ok=True)
newLine = b'\new\n\rL'
def encode_data_to_image(data: bytes, imagePath: str):
data += b'FINISH_OF_DATA'
data = str(
{
'path': imagePath,
'data': data
}
)
data = data.encode()
n = int((len(data)/3)**0.5) + 1
print(n, len(data))
img = Image.new('RGB', (n, n))
# data = img.getdata()
encryptedPixelsList = []
pixel = []
if len(data) % 3 != 0:
data += (3 - (len(data) % 3)) * b'\x00'
for i, Byte in enumerate(data):
if i % 3 == 2:
pixel.append(Byte)
encryptedPixelsList.append(tuple(pixel))
pixel = []
else:
pixel.append(Byte)
for _ in range(len(encryptedPixelsList), n**2):
encryptedPixelsList.append((0, 0, 0))
img.putdata(encryptedPixelsList)
imagePath = imagePath.replace('\\', '_')
img.save(f'./{encryptedImagesFolder}/{imagePath}.png')
# img.show()
def encode_folder(folder: Path):
for file in folder.iterdir():
if not file.is_dir():
with open(str(file), 'rb') as rb:
data = rb.readlines()
encode_data_to_image(
data=newLine.join(data),
imagePath=str(file))
else:
encode_folder(folder=file)
if __name__ == '__main__':
# ./files is the name of the folder you want to encrypt
encode_folder(folder=Path(r'./files'))
and a Image2FilesDecoder.py
this will iterate through the encrypted images folder and will retrieve its former form (run this in another folder with the encrypted images folder so it won't override the original files folder)
from PIL import Image
from pathlib import Path
newLine = b'\new\n\rL'
def decode_encrypted_images(folder: Path):
for pic in folder.iterdir():
img = Image.open(str(pic))
data = img.getdata()
totalData = []
for pixel in data:
totalData.extend(list(pixel))
decryptedData = bytes(totalData)
try:
decryptedData = eval(
decryptedData[:decryptedData.rfind(b'}')+1].decode())
except:
decryptedData.replace(b'\\', '')
decryptedData = eval(
decryptedData[:decryptedData.rfind(b'}')+1].decode())
decryptedData['data'] = decryptedData['data'][:-14]
filePathObj = Path(decryptedData['path'])
Path(filePathObj.parent).mkdir(
parents=True, exist_ok=True)
writeBytes = decryptedData['data'].split(newLine)
with open(str(filePathObj), 'wb') as wb:
wb.writelines(writeBytes)
if __name__ == '__main__':
decode_encrypted_images(folder=Path(
r".\encryptedImagesFolder"))

Python, ignore files with no Exif data

I am trying to do a mass extraction of gps exif data, my code below:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
def get_exif_data(image):
exif_data = {}
info = image._getexif()
if info:
for tag, value in info.items():
decoded = TAGS.get(tag, tag)
if decoded == "GPSInfo":
gps_data = {}
for t in value:
sub_decoded = GPSTAGS.get(t, t)
gps_data[sub_decoded] = value[t]
exif_data[decoded] = gps_data
else:
exif_data[decoded] = value
return exif_data
def _get_if_exist(data, key):
if key in data:
return data[key]
else:
pass
def get_lat_lon(exif_data):
gps_info = exif_data["GPSInfo"]
lat = None
lon = None
if "GPSInfo" in exif_data:
gps_info = exif_data["GPSInfo"]
gps_latitude = _get_if_exist(gps_info, "GPSLatitude")
gps_latitude_ref = _get_if_exist(gps_info, "GPSLatitudeRef")
gps_longitude = _get_if_exist(gps_info, "GPSLongitude")
gps_longitude_ref = _get_if_exist(gps_info, "GPSLongitudeRef")
if gps_latitude and gps_latitude_ref and gps_longitude and gps_longitude_ref:
lat = _convert_to_degrees(gps_latitude)
if gps_latitude_ref != "N":
lat = 0 - lat
lon = _convert_to_degrees(gps_longitude)
if gps_longitude_ref != "E":
lon = 0 - lon
return lat, lon
Code source
Which is run like:
if __name__ == "__main__":
image = Image.open("photo directory")
exif_data = get_exif_data(image)
print(get_lat_lon(exif_data)
This works fine for one photo, so I've used glob to iterate over all photos in a file:
import glob
file_names = []
for name in glob.glob(photo directory):
file_names.append(name)
for item in file_names:
if __name__ == "__main__":
image = Image.open(item)
exif_data = get_exif_data(image)
print(get_lat_lon(exif_data))
else:
pass
Which works fine, as long as every photo in the file is a) an image and b) has gps data. I have tried adding a pass in the _get_if_exist function as well as my file iteration, however, neither same to have had any impact and I'm still receiving KeyError: 'GPSInfo'
Any ideas on how I can ignore photos with no data or different file types?
A possible approach would be writing a small helper function that first checks, if the file is actually an image file and as a second step checks if the image contains EXIF data.
def is_metadata_image(filename):
try:
image = Image.open(filename)
return 'exif' in image.info
except OSError:
return False
I found that PIL does not work every time with .png files that do contain EXIF information when using _getexif(). So instead I check for the key exif in the info dictionary of an image.
I've tried this source code.
Simply you need to remove
gps_info = exif_data["GPSInfo"]
from the first line of get_lat_lon(exif_data) function, it works well for me.

Categories