This question already has answers here:
How to get image size (bytes) using PIL
(5 answers)
Closed 8 months ago.
I am trying to get the name, resolution, and file size of each image in a directory. I get the name and resolution correctly, but the size is not in kilobytes; it looks like a pixel count instead. Please suggest how to get the image file size with this Python script.
# Required Libraries
from os import listdir
from os.path import isfile, join
from pathlib import Path
import numpy
import cv2
import argparse
import numpy
import csv
from PIL import Image
# Start every run with a CSV that contains only the header row.
# (Whether or not the file already existed, the result is the same:
# it is truncated and the header is written.)
my_file = Path("csv/details.csv")
my_file.parent.mkdir(parents=True, exist_ok=True)
with open(my_file, "w", newline="") as file:
    csv.writer(file).writerow(["S.No.", "Name", "Resolution", "Size"])

# Argparse function to get
# the path of the image directory
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to folder")
args = vars(ap.parse_args())

# Collect name, resolution and size of every image
# and append one row per image to the CSV.
mypath = args["image"]
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
with open(my_file, "a", newline="") as file:
    writer = csv.writer(file)
    # The serial number is the file's 1-based position in the listing.
    for serial, name in enumerate(onlyfiles, start=1):
        path = join(mypath, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable file: {name}")
            continue
        h, w = img.shape[:2]
        resolution = f"{h} X {w}"
        # NOTE: ndarray.size is the number of array elements
        # (height * width * channels), not the size of the file on disk;
        # os.path.getsize(path) reports the size in bytes.
        size = img.size
        print(h, w)
        writer.writerow([serial, name, resolution, size])
and also please find below is the screenshot for your reference.
Hi you can use this inside your for loop
import os

# Size of the file on disk, in bytes.
file_size = os.path.getsize('d:/file.jpg')
print(f"File Size is : {file_size} bytes")
Related
I'm trying to write the Modus_citra value computed in each loop iteration (a NumPy value) to a CSV file, but neither NumPy nor the plain csv writer has worked.
import glob
import cv2
from os import listdir
from os.path import isfile, join
import os
import numpy as np
from sklearn.utils.validation import check_array
import csv
import pandas as pd
def find_mode(np_array):
    """Return the most frequent value in *np_array*.

    The array is flattened by ``np.unique``. When several values share the
    highest count, the smallest of them is returned, because ``np.unique``
    yields sorted values and ``argmax`` picks the first maximum.

    Raises:
        ValueError: if *np_array* is empty (``argmax`` of an empty sequence).
    """
    vals, counts = np.unique(np_array, return_counts=True)
    return vals[np.argmax(counts)]
folder = ("C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg")
for file in glob.glob(folder):
    image = cv2.imread(file)
    rows = image.shape[0]
    cols = image.shape[1]
    # Centre of the image; a 20x20 pixel window around it is analysed.
    middlex = cols/2
    middley = rows/2
    middle = [middlex, middley]
    titikawalx = middlex - 10
    titikawaly = middley - 10
    titikakhirx = middlex + 10
    titikakhiry = middley + 10
    crop = image[int(titikawaly):int(titikakhiry), int(titikawalx):int(titikakhirx)]
    c = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(c)
    # Most frequent hue value inside the window.
    Modus_citra = (find_mode(H))
    data = (Modus_citra)
    # NOTE: data is a single numpy.uint8 scalar, while
    # csv.writer.writerows() expects an iterable of rows, so this call
    # raises "TypeError: 'numpy.uint8' object is not iterable".
    # Mode 'w' would also overwrite foo.csv for every image.
    with open("foo.csv", 'w') as file:
        writer = csv.writer(file)
        writer.writerows(data)
error = 'numpy.uint8' object is not iterable
foo.csv = from pictures
60
40
19
11
please can someone help me ? i Appreciate every help
According to the edit, you can try:
folder = "C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg"
all_data = []
for file in glob.glob(folder):
    # ...
    Modus_citra = find_mode(H)
    all_data.append(Modus_citra)  # <-- add the numpy.uint8 to the all_data list

# After the loop, write the data to the CSV file, one value per row.
# newline="" lets the csv module control line endings; without it,
# Windows output gets an extra blank line after every row.
with open("foo.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows([data] for data in all_data)
The txt files were generated by labelImg and need to be converted into one CSV table with x_center, y_center, height, and width. Some txt files contain more than one line of flies,
so the same image_id should then have one row per line. I was able to read only the first line of each txt file, but I am unable to read the remaining values when a .txt file has more than one line.
import os
import glob
import pandas as pd
import numpy as np
os.chdir(r'D:\karami\Labeled\train1\labels')
myFiles = glob.glob('*.txt')
# Image dimensions used to scale the normalised YOLO coordinates.
width = 1024
height = 1024
image_id = 0
final_df = []
for item in myFiles:
    row = []
    bbox_temp = []
    with open(item, 'rt') as fd:
        # NOTE: only the first line is read, so a file holding several
        # annotations still contributes a single row.
        first_line = fd.readline()
    splited = first_line.split()
    # NOTE: image_id is never incremented, so every row gets id 0.
    row.append(image_id)
    row.append(width)
    row.append(height)
    try:
        # YOLO columns: class x_center y_center width height (normalised).
        bbox_temp.append(float(splited[1])*width)
        bbox_temp.append(float(splited[2])*height)
        bbox_temp.append(float(splited[3])*width)
        bbox_temp.append(float(splited[4])*height)
        row.append(bbox_temp)
        final_df.append(row)
    except (IndexError, ValueError):
        # Too few columns or a non-numeric field.
        print("file is not in YOLO format!")
df = pd.DataFrame(final_df, columns=['image_id', 'width', 'height', 'bbox'])
df.to_csv("saved.csv", index=False)
import os
import glob
import pandas as pd
import numpy as np
os.chdir(r'D:\karami\Labeled\train1\labels')
myFiles = glob.glob('*.txt')
# Image dimensions used to scale the normalised YOLO coordinates.
width = 1024
height = 1024
image_id = 0
final_df = []
for item in myFiles:
    # One id per label file; every annotation line in it shares that id.
    image_id += 1
    with open(item, 'rt') as fd:
        for line in fd:
            splited = line.split()
            if not splited:
                continue  # ignore blank lines
            try:
                # YOLO columns: class x_center y_center width height.
                bbox = [
                    float(splited[1]) * width,
                    float(splited[2]) * height,
                    float(splited[3]) * width,
                    float(splited[4]) * height,
                ]
            except (IndexError, ValueError):
                # Too few columns or a non-numeric field.
                print("file is not in YOLO format!")
                continue
            # A fresh row per line, so each annotation gets its own record.
            final_df.append([image_id, width, height, bbox])
df = pd.DataFrame(final_df, columns=['image_id', 'width', 'height', 'bbox'])
df.to_csv("saved.csv", index=False)
If you are converting between standard annotation formats such as COCO, Pascal VOC, and YOLO, consider using a Python library such as imgann to reduce coding errors and save time.
''' This file is used to convert annotations from .txt files to the TensorFlow CSV format
'''
import os
import os.path
import argparse
import pandas as pd
from PIL import Image
from xml.dom.minidom import Document
def write_to_csv(ann_path, img_path, dict):
    """Convert YOLO-style .txt annotations into a DataFrame of pixel boxes.

    Args:
        ann_path: Directory containing the .txt label files; it is walked
            recursively.
        img_path: Directory containing a ``<stem>.jpg`` image for every
            label file; the image supplies the width and height.
        dict: Mapping from the class id string in the first column of a
            label line to the class name. (The name shadows the builtin but
            is kept so existing callers keep working.)

    Returns:
        A pandas DataFrame with the columns filename, width, height, class,
        xmin, ymin, xmax, ymax (box corners in pixels).

    Raises:
        KeyError: if a class id is missing from ``dict``.
        IndexError: if a label line has fewer than five fields.
    """
    annos = []
    # Read txts
    for root, _dirs, files in os.walk(ann_path):
        for file in files:
            print(file + "-->start!")
            # Read image and get its size attributes
            img_name = os.path.splitext(file)[0] + '.jpg'
            fileimgpath = os.path.join(img_path, img_name)
            with Image.open(fileimgpath) as im:
                w = int(im.size[0])
                h = int(im.size[1])
            # Read txt file. Join with the walked directory so label files
            # in sub-directories are found as well.
            with open(os.path.join(root, file), "r") as filelabel:
                # splitlines() keeps the last annotation even when the
                # file does not end with a newline.
                lines = filelabel.read().splitlines()
            for line in lines:
                objbud = line.split()
                if not objbud:
                    continue  # skip blank lines
                name = dict[objbud[0]]
                # Normalised centre/size -> pixel corner coordinates.
                x1 = float(objbud[1])
                y1 = float(objbud[2])
                w1 = float(objbud[3])
                h1 = float(objbud[4])
                xmin = int((x1*w) - (w1*w)/2.0)
                ymin = int((y1*h) - (h1*h)/2.0)
                xmax = int((x1*w) + (w1*w)/2.0)
                ymax = int((y1*h) + (h1*h)/2.0)
                annos.append([img_name, w, h, name, xmin, ymin, xmax, ymax])
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    df = pd.DataFrame(annos, columns=column_name)
    print(annos[:10])
    return df
if __name__ == "__main__":
    # Argument Parser
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--input", required=True, help="txt path")
    ap.add_argument("-img", "--image", required=True, help="images path")
    ap.add_argument("-o", "--output", required=True, help="output csv path ")
    args = vars(ap.parse_args())
    # Define class number according to the classes in the .txt file.
    # Named class_names so the builtin dict is not shadowed module-wide.
    class_names = {
        '0': 'autorickshaw',
        '1': "bus",
        '2': "car",
        '3': "motorcycle",
        '4': "truck",
        '5': "vehicle fallback",
    }
    # Assign paths
    ann_path = args["input"]
    img_path = args["image"]
    csv_path = args["output"]
    data = write_to_csv(ann_path, img_path, class_names)
    data.to_csv(csv_path, index=False)
    print('Successfully converted txt to csv. And your output file is {}'.format(args["output"]))
# Command to run the code :
# python3 txt_to_csv.py -i path_of_Text_files_labels_directory -img data\images -o data\data.csv
# Output columns will be
# filename, width, height, class, xmin, ymin, xmax, ymax
I have about 300,000 image files in a zip archive. Some of the entries have paths starting with '__'. The PIL function Image.open() is not able to open these entries. Please suggest a way to open them. My code is below:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from io import BytesIO
from PIL import Image
from PIL import UnidentifiedImageError
problem_files = []
img_list = []
img_size = (128, 128)
with ZipFile('/XXX/YYY/ZZZ/AI_ML/Project2/words.zip') as myzip:
    contents = myzip.namelist()
    # Consider every entry, including the last one in the listing.
    file_paths = [name for name in contents if '.png' in name]
    for path in file_paths:
        img = myzip.read(path)
        try:
            img_data = Image.open(BytesIO(img))
        except UnidentifiedImageError:
            # NOTE(review): entries such as "__MACOSX/.../._name.png" look
            # like macOS archive metadata rather than real images - confirm.
            problem_files.append(path)
            # Skip the entry; otherwise the previous image would be
            # processed again (or img_data would be undefined).
            continue
        # Greyscale, fixed size, flattened to a single (1, N) row.
        img_data = img_data.convert('L')
        img_data = img_data.resize(img_size)
        image_as_array = np.array(img_data, np.uint8)
        image_as_array = np.reshape(image_as_array, (1, -1))
        img_list.append(image_as_array)
This puts all the files with path starting with '__' into problem_files list
problem_files[-10:]
['__MACOSX/words/j04/j04-070/._j04-070-08-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-03.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-01.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-05.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-05.png']
There are about 100000 images in problem_files list
I have written a script to find image size and aspect ratio of all images in a directory along with their corresponding filepaths, I want to print dict values to csv file with following headers width,height,aspect-ratio and filepath
import os
import json
from PIL import Image
folder_images = "/home/user/Desktop/images"
size_images = dict()
def yocd(a, b):
    """Return the greatest common divisor of *a* and *b* (Euclid's algorithm).

    ``yocd(a, 0)`` returns ``a`` unchanged and ``yocd(0, b)`` returns ``b``.
    """
    while b != 0:
        a, b = b, a % b
    return a
for dirpath, _, filenames in os.walk(folder_images):
    for path_image in filenames:
        if not path_image.endswith((".png", ".jpg", ".JPG", ".jpeg")):
            continue
        image = os.path.abspath(os.path.join(dirpath, path_image))
        # NOTE: tolerance for truncated files is controlled by the
        # module-level flag PIL.ImageFile.LOAD_TRUNCATED_IMAGES; an
        # attribute of that name on an image instance has no effect,
        # so none is set here.
        try:
            with Image.open(image) as img:
                img.verify()
                width, heigth = img.size
        except Exception:
            # Best effort: anything Pillow cannot open or verify is skipped.
            print('Invalid image')
            continue
        print('Valid image')
        # Reduce width:height by their greatest common divisor.
        divisor = yocd(width, heigth)
        aspectratio = f"{width // divisor}:{heigth // divisor}"
        size_images[image] = {'width': width, 'heigth': heigth, 'aspect-ratio': aspectratio, 'filepath': image}

for k, v in size_images.items():
    print(k, '-->', v)

with open('/home/user/Documents/imagesize.txt', 'w') as file:
    file.write(json.dumps(size_images))
You can add a (properly constructed) dict directly to a pandas.DataFrame. Then, DataFrames have a .to_csv() function.
Here are the docs:
Pandas: Create a DataFrame
Pandas: Write to CSV
Without dependencies (but you may have to tweak the formatting)
csv_sep = ';'  # choose here which field separator you want
with open('your_csv', 'w') as f:
    # header
    f.write(csv_sep.join(["width", "height", "aspect-ratio", "filepath"]) + "\n")
    # data: size_images maps each file path to a dict of its properties,
    # so iterate over the values, not the keys.
    for img in size_images.values():
        # 'heigth' is the key spelling used where size_images is built.
        fields = [img['width'], img['heigth'], img['aspect-ratio'], img['filepath']]
        # str.join needs strings; width and height are ints.
        f.write(csv_sep.join(str(field) for field in fields) + '\n')
I have converted my image into a csv file and it's like a matrix but I want it to be a single row.
How can I convert all of the images in dataset into a csv file (each image into one line).
Here's the code I've used:
from PIL import Image
import numpy as np
import os, os.path, time
format='.jpg'
myDir = "Lotus1"
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files under *myDir* whose name ends with *format*.

    The directory tree is walked bottom-up (``topdown=False``), so files in
    sub-directories are listed before files in their parent. The match is
    case-sensitive. *myDir* is printed before the walk starts. A directory
    that does not exist yields an empty list.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        fileList.extend(
            os.path.join(root, name) for name in files if name.endswith(format)
        )
    return fileList
fileList = createFileList(myDir)
for fileName in fileList:
    # fileList holds paths, so each one has to be opened to get an image.
    with Image.open(fileName) as img_file:
        # get original image parameters...
        width, height = img_file.size
        # Make image Greyscale
        img_grey = img_file.convert('L')
    # Save Greyscale values
    value = np.asarray(img_grey.getdata(), dtype=np.float64).reshape((height, width))
    # NOTE: value is a 2-D (height, width) matrix, so the CSV gets one line
    # per pixel row, and the file is rewritten for every image.
    np.savetxt("img_pixels.csv", value, delimiter=',')
input :
http://uupload.ir/files/pto0_lotus1_1.jpg
output:http://uupload.ir/files/huwh_output.png
From your question, I think you want to know about numpy.ndarray.flatten(). You want to add
value = value.flatten()
right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.
The rest of your question is unclear, but it implies you have a directory full of JPEG images and you want a way to read through them all. So first, get a file list:
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files under *myDir* whose name ends with *format*.

    The directory tree is walked bottom-up (``topdown=False``), so files in
    sub-directories are listed before files in their parent. The match is
    case-sensitive. *myDir* is printed before the walk starts. A directory
    that does not exist yields an empty list.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        fileList.extend(
            os.path.join(root, name) for name in files if name.endswith(format)
        )
    return fileList
Then surround your code with a for fileName in fileList: loop.
Edited to add complete example
Note that I've used csv writer and changed your float64 to ints (which should be OK, as pixel data is 0-255).
from PIL import Image
import numpy as np
import sys
import os
import csv
#Useful function
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files under *myDir* whose name ends with *format*.

    The directory tree is walked bottom-up (``topdown=False``), so files in
    sub-directories are listed before files in their parent. The match is
    case-sensitive. *myDir* is printed before the walk starts. A directory
    that does not exist yields an empty list.
    """
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        fileList.extend(
            os.path.join(root, name) for name in files if name.endswith(format)
        )
    return fileList
# load the original images
myFileList = createFileList('path/to/directory/')

# Open the CSV once and append one row per image. newline='' lets the csv
# module control line endings (no blank rows on Windows).
with open("img_pixels.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    for file in myFileList:
        print(file)
        with Image.open(file) as img_file:
            # Make image Greyscale
            img_grey = img_file.convert('L')
        # Greyscale values as a flat 1-D array, in row-major pixel order.
        # The builtin int is used because the np.int alias was removed
        # from NumPy.
        value = np.asarray(img_grey.getdata(), dtype=int)
        print(value)
        writer.writerow(value)
How about you convert your images to 2D numpy arrays and then write them as txt files with .csv extensions and , as delimiters?
Maybe you could use a code like following:
np.savetxt('np.csv', image, delimiter=',')
import numpy as np
import cv2
import os
IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'

# Open the output once; every image is appended as a single CSV row.
with open('output.csv', 'ab') as f:
    for img in os.listdir(IMG_DIR):
        img_array = cv2.imread(os.path.join(IMG_DIR, img), cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread returns None for entries it cannot decode.
            continue
        # Flatten to shape (1, N) so savetxt writes one line per image.
        img_array = img_array.reshape(1, -1)
        print(img_array)
        np.savetxt(f, img_array, delimiter=",")
import os
import pandas as pd
# Resolve to an absolute path first, so the listing below still works
# after the working directory has been changed to that folder.
path = os.path.abspath('path-to-the-folder')
os.chdir(path)
lists = os.listdir(path)
labels = []
file_lst = []
# Each sub-folder is one class; its name is the label of the files in it.
for folder in lists:
    folder_path = os.path.join(path, folder)
    if not os.path.isdir(folder_path):
        continue  # plain files next to the class folders are ignored
    for file in os.listdir(folder_path):
        file_lst.append(os.path.join(folder_path, file))
        labels.append(folder)
dictP_n = {"path": file_lst,
           "label_name": labels,
           "label": labels}
data = pd.DataFrame(dictP_n, index=None)
# Shuffle the rows.
data = data.sample(frac=1)
# Encode class names as integers; names not listed are left unchanged.
data['label'] = data['label'].replace({"class1": 0, "class2": 1})
data.to_csv("path-to-save-location//file_name.csv", index=False)