The .txt files generated by LabelImg need to be converted into one CSV table with x_center, y_center, width and height. Some .txt files contain more than one line (one line per bounding box), and in that case the same image_id should get one row per box.
I was able to read only the first line of each .txt file, but I am unable to read the additional lines.
import os
import glob
import pandas as pd
import numpy as np

os.chdir(r'D:\karami\Labeled\train1\labels')
myFiles = glob.glob('*.txt')
width = 1024
height = 1024
image_id = 0
final_df = []
for item in myFiles:
    row = []
    bbox_temp = []
    with open(item, 'rt') as fd:
        first_line = fd.readline()
        splited = first_line.split()
        row.append(image_id)
        row.append(width)
        row.append(height)
        try:
            bbox_temp.append(float(splited[1]) * width)
            bbox_temp.append(float(splited[2]) * height)
            bbox_temp.append(float(splited[3]) * width)
            bbox_temp.append(float(splited[4]) * height)
            row.append(bbox_temp)
            final_df.append(row)
        except:
            print("file is not in YOLO format!")
df = pd.DataFrame(final_df, columns=['image_id', 'width', 'height', 'bbox'])
df.to_csv("saved.csv", index=False)
import os
import glob
import pandas as pd
import numpy as np

os.chdir(r'D:\karami\Labeled\train1\labels')
myFiles = glob.glob('*.txt')
width = 1024
height = 1024
image_id = 0
final_df = []
for item in myFiles:
    image_id += 1
    with open(item, 'rt') as fd:
        for line in fd.readlines():
            splited = line.split()
            if not splited:
                continue  # skip blank lines
            # build one row per annotation line, so images with several
            # boxes end up with several rows sharing the same image_id
            row = []
            bbox_temp = []
            row.append(image_id)
            row.append(width)
            row.append(height)
            try:
                bbox_temp.append(float(splited[1]) * width)
                bbox_temp.append(float(splited[2]) * height)
                bbox_temp.append(float(splited[3]) * width)
                bbox_temp.append(float(splited[4]) * height)
                row.append(bbox_temp)
                final_df.append(row)
            except (IndexError, ValueError):
                print("file is not in YOLO format!")
df = pd.DataFrame(final_df, columns=['image_id', 'width', 'height', 'bbox'])
df.to_csv("saved.csv", index=False)
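Since the question asks for separate x_center, y_center, width and height columns with one row per box, here is a minimal variation of the loop above (a sketch, assuming the same 1024x1024 images and YOLO-format label files; boxes.csv is just a placeholder name):

import glob
import pandas as pd

rows = []
img_w = img_h = 1024
for image_id, path in enumerate(glob.glob('*.txt'), start=1):
    with open(path) as fd:
        for line in fd:
            parts = line.split()
            if len(parts) < 5:
                continue  # skip blank or malformed lines
            # one dict per bounding box; images with several boxes get several rows
            rows.append({
                'image_id': image_id,
                'x_center': float(parts[1]) * img_w,
                'y_center': float(parts[2]) * img_h,
                'width':    float(parts[3]) * img_w,
                'height':   float(parts[4]) * img_h,
            })

pd.DataFrame(rows).to_csv('boxes.csv', index=False)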
If you are trying to convert between standard data annotation formats like COCO, Pascal VOC and YOLO, use a Python library such as imgann to reduce coding errors and time.
''' This file is used to convert annotations from .txt files to the TensorFlow CSV format.
'''
import os
import argparse
import pandas as pd
from PIL import Image


def write_to_csv(ann_path, img_path, class_dict):
    annos = []
    # Read txts
    for files in os.walk(ann_path):
        for file in files[2]:
            print(file + " --> start!")
            # Read the image and get its size attributes
            img_name = os.path.splitext(file)[0] + '.jpg'
            fileimgpath = os.path.join(img_path, img_name)
            im = Image.open(fileimgpath)
            w = int(im.size[0])
            h = int(im.size[1])
            # Read the txt file
            with open(os.path.join(ann_path, file), "r") as filelabel:
                lines = filelabel.read().split('\n')
            obj = lines[:len(lines) - 1]
            for i in range(0, int(len(obj))):
                objbud = obj[i].split(' ')
                name = class_dict[objbud[0]]
                # YOLO stores relative x_center, y_center, width, height;
                # convert them to absolute corner coordinates
                x1 = float(objbud[1])
                y1 = float(objbud[2])
                w1 = float(objbud[3])
                h1 = float(objbud[4])
                xmin = int((x1 * w) - (w1 * w) / 2.0)
                ymin = int((y1 * h) - (h1 * h) / 2.0)
                xmax = int((x1 * w) + (w1 * w) / 2.0)
                ymax = int((y1 * h) + (h1 * h) / 2.0)
                annos.append([img_name, w, h, name, xmin, ymin, xmax, ymax])
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    df = pd.DataFrame(annos, columns=column_name)
    print(annos[:10])
    return df
if __name__ == "__main__":
    # Argument parser
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--input", required=True, help="txt labels path")
    ap.add_argument("-img", "--image", required=True, help="images path")
    ap.add_argument("-o", "--output", required=True, help="output csv path")
    args = vars(ap.parse_args())

    # Map the class numbers used in the .txt files to class names
    class_dict = {'0': 'autorickshaw',
                  '1': 'bus',
                  '2': 'car',
                  '3': 'motorcycle',
                  '4': 'truck',
                  '5': 'vehicle fallback',
                  }

    # Assign paths
    ann_path = args["input"]
    img_path = args["image"]
    csv_path = args["output"]

    data = write_to_csv(ann_path, img_path, class_dict)
    data.to_csv(csv_path, index=None)
    print('Successfully converted txt to csv. Your output file is {}'.format(args["output"]))

# Command to run the code:
# python3 txt_to_csv.py -i path_of_Text_files_labels_directory -img data\images -o data\data.csv
# Output columns:
# filename, width, height, class, xmin, ymin, xmax, ymax
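As a quick sanity check of the centre/size-to-corner conversion used above, here is a minimal worked example (with made-up values) for one YOLO line in a 1024x1024 image:

# hypothetical YOLO line: "2 0.5 0.5 0.25 0.125"
w, h = 1024, 1024
x_c, y_c, bw, bh = 0.5, 0.5, 0.25, 0.125
xmin = int(x_c * w - bw * w / 2.0)   # 384
ymin = int(y_c * h - bh * h / 2.0)   # 448
xmax = int(x_c * w + bw * w / 2.0)   # 640
ymax = int(y_c * h + bh * h / 2.0)   # 576
print(xmin, ymin, xmax, ymax)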
I am trying to get the image name, resolution, and size of each image in a directory. I got the image name and resolution, but the size is not in kilobytes; it comes out as a pixel count. Please suggest how to get the file size with a Python script.
# Required Libraries
from os import listdir
from os.path import isfile, join
from pathlib import Path
import numpy
import cv2
import argparse
import csv
from PIL import Image

# Check whether the CSV
# exists or not; if not, then create one.
my_file = Path("csv/details.csv")
if my_file.is_file():
    f = open(my_file, "w+")
    with open('csv/details.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["S.No.", "Name", "Resolution", "Size"])
    f.close()
else:
    with open('csv/details.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["S.No.", "Name", "Resolution", "Size"])

# Argparse function to get
# the path of the image directory
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image",
                required=True,
                help="Path to folder")
args = vars(ap.parse_args())

# Program to find the
# colors and embed in the CSV
mypath = args["image"]
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
images = numpy.empty(len(onlyfiles), dtype=object)
for n in range(0, len(onlyfiles)):
    path = join(mypath, onlyfiles[n])
    images[n] = cv2.imread(join(mypath, onlyfiles[n]),
                           cv2.IMREAD_UNCHANGED)
    img = cv2.imread(path)
    h, w, c = img.shape
    resolution = f"{h} X {w}"
    size = img.size  # NOTE: this is the number of array elements (pixels), not bytes
    print(h, w)
    avg_color_per_row = numpy.average(img, axis=0)
    avg_color = numpy.average(avg_color_per_row, axis=0)
    with open('csv/details.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([n + 1, onlyfiles[n], resolution, size])
Please also find the screenshot below for your reference.
Hi, you can use this inside your for loop:
import os
file_size = os.path.getsize('d:/file.jpg')
print("File Size is :", file_size, "bytes")
Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1

os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        valeur.append(values)
    n = n + 1

with open('test.csv', 'wb') as f:
    for i in valeur:
        f.write(i)
The goal here is to pick up some information from the PDFs. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like to have a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try to store the data from each PDF file in a separate list, and add that list to the valeur list which you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv

valeur = []
# your code
n = 1
for i in all_filenames:
    temp_list = []
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        temp_list.append(values)
    n = n + 1
    valeur.append(temp_list)

# Finally, when you have the required data, you can write it to a csv file like this.
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for val in valeur:
        wr.writerow(val)
With this, the output would be like this
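If you also want a header row with the PDF field names, a hedged variation of the write step could look like this (assuming you also collect the resolved names into a field_names list inside the inner loop, in the same order for every PDF):

import csv

# field_names is assumed to hold the resolved "T" values collected once
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(field_names)      # header row
    for val in valeur:
        wr.writerow(val)          # one row per PDF, one cell per field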
I have used the TensorFlow Object Detection API to train my own object detector. At that time, the images were annotated using LabelImg, which creates an XML file for each image. Now I have labeled images that come with a JSON file for each image. How do I use these JSON files to create tfrecords?
First I created CSV files using my own script.
import os
import glob
import pandas as pd
import json
import pickle


def json_to_csv():
    path_to_json = 'images/train/'
    json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
    path_to_jpeg = 'images/train/'
    jpeg_files = [pos_jpeg for pos_jpeg in os.listdir(path_to_jpeg) if pos_jpeg.endswith('.jpeg')]
    fjpeg = list(reversed(jpeg_files))
    n = 0
    csv_list = []
    labels = []
    for j in json_files:
        data_file = open('images/train/{}'.format(j))
        data = json.load(data_file)
        width, height = data['display_width'], data['display_height']
        for item in data["items"]:
            box = item['bounding_box']
            if item['upc'] != 'None':
                name = item['upc']
                labels.append(name)
                xmin = box['left']
                ymin = box['top']
                xmax = box['right']
                ymax = box['bottom']
                value = (fjpeg[n],
                         width,
                         height,
                         name,
                         xmin,
                         ymin,
                         xmax,
                         ymax)
                csv_list.append(value)
        n = n + 1
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    csv_df = pd.DataFrame(csv_list, columns=column_name)
    labels_train = list(set(labels))
    with open("train_labels.txt", "wb") as fp:  # Pickling
        pickle.dump(labels_train, fp)
    return csv_df


def main():
    for directory in ['train']:
        csv_df = json_to_csv()
        csv_df.to_csv('data/{}_labels.csv'.format(directory), index=None)
        print('Successfully converted json to csv.')


main()
Then I use this script to create tfrecords.
We have some documentation on the subject.
Note that labelimg should produce outputs similar to the PASCAL VOC datasets we use, so those scripts may also be of use.
I have converted my image into a CSV file and it is like a matrix, but I want it to be a single row.
How can I convert all of the images in a dataset into a CSV file (each image as one line)?
Here's the code I've used:
from PIL import Image
import numpy as np
import os, os.path, time

format = '.jpg'
myDir = "Lotus1"

def createFileList(myDir, format='.jpg'):
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    return fileList

fileList = createFileList(myDir)
fileFormat = '.jpg'
for fileFormat in fileList:
    format = '.jpg'
    # get original image parameters...
    width, height = fileList.size
    format = fileList.format
    mode = fileList.mode
    # Make image Greyscale
    img_grey = fileList.convert('L')
    # Save Greyscale values
    value = np.asarray(fileList.getdata(), dtype=np.float64).reshape((fileList.size[1], fileList.size[0]))
    np.savetxt("img_pixels.csv", value, delimiter=',')
Input: http://uupload.ir/files/pto0_lotus1_1.jpg
Output: http://uupload.ir/files/huwh_output.png
From your question, I think you want to know about numpy.flatten(). You want to add
value = value.flatten()
right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.
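For illustration, a minimal sketch (with a made-up 2x3 array) of what flatten() does:

import numpy as np

value = np.array([[1, 2, 3],
                  [4, 5, 6]])
print(value.flatten())   # [1 2 3 4 5 6] -- one dimension, so one CSV line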
The rest of your question is unclear, but it implies you have a directory full of jpeg images and you want a way to read through them all. So first, get a file list:
def createFileList(myDir, format='.jpg'):
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    return fileList
Then surround your code with a for fileName in fileList: loop.
Edited to add a complete example.
Note that I've used the csv writer and changed your float64 to ints (which should be OK as pixel data is 0-255).
from PIL import Image
import numpy as np
import sys
import os
import csv

# Useful function
def createFileList(myDir, format='.jpg'):
    fileList = []
    print(myDir)
    for root, dirs, files in os.walk(myDir, topdown=False):
        for name in files:
            if name.endswith(format):
                fullName = os.path.join(root, name)
                fileList.append(fullName)
    return fileList

# load the original image
myFileList = createFileList('path/to/directory/')

for file in myFileList:
    print(file)
    img_file = Image.open(file)
    # img_file.show()

    # get original image parameters...
    width, height = img_file.size
    format = img_file.format
    mode = img_file.mode

    # Make image Greyscale
    img_grey = img_file.convert('L')
    # img_grey.save('result.png')
    # img_grey.show()

    # Save Greyscale values
    value = np.asarray(img_grey.getdata(), dtype=int).reshape((img_grey.size[1], img_grey.size[0]))
    value = value.flatten()
    print(value)
    with open("img_pixels.csv", 'a') as f:
        writer = csv.writer(f)
        writer.writerow(value)
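To sanity-check the result, here is a small sketch (assuming the img_pixels.csv written above) that confirms each image ends up as one row:

import csv

with open("img_pixels.csv", newline="") as f:
    for i, row in enumerate(csv.reader(f), start=1):
        print("image {}: {} pixel values".format(i, len(row)))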
How about converting your images to 2D numpy arrays and then writing them as txt files with a .csv extension and , as the delimiter?
Maybe you could use code like the following:
np.savetxt('np.csv', image, delimiter=',')
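If the goal is one image per CSV line, a small variation of that idea (a sketch; the array here is a placeholder for the 2-D image you actually load) is to reshape before saving:

import numpy as np

image = np.zeros((4, 5))    # placeholder for the 2-D image array
# reshape(1, -1) turns the H x W matrix into a single row before writing
np.savetxt('np.csv', image.reshape(1, -1), delimiter=',')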
import numpy as np
import cv2
import os

IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'

for img in os.listdir(IMG_DIR):
    img_array = cv2.imread(os.path.join(IMG_DIR, img), cv2.IMREAD_GRAYSCALE)
    img_array = img_array.flatten()
    img_array = img_array.reshape(-1, 1).T
    print(img_array)
    with open('output.csv', 'ab') as f:
        np.savetxt(f, img_array, delimiter=",")
import os
import pandas as pd

path = 'path-to-the-folder'
os.chdir(path)
lists = os.listdir(path)

labels = []
file_lst = []
for folder in lists:
    files = os.listdir(path + "/" + folder)
    for file in files:
        path_file = path + "/" + folder + "/" + file
        file_lst.append(path_file)
        labels.append(folder)

dictP_n = {"path": file_lst,
           "label_name": labels,
           "label": labels}

data = pd.DataFrame(dictP_n, index=None)
data = data.sample(frac=1)
data['label'] = data['label'].replace({"class1": 0, "class2": 1})
data.to_csv("path-to-save-location//file_name.csv", index=None)
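If there are more than two classes, one way (a sketch, not part of the original answer) to build the numeric labels without hard-coding the replace map is to factorize the folder names:

import pandas as pd

# hypothetical example frame; in the code above this would be the data DataFrame
data = pd.DataFrame({"label_name": ["class1", "class2", "class1", "class3"]})
# pd.factorize maps each distinct name to an integer automatically
data['label'] = pd.factorize(data['label_name'])[0]
print(data)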
I am new to Python and am starting some online courses. I am trying to convert some data from a paragraph format to CSV format (shown below). I am able to import a text file containing the paragraph format and export it to CSV, but each line of the paragraph format comes in as a separate line when imported into a spreadsheet.
import csv
import glob
import os

directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
txt_files = os.path.join(directory, '*.txt')

for txt_file in glob.glob(txt_files):
    with open(txt_file, "rb") as input_file:
        in_txt = csv.reader(input_file, delimiter='=')
        filename = os.path.splitext(os.path.basename(txt_file))[0] + '.csv'
        with open(os.path.join(output, filename), 'wb') as output_file:
            out_csv = csv.writer(output_file)
            out_csv.writerows(in_txt)
I do not know how to parse the data to separate the labels and spaces from the numeric values and combine each paragraph section into a single line with quotes and commas for the CSV file. Any help would be greatly appreciated!
Paragraph format:
12-03-06 15:19:36
FLOW: 1.17365 g/m
POS: +9273x1Gal
12-03-06 15:19:37
FLOW: 1.17849 g/m
POS: +9283x1Gal
12-03-06 15:19:38
FLOW: 1.19849 g/m
POS: +9293x1Gal
(repeats)
Desired CSV output (note, I had to add a single quote before the + to allow proper import as text into a spreadsheet, otherwise it comes in as a 0)
"12-03-06 15:19:36","FLOW:","1.17365","g/m","POS:","'+","9273","x1","Gal"
"12-03-06 15:19:37","FLOW:","1.17849","g/m","POS:","'+","9283","x1","Gal"
"12-03-06 15:19:38","FLOW:","1.19849","g/m","POS:","'+","9293","x1","Gal"
I suggest using a collections.deque to work on three lines at a time, and re.match to parse out the items you want:
# -*- coding: utf-8 -*-

from collections import deque
import csv
from functools import partial
import glob
import os
import re
import sys

if sys.hexversion < 0x3000000:
    # Python 2.x
    inp = raw_input
    open_csv_write = partial(open, mode="wb")
else:
    # Python 3.x
    inp = input
    open_csv_write = partial(open, mode="w", newline="")

POS_REG = re.compile(r"(POS:) ([+-])(\d+(?:\.\d+)?)(x\d+)(\w+)", re.I)

def change_ext(fn, new_ext):
    """
    Given `fn` as "path\filename.old_ext",
    return "path\filename" + new_ext
    """
    return os.path.splitext(fn)[0] + new_ext

def get_pos(line, reg=POS_REG):
    """
    Given a string like "POS: +92.73x1Gal",
    return ['POS:', '+', '92.73', 'x1', 'Gal']
    """
    match = reg.match(line)
    return list(match.groups()) if match else []

def process(inf, outcsv):
    # line queue
    q = deque(maxlen=3)
    # preload two lines
    q.append(next(inf, '').rstrip())
    q.append(next(inf, '').rstrip())
    # process rest of lines
    for line in inf:
        q.append(line.rstrip())
        if q[1].startswith('FLOW:'):
            pos = get_pos(line)
            if pos:
                row = [q[0]] + q[1].split() + pos
                outcsv.writerow(row)

def main():
    # get directories
    in_dir = inp("Input directory: ")
    out_dir = inp("Output directory: ")
    # process file names
    in_filespec = os.path.join(in_dir, '*.txt')
    in_full_names = glob.glob(in_filespec)
    in_names = [os.path.basename(fn) for fn in in_full_names]
    out_names = [change_ext(fn, ".csv") for fn in in_names]
    out_full_names = [os.path.join(out_dir, fn) for fn in out_names]
    # operate on files
    for in_name, out_name in zip(in_full_names, out_full_names):
        with open(in_name) as inf, open_csv_write(out_name) as outf:
            outcsv = csv.writer(outf)
            process(inf, outcsv)

if __name__ == "__main__":
    main()
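For a quick check of the POS parsing against the sample data from the question, here is a minimal standalone sketch using the same regular expression:

import re

POS_REG = re.compile(r"(POS:) ([+-])(\d+(?:\.\d+)?)(x\d+)(\w+)", re.I)
match = POS_REG.match("POS: +9273x1Gal")       # sample POS line from the question
print(list(match.groups()) if match else [])   # ['POS:', '+', '9273', 'x1', 'Gal']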