Hexadecimal to Image Conversion - python

I am converting the hexadecimal files to images. The input files are converted to byte string using binascii library. The problem arises when the byte string is written to form an image. The output of all the hexadecimal files is same. I will be grateful if someone provides me a solution.
Here is my code:
import binascii
import cv2
import os
from tkinter import *
from tkinter import filedialog
#Hide the root window that comes by default
root=Tk()
root.withdraw()
#Browse and select txt files
dir=[]
dir=filedialog.askopenfilenames(
initialdir="C:\Binaries\Hexadecimal_Text_Files",
title="Open Text file",
filetypes=(("Text Files", "*.txt"),)
)
#Reading data in txt files and decoding hexadecimal characters
for x in dir:
tf=open(x)#Open file
data=tf.read()#Read data in file
data=data.replace(' ','')#Remove whitespaces
data=data.replace('\n','')#Remove breaks in lines
data=binascii.a2b_hex(data)
tf.close()
#Extract txt filename without extension
pathname, extension = os.path.splitext(f"{x}")#Split path into filename and extenion
filename = pathname.split('/')#Get filename without txt extension
filepath=f"C:\Binaries\Images\{filename[-1]}.png"#Defining name of image file same as txt file
#Write data into image
with open(filepath, 'wb') as image_file:
img=image_file.write(data)
#Resizing Image
img=cv2.resize(img,(500,500))
cv2.imwrite(filepath,img)
Output:

I made my own version because I could not get yours to work, but if you want to make yours work, at least one problem with I found is with this line:
img=cv2.resize(img,(500,500))
by printing all the variables after the supposed "conversion", I found that your variable img in the previous line is not an image but the result of image_file.write(data) which returns the number of bytes written to the file and not the image itself, which is probably why it always prints the same image.
Here is my version
root=Tk()
root.withdraw()
file_path = filedialog.askopenfilename(
initialdir = "C:\Binaries\Images",
title = "Select Hexadecimal Text File",
filetypes = (("Text Files", "*.txt"),)
)
with open(file_path, "r") as hex_file:
hex_data = hex_file.read().replace("\n", "")
#replaces white spaces and new lines from file
binary_data = binascii.a2b_hex(hex_data)
#converts the hexadecimal data to binary
pathname, extension = os.path.splitext(file_path)
image_path = pathname + ".png"
#image path and format
with open(image_path, "wb") as image_file:
image_file.write(binary_data)
#writing the binary data to image file
img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
#if txt file is empty
if img is None:
print("Error: Image not loaded!")
else:
cv2.imshow("image", img)
#waits for key input and closes when pressing any key
cv2.waitKey(0)
cv2.destroyAllWindows()

I have converted the hexadecimal files into images by using numpy array and Pillow. Now I am getting different images.
import numpy as np
import binascii
import os
from PIL import Image as im
from tkinter import *
from tkinter import filedialog
# Hide the root window that comes by default
root = Tk()
root.withdraw()
# Browse and select txt files
dir = []
dir = filedialog.askopenfilenames(
initialdir="C:\Binaries\Folder_3",
title="Open Text file",
filetypes=(("Text Files", "*.txt"),)
)
# Reading data in txt files and decoding hexadecimal characters
for temp in dir:
tf = open(temp) # Open file
data = tf.read() # Read data in file
data= data.replace('\'','') #Remove label
data = data.replace(' ', '') # Remove whitespaces
data = data.replace('\n', '') # Remove breaks in lines
data = binascii.a2b_hex(data)
tf.close()
#Converting bytes array to numpy array
a = np.frombuffer(data, dtype='uint8')
#print(a) //Display array
#Finding optimal factor pair for size of image
x = len(a)
val1=0
val2=0
for i in range(1, int(pow(x, 1 / 2))+1):
if x % i == 0:
val1=i
val2=int(x / i)
#Converting 1-D to 2-D numpy array
a = np.reshape(a, (val1, val2))
#print(a) #Display 2-D array
#Writing array to image
data = im.fromarray(a)
# Split path into filename and extenion
pathname, extension = os.path.splitext(f"{temp}")
filename = pathname.split('/') # Get filename without txt extension
# Defining name of image file same as txt file
filepath = f"C:\Binaries\Images_3\{filename[-1]}.png"
#Resize image
data=data.resize((500,500))
#Saving image into path
data.save(filepath)

Related

Text Extraction from a folder having multiple images to CSV

I have a file having multiple images and I want to extract the text from them and then to store it in CSV . I tried but not getting satisfied result.
this is my code
enter image description here
from PIL import Image
from pytesseract import pytesseract
import os
#Define path to tessaract.exe
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
#Define path to image
path_to_images = r'all_img_iteration_demo/'
#Point tessaract_cmd to tessaract.exe
pytesseract.tesseract_cmd = path_to_tesseract
#Get the file names in the directory
for root, dirs, file_names in os.walk(path_to_images):
#Iterate over each file_name in the folder
for file_name in file_names:
#Open image with PIL
img = Image.open(path_to_images + file_name)
#Extract text from image
text = pytesseract.image_to_string(img)
list_1 = text.split()
print(list_1)
jdemo = "datarecords.csv"
with open(jdemo, 'w') as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(list_1)
this is the output
enter image description here
what i am getting right now is is this..
i tried to store the extracted text to csv but only last value i am getting in the file.
enter image description here
i want output of each extracted text into new row of csv file . and then iterate over next image and so on..

How to continuously update video from images that keep coming in python?

I am working on a robotics project in which a robot creates images in .pgm format (not camera images) continuously which I'm converting to .jpeg and saving to a directory. However, this directory keeps updating as more images keep getting added while I want to convert these images continuously in a video. I can already create a video from the images saved in one instance but what I want is to update the video continuously as a single file (streaming onto a URL) while images keep getting saved. Here is my code but it doesn't work in parallel for images as well as for videos. Anyone can help?
from PIL import Image
import os, glob
import cv2
import numpy as np
from os.path import isfile, join
# remove .yaml files
directory = "/home/user/catkin_ws/src/ros_map/map_pgms"
files_in_directory = os.listdir(directory)
filtered_files = [file for file in files_in_directory if file.endswith(".yaml")]
for file in filtered_files:
path_to_file = os.path.join(directory, file)
os.remove(path_to_file)
def batch_image(in_dir, out_dir):
if not os.path.exists(out_dir):
print(out_dir, 'is not existed.')
os.mkdir(out_dir)
if not os.path.exists(in_dir):
print(in_dir, 'is not existed.')
return -1
count = 0
for files in glob.glob(in_dir + '/*'):
filepath, filename = os.path.split(files)
out_file = filename[0:9] + '.jpg'
# print(filepath,',',filename, ',', out_file)
im = Image.open(files)
new_path = os.path.join(out_dir, out_file)
print(count, ',', new_path)
count = count + 1
im.save(os.path.join(out_dir, out_file))
if __name__ == '__main__':
batch_image('/home/user/catkin_ws/src/ros_map/map_pgms', './batch')
# convert the .jpgs to video
img_array = []
for filename in glob.glob('./batch/*.jpg'):
img = cv2.imread(filename)
height, width, layers = img.shape
size = (width, height)
img_array.append(img)
out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
for i in range(len(img_array)):
out.write(img_array[i])
out.release()

resizing and storing image folder with filesystem using python

I want to resize images in the folder and store them into database with filesystem using postgres as database and jupyter notebook. I am using resize function for resizing my images and then saving then into database but resize function seems not working and unable to understand my mistake.
subject= input("Enter Subject Name:")
cursor.execute("DROP TABLE IF EXISTS %s"%(subject))
cursor.execute( """CREATE TABLE %s (ID SERIAL PRIMARY KEY, PHOTO BYTEA NOT NULL)"""%(subject))
conn.commit()
userfilepath=input("enter file path:")
dirs = os.listdir( userfilepath )
def resize():
for item in dirs:
if os.path.isfile(userfilepath+item):
im = Image.open(userfilepath+item)
f, e = os.userfilepath.splitext(userfilepath+item)
imResize = im.resize((200,200), Image.ANTIALIAS)
imResize.save(f + ' resized.jpg', 'JPEG', quality=90)
import cv2
import os, sys
from PIL import Image
import io
import glob
img_dir = userfilepath # Enter Directory of all images
data_path = os.path.join(img_dir,'*g')
files = glob.glob(data_path)
data = []
for f1 in files:
# img = cv2.imread(f1)
# data.append(img)
with open(f1,"rb") as file:
resize()
BinaryData=file.read()
cursor.execute("INSERT INTO {tab} (photo)
VALUES({})".format(psycopg2.Binary(BinaryData, ) , tab=subject ) )
conn.commit()
#Insert_blob(img_dir)
Feel free to change the variables at the top since you seem to want to get them from user input. I've chosen to hardcode them for the purpose of this example.
Have a look at the code below
import os
userfilepath = "files"
dirs = os.listdir("files")
def do_open(_file):
print(f"Opening {_file}")
def do_resize(_file):
print(f"resizing: {_file}")
def resize():
for item in dirs:
f = userfilepath + item
print(f)
if os.path.isfile(f):
im = do_open(userfilepath + item)
resize()
# output
# filesf1.txt
# filesf2.txt
def resize_well():
for item in dirs:
f = os.path.join(userfilepath, item)
print(f)
if os.path.isfile(f):
im = do_open(f)
resize_well()
# output
# files\f1.txt
# Opening files\f1.txt
# files\f2.txt
# Opening files\f2.txt
In resize_well() os.path.join() creates a proper path, where as using string concatenation misses out the / or \ delimiter on linux and windows respectively.
Your code isn't passing the if statement because userfilepath+item doesn't exist, but probably userfilepath/item does.

Cache error while doing OCR on a directory of pdf's in python

I am trying to OCR an entire directory of pdf files using pytesseract and imagemagick but the issue is that imagemagick is consuming all my Temp folder space and finally I'm getting a cache error i.e "CacheError: unable to extend cache 'C:/Users/Azu/AppData/Local/Temp/magick-18244WfgPyAToCsau11': No space left on device # error/cache.c/OpenPixelCache/3883" I have also written a code to delete the temp folder content once OCR'd but still facing the same issue.
Here's the code till now:
import io
import os
import glob
from PIL import Image
import pytesseract
from wand.image import Image as wi
files = glob.glob(r"D:\files\**")
tempdir = r"C:\Users\Azu\AppData\Local\Temp"
filesall = os.listdir(tempdir)
for file in files:
name = os.path.basename(file).split('.')[0]
#print(file)
pdf = wi(filename = file, resolution = 300)
pdfImg = pdf.convert('jpeg')
imgBlobs = []
for img in pdfImg.sequence:
page = wi(image = img)
imgBlobs.append(page.make_blob('jpeg'))
extracted_texts = []
for imgBlob in imgBlobs:
im = Image.open(io.BytesIO(imgBlob))
text = pytesseract.image_to_string(im, lang = 'eng')
extracted_texts.append(text)
with open("D:\\extracted_text\\"+ name + ".txt", 'w') as f:
f.write(str(extracted_texts))
for ifile in filesall:
if "magick" in ifile:
os.remove(os.path.join(tempdir,ifile))

Converting images to csv file in python

I have converted my image into a csv file and it's like a matrix but I want it to be a single row.
How can I convert all of the images in dataset into a csv file (each image into one line).
Here's the code I've used:
from PIL import Image
import numpy as np
import os, os.path, time
format='.jpg'
myDir = "Lotus1"
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
fileList = createFileList(myDir)
fileFormat='.jpg'
for fileFormat in fileList:
format = '.jpg'
# get original image parameters...
width, height = fileList.size
format = fileList.format
mode = fileList.mode
# Make image Greyscale
img_grey = fileList.convert('L')
# Save Greyscale values
value = np.asarray(fileList.getdata(),dtype=np.float64).reshape((fileList.size[1],fileList.size[0]))
np.savetxt("img_pixels.csv", value, delimiter=',')
input :
http://uupload.ir/files/pto0_lotus1_1.jpg
output:http://uupload.ir/files/huwh_output.png
From your question, I think you want to know about numpy.flatten(). You want to add
value = value.flatten()
right before your np.savetxt call. It will flatten the array to only one dimension and it should then print out as a single line.
The rest of your question is unclear bit it implies you have a directory full of jpeg images and you want a way to read through them all. So first, get a file list:
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
The surround your code with a for fileName in fileList:
Edited to add complete example
Note that I've used csv writer and changed your float64 to ints (which should be ok as pixel data is 0-255
from PIL import Image
import numpy as np
import sys
import os
import csv
#Useful function
def createFileList(myDir, format='.jpg'):
fileList = []
print(myDir)
for root, dirs, files in os.walk(myDir, topdown=False):
for name in files:
if name.endswith(format):
fullName = os.path.join(root, name)
fileList.append(fullName)
return fileList
# load the original image
myFileList = createFileList('path/to/directory/')
for file in myFileList:
print(file)
img_file = Image.open(file)
# img_file.show()
# get original image parameters...
width, height = img_file.size
format = img_file.format
mode = img_file.mode
# Make image Greyscale
img_grey = img_file.convert('L')
#img_grey.save('result.png')
#img_grey.show()
# Save Greyscale values
value = np.asarray(img_grey.getdata(), dtype=np.int).reshape((img_grey.size[1], img_grey.size[0]))
value = value.flatten()
print(value)
with open("img_pixels.csv", 'a') as f:
writer = csv.writer(f)
writer.writerow(value)
How about you convert your images to 2D numpy arrays and then write them as txt files with .csv extensions and , as delimiters?
Maybe you could use a code like following:
np.savetxt('np.csv', image, delimiter=',')
import numpy as np
import cv2
import os
IMG_DIR = '/home/kushal/Documents/opencv_tutorials/image_reading/dataset'
for img in os.listdir(IMG_DIR):
img_array = cv2.imread(os.path.join(IMG_DIR,img), cv2.IMREAD_GRAYSCALE)
img_array = (img_array.flatten())
img_array = img_array.reshape(-1, 1).T
print(img_array)
with open('output.csv', 'ab') as f:
np.savetxt(f, img_array, delimiter=",")
import os
import pandas as pd
path = 'path-to-the-folder'
os.chdir(path)
lists = os.listdir(path)
labels = []
file_lst = []
for folder in lists:
files = os.listdir(path +"/"+folder)
for file in files:
path_file = path + "/" + folder + "/" + file
file_lst.append(path_file)
labels.append(folder)
dictP_n = {"path": file_lst,
"label_name": labels,
"label": labels}
data = pd.DataFrame(dictP_n, index = None)
data = data.sample(frac=1)
data['label'] = data['label'].replace({"class1": 0, "class2": 1 })
data.to_csv("path-to-save-location//file_name.csv", index =None)

Categories