Python Converting String to jpg file - python

I have a file with strings that were pulled out of our HR system that are images of people that work for our company. I wrote the following code to convert these strings into .jpg files.
d is the name of the new file and x is the image string. I have printed both of these variables and they seem to be working. The file is saving and it is 71KB but when I open it in paint it says that it "cannot read this file and This is not a valid bitmap file, or its format is not currently supported."
I opened it with Photos and it just said it "can't open this file." Are you able to see any issue with the code?
import csv
import base64
import tkinter as tk
from tkinter import filedialog
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
with open(file_path, 'r') as csvfile:
readCSV = csv.reader(csvfile,delimiter=',')
next(readCSV)
for line in readCSV:
d = line[0]
x = line[1]
y = base64.encodebytes(x.encode())
with open("C:\\%s.jpg" %(d), "wb") as fh:
fh.write(base64.decodebytes(y))
fh.close()
break

Related

Python : Unable to write image file after reading image from ACCESS db

enter code here
from tkinter import filedialog, messagebox
from . import dbqueries
from . import db, dbqueries
def saveImages(userid):
fname = userid + "_A"
images = dbqueries.selectImages(userid)// OLE object from MS access DB
f = images[0] # Binary data
newfile = open(fname + ".jpg", 'w+')
for line in f:
newfile.write(str(line))
print(os.getcwd())
#messagebox.showinfo("profile image saved", " saved")
newfile.close()
When I open the newfile from directory receiving error "It looks like we do not support file format".
I've tried encoding decoding using BASE64 package, no luck.
enter image description here

Hashing Issue, Non-Text Files

My code works ok except for hashing. It works fine on hashing text files but as soon as it encounters a jpg or other file type, it crashes. I know it's some type of encoding error, but I'm stumped on how to encode it properly for non-text files.
#import libraries
import os
import time
from datetime import datetime
import logging
import hashlib
from prettytable import PrettyTable
from pathlib import Path
import glob
#user input
path = input ("Please enter directory: ")
print ("===============================================")
#processing input
if os.path.exists(path):
print("Processing directory: ", (path))
else:
print("Invalid directory.")
logging.basicConfig(filename="error.log", level=logging.ERROR)
logging.error(' The directory is not valid, please run the script again with the correct directory.')
print ("===============================================")
#process directory
directory = Path(path)
paths = []
filename = []
size = []
hashes = []
modified = []
files = list(directory.glob('**/*.*'))
for file in files:
paths.append(file.parents[0])
filename.append(file.parts[-1])
size.append(file.stat().st_size)
modified.append(datetime.fromtimestamp(file.stat().st_mtime))
with open(file) as f:
hashes.append(hashlib.md5(f.read().encode()).hexdigest())
#output in to tablecx
report = PrettyTable()
column_names = ['Path', 'File Name', 'File Size', 'Last Modified Time', 'MD5 Hash']
report.add_column(column_names[0], paths)
report.add_column(column_names[1], filename)
report.add_column(column_names[2], size)
report.add_column(column_names[3], modified)
report.add_column(column_names[4], hashes)
report.sortby = 'File Size'
print(report)
change following lines
with open(file) as f:
hashes.append(hashlib.md5(f.read().encode()).hexdigest())
to
with open(file, "rb") as f:
hashes.append(hashlib.md5(f.read()).hexdigest())
Doing this you will read the contents directly as bytes and you calculate the hash.
Your version tried to read the file as text and re-encoded it to bytes.
Reading a file as text means, the code tries to decode it with the system's encoding. For some byte combinations this will fail, as they are no valid code points for the given encoding.
So just read everything directly as bytes.

How to read Arabic text from PDF using Python script

I have a code written in Python that reads from PDF files and convert it to text file.
The problem occurred when I tried to read Arabic text from PDF files. I know that the error is in the coding and encoding process but I don't know how to fix it.
The system converts Arabic PDF files but the text file is empty.
and display this error:
Traceback (most recent call last): File
"C:\Users\test\Downloads\pdf-txt\text maker.py", line 68, in
f.write(content) UnicodeEncodeError: 'ascii' codec can't encode character u'\xa9' in position 50: ordinal not in range(128)
Code:
import os
from os import chdir, getcwd, listdir, path
import codecs
import pyPdf
from time import strftime
def check_path(prompt):
''' (str) -> str
Verifies if the provided absolute path does exist.
'''
abs_path = raw_input(prompt)
while path.exists(abs_path) != True:
print "\nThe specified path does not exist.\n"
abs_path = raw_input(prompt)
return abs_path
print "\n"
folder = check_path("Provide absolute path for the folder: ")
list=[]
directory=folder
for root,dirs,files in os.walk(directory):
for filename in files:
if filename.endswith('.pdf'):
t=os.path.join(directory,filename)
list.append(t)
m=len(list)
print (m)
i=0
while i<=m-1:
path=list[i]
print(path)
head,tail=os.path.split(path)
var="\\"
tail=tail.replace(".pdf",".txt")
name=head+var+tail
content = ""
# Load PDF into pyPDF
pdf = pyPdf.PdfFileReader(file(path, "rb"))
# Iterate pages
for j in range(0, pdf.getNumPages()):
# Extract text from page and add to content
content += pdf.getPage(j).extractText() + "\n"
print strftime("%H:%M:%S"), " pdf -> txt "
f=open(name,'w')
content.encode('utf-8')
f.write(content)
f.close
i=i+1
You have a couple of problems:
content.encode('utf-8') doesn't do anything. The return value is the encoded content, but you have to assign it to a variable. Better yet, open the file with an encoding, and write Unicode strings to that file. content appears to be Unicode data.
Example (works for both Python 2 and 3):
import io
f = io.open(name,'w',encoding='utf8')
f.write(content)
If you don't close the file properly, you may see no content because the file is not flushed to disk. You have f.close not f.close(). It's better to use with, which ensures the file is closed when the block exits.
Example:
import io
with io.open(name,'w',encoding='utf8') as f:
f.write(content)
In Python 3, you don't need to import and use io.open but it still works. open is equivalent. Python 2 needs the io.open form.
you can use anthor library called pdfplumber instead of using pypdf or PyPDF2
import arabic_reshaper
from bidi.algorithm import get_display
with pdfplumber.open(r'example.pdf') as pdf:
my_page = pdf.pages[10]
thepages=my_page.extract_text()
reshaped_text = arabic_reshaper.reshape(thepages)
bidi_text = get_display(reshaped_text)
print(bidi_text)

Python _csv Error: line contains NULL byte

This is my code:
filepath = sys.argv[1]
csvdata = list(csv.reader(open(filepath)))
How can I fix it?
I saved my excel file as a csv and receieved this error:
_csv.Error: new-line character seen in unquoted field - do you need to open the file in universal-newline mode?
An Excel file is not a csv file. First export / save the file as csv.
There are differences between python versions about whether to open the file as binary or text. This has relevance to how newlines are handled.
In Python 2.x, open as binary: open(filepath, 'rb')
In Python 3.x, don't : open('file.csv', 'r')
The second part I learned from this link about reading in csv files
For some operating systems (Mac OS for sure) you need to open with the mode 'rU' See: this link with same problem specifically on Mac OS
try this (put actual location of csv file)...
with open('c:\pytest.csv', 'rb') as csvfile:
data = csv.reader(csvfile)
mylist = list (data)
print mylist
from tkFileDialog import askopenfilename
import csv
filename = askopenfilename()
with open(filename, 'rb') as csvfile:
data = csv.reader(csvfile)
mylist = list (data)
print mylist

Python CSV.Writer changing saving path

I would like to have my tkinter program prompt the user to select the path the want to save the file which will be produced by the program.
My code looks like this. At this stage the program only saves to one file (the one I defined to test the program)
What code would I use to have 'test_write.csv' changed to any file the user chooses?
##Writing to .cvs file
with open('test_write.csv', 'w') as fp:
a = csv.writer(fp)
# write row of header names
a.writerow(n)
Thank you
Here's an example using tkFileDialog:
import Tkinter
import tkFileDialog
import csv
formats = [('Comma Separated values', '*.csv'), ]
root = Tkinter.Tk()
file_name = tkFileDialog.asksaveasfilename(parent=root, filetypes=formats, title="Save as...")
if file_name:
with open(file_name, 'w') as fp:
a = csv.writer(fp)
# write row of header names
a.writerow(n)
Use the tkFileDialog module.
Example:
import tkFileDialog
with open(tkFileDialog.asksaveasfilename(), "w") as fp:
...
Solution for python3.xxx
import tkinter
from tkinter.filedialog import asksaveasfilename
with open(asksaveasfilename(), 'w') as fp:

Categories