Encoding any file type with python - python

I am trying to go through all the files in a folder, read each file's data decoded as UTF-8, and then write that data to a new file, which should create a copy of the original file. However, when I do this, the new copy of the file is corrupted.
- Should I be using UTF-8 text encoding for all file types (.py, .txt, .docx, .jpg)?
- Is there one standard text encoding that works for all file types?
def read_files():
    # Question code, kept as posted: every file is read as UTF-8 text and then
    # re-encoded to bytes before being written to a "_converted" copy.
    files = ["program.py", "letter.docx", "cat.jpg", "hello_world.py"]
    for file in files:
        #open existing file
        # Text mode decodes the bytes as UTF-8 and translates line endings, so
        # what is read here is not guaranteed to match the bytes on disk.
        f = open(file, encoding="utf-8")
        file_content = f.read()
        #get file name info
        # Splits the name on "." and keeps the first two pieces.
        file_extension = file.split(".")[1]
        file_name = file.split(".")[0]
        #write encoded data to new file
        # Rebinding f drops the reference to the input file without closing it.
        f = open(file_name + "_converted." + file_extension , "wb")
        f.write(bytes(file_content, encoding="utf-8"))
        f.close()
read_files()

proper way to copy files with shutil:
import shutil
# `file`, `file_name` and `file_extension` are not defined in this fragment;
# presumably they are the per-file variables from the loop in read_files().
source = file
destination = file_name + "_converted." + file_extension
# shutil.copy copies the contents of source to the destination path.
shutil.copy(source, destination)
bad and slow way to copy files:
def read_files(files=None):
    """Copy each file byte-for-byte to ``<name>_converted<ext>``.

    Files are opened in binary mode, so no text decoding or newline
    translation is applied and any file type is copied unchanged.

    Args:
        files: Paths of the files to copy. When omitted, the original
            hard-coded list of four files is used.
    """
    import os

    if files is None:
        files = ["program.py", "letter.docx", "cat.jpg", "hello_world.py"]
    for file in files:
        # Read the raw bytes; the context manager closes the file even if
        # reading fails.
        with open(file, "rb") as src:
            file_content = src.read()
        # splitext splits on the last dot of the final path component only, so
        # names with several dots (or none at all) still get a usable name.
        file_name, file_extension = os.path.splitext(file)
        # Write the raw bytes to the new file.
        with open(file_name + "_converted" + file_extension, "wb") as dst:
            dst.write(file_content)
read_files()
If you don't need to decode the files to text, open them only in binary mode: files such as .jpg and .docx are not plain text, so they break when read in text mode.
Alternatively, if you actually need to work on the contents of the .docx or .jpg files, use the proper modules to do so, such as Pillow for .jpg files and the docx module for .docx files.

Related

Encoding issue when writing Hebrew to text file, with Python

Django Project
Encoding problem:
The flow of code
reading Excel file
manipulate the data from the Excel
create new txt file and write into it
send the txt file to the client
coding = "utf8"
# Write the manipulated rows to the text file as UTF-8; errors="ignore"
# silently drops any character that cannot be encoded.
file = open(filename, "w", encoding=coding, errors="ignore")
for row in excel_data_df.iloc():
    line = manipulate(row)
    file.write(line)
file.close()
# Re-open the same file in text mode and pass it to the response as content.
file_data = open(filename, "r", encoding=coding, errors="ignore")
response = HttpResponse(file_data, content_type='application/vnd.ms-excel')
# Ask the client to download the body as an attachment named after the file.
response['Content-Disposition'] = 'attachment; filename=' + filename
Everything works fine, but when I open the file with ANSI encoding, all the Hebrew text turns into gibberish.
I have tried changing the encoding to every Hebrew option listed at https://docs.python.org/2.4/lib/standard-encodings.html
The Hebrew text should be written as ASCII NEW CODE or ASCII WINDOWS TEXT.
Any ideas?
You need to change the mode to "rb" and remove the encoding parameter
# Write the text file without an explicit encoding argument, so the platform
# default is used. The context manager guarantees the file is flushed and
# closed before it is read back, even if manipulate() raises.
with open(filename, "w") as file:
    for row in excel_data_df.iloc():
        line = manipulate(row)
        file.write(line)
# Read the file back as raw bytes so no second decode step is applied.
file_data = open(filename, "rb")
response = HttpResponse(file_data, content_type='application/vnd.ms-excel')
response['Content-Disposition'] = 'attachment; filename=' + filename

Wrong name for compressed .gz file - Python

I'm trying to compress a text file into a gzip (.gz) file, in Python, using the following code:
import gzip
import os
import shutil

text_file = 'C:\\Users\\lenovo-miguel\\Downloads\\python\\text_file.txt'
gz_file = text_file + '.GZ'
# With gzip.open(gz_file, ...) the name recorded inside the archive is taken
# from the output path. Opening the output as a plain file and wrapping it in
# GzipFile lets filename= set the recorded original name explicitly, while the
# archive itself keeps the ".GZ" name.
with open(text_file, 'rb') as f_in, open(gz_file, 'wb') as raw_out:
    with gzip.GzipFile(
        filename=os.path.basename(text_file), mode='wb', fileobj=raw_out
    ) as f_out:
        shutil.copyfileobj(f_in, f_out)
.GZ file image
The code works and the .gz file is created with the correct name (text_file.txt.GZ), but when you open the .GZ file, the name of the compressed file inside it is not 'text_file.txt'; it is also 'text_file.txt.GZ' (see image).
I need to keep the original name, is there a way to do it?
Thanks in advance for any help!

How to extract Bzip2 file in python

I am writing a program that takes the file extension from a URL string, downloads the file, and extracts it. I can do this for tar.gz and zip archives. For a bz2 file, I can read it and write the decompressed data to a new file in the same directory, but what I would like is to create a folder and extract the file into it, as zipfile and tarfile do with something like `extractall`. I have not been able to do this; can anyone help?
def bz2_download(*args):
    """Download a .bz2 file and decompress it into a folder named after it.

    The positional arguments are joined with spaces to form the URL. The
    compressed file is saved in the current directory under the last path
    component of the URL. The decompressed data is written inside a directory
    named after the part of that file name before its first dot.

    Args:
        *args: Pieces of the download URL.

    Raises:
        OSError, EOFError: Re-raised after printing the failure banner when
            the downloaded data cannot be decompressed or written.
    """
    import bz2
    import shutil

    mystring_zip = ' '.join(args)
    filename = mystring_zip.split('/')[-1]  # last path component of the URL
    display_name = filename.split('.')[-2]
    print('{} Bz2 file Download Started!!!'.format(display_name))
    r = requests.get(mystring_zip)
    with open(filename, 'wb') as output_file:
        output_file.write(r.content)
    print('{} Bz2 file Download Completed!!!'.format(display_name))
    path_zip = filename.split('.')[0]
    print(path_zip, '+++++++++++++++++++++++++++++++++++++++++++++++++++++++')
    extract_dir = path_zip
    print('--------------------------{} extraction Started----------------------------------'.format(extract_dir))
    # Unlike zip/tar, a .bz2 file is one compressed stream with no member
    # names, so "extracting into a folder" means creating the folder and
    # writing the single decompressed file inside it.
    os.makedirs(extract_dir, exist_ok=True)
    if filename.lower().endswith('.bz2'):
        member_name = filename[:-len('.bz2')]
    else:
        member_name = filename + '.out'
    target = os.path.join(extract_dir, member_name)
    try:
        with bz2.open(filename, 'rb') as source, open(target, 'wb') as new_file:
            # Stream in 100 KiB chunks so large files are never held in memory.
            shutil.copyfileobj(source, new_file, 100 * 1024)
    except (OSError, EOFError):
        print('---------------------------{} extraction Failed----------------------------------'.format(extract_dir))
        raise
    print('---------------------------{} extraction completed----------------------------------'.format(extract_dir))
Here is my tar.gz code, which downloads the file and extracts it to a folder; I want to do something similar with the bzip2 file.
def tar_download(*args):
    """Download a .tar.gz archive and extract it into a folder named after it.

    The positional arguments are joined with spaces to form the URL. The
    archive is saved in the current directory under the last path component
    of the URL and extracted into a directory named after the part of that
    file name before its first dot.

    Args:
        *args: Pieces of the download URL.

    Raises:
        tarfile.TarError, OSError, EOFError: Re-raised after printing the
            failure banner when the archive cannot be read or extracted.
    """
    mystring_tar = ' '.join(args)
    filename = mystring_tar.split('/')[-1]
    base_name = filename.split('.')[0]
    print('{} Tar.gz file Download Started!!!'.format(base_name))
    r = requests.get(mystring_tar)
    with open(filename, 'wb') as output_file:
        output_file.write(r.content)
    print(' {} Tar.gz file Download Completed!!!'.format(base_name))
    extract_dir = base_name
    print('---------------------------{} extraction Started----------------------------------'.format(extract_dir))
    try:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive. For archives from untrusted URLs, consider the `filter`
        # argument of extractall() where the running Python supports it.
        with tarfile.open(filename, mode="r|gz") as thetarfile:
            thetarfile.extractall(extract_dir)
    except (tarfile.TarError, OSError, EOFError):
        print('---------------------------{} extraction Failed----------------------------------'.format(extract_dir))
        raise
    print('---------------------------{} extraction Complete----------------------------------'.format(extract_dir))

Error reading data from the old files, writing it into the new files and then deleting the old files in Python

I'm trying to use the code below to read 5 files from the source folder, write them to the destination, and then delete the files in the source. I get the following error: [Errno 13] Permission denied: 'c:\\data\\AM\\Desktop\\tester1'. By the way, the files look like this:
import os
import time
# Question code, kept as posted. Indentation was lost in the original post;
# the nesting below is the most plausible reading (TODO confirm).
source = r'c:\data\AM\Desktop\tester'
destination = r'c:\data\AM\Desktop\tester1'
for file in os.listdir(source):
    file_path = os.path.join(source, file)
    # NOTE: os.path.isfile is referenced but not called, so this tests the
    # function object (always truthy) and the loop never skips an entry.
    if not os.path.isfile:
        continue
    print(file_path)
    # `destination` is opened for writing as-is; no file name is appended.
    with open (file_path, 'r') as IN, open (destination, 'w') as OUT:
        data ={
            'Power': None,
        }
        for line in IN:
            # Each line must split into exactly four whitespace-separated fields.
            splitter = (ID, Item, Content, Status) = line.strip().split()
            # NOTE: this is a chained comparison, i.e.
            # (Item in data) and (data == "Power"); a dict never equals a
            # string, so the branch below never runs.
            if Item in data == "Power":
                Content = str(int(Content) * 10)
    # NOTE: IN is the file object, not a path; nothing is ever written to OUT.
    os.remove(IN)
I have re-written your entire code. I assume you want to update the value of Power by a multiple of 10 and write the updated content into a new file. The below code will do just that.
Your code had multiple issues, first and foremost, most of what you wanted in your head did not get written in the code (like writing into a new file, providing what and where to write, etc.). The original issue of the permission was because you were trying to open a directory to write instead of a file.
source = r'c:\data\AM\Desktop\tester'
destination = r'c:\data\AM\Desktop\tester1'
# Items whose Content value is multiplied by 10 while copying.
data = {
    'Power': None,
}
for file in os.listdir(source):
    source_file = os.path.join(source, file)
    destination_file = os.path.join(destination, file)
    # Skip sub-directories and anything else that is not a regular file.
    # isfile must be called with the path; the bare function is always truthy.
    if not os.path.isfile(source_file):
        continue
    print(source_file)
    with open(source_file, 'r') as IN, open(destination_file, 'w') as OUT:
        for line in IN:
            fields = line.strip().split()
            # Only well-formed "ID Item Content Status" rows for a listed item
            # are rewritten. Every other line, including blank or malformed
            # ones, is copied through unchanged instead of raising.
            if len(fields) == 4 and fields[1] in data:
                ID, Item, Content, Status = fields
                Content = str(int(Content) * 10)
                OUT.write(ID+'\t'+Item+'\t'+Content+'\t'+Status+'\n')
            else:
                OUT.write(line)
    # Both files are closed at this point, so the source can be removed.
    os.remove(source_file)
Hope this works for you.
I'm not sure what you're going for here, but here's what I could come up with the question put into the title.
import os
old_path = 'old file path.txt'
new_path = 'new file path.txt'

# Load the complete contents of the old file into memory.
with open(old_path, 'r') as old_file:
    contents = old_file.read()

# Create (or overwrite) the new file with those contents.
with open(new_path, 'w') as new_file:
    new_file.write(contents)

# The copy has been written and closed, so delete the original.
os.remove(old_path)
Sounds from your description like this line fails:
with open (file_path, 'r') as IN, open (destination, 'w') as OUT:
Because of this operation:
open (destination, 'w')
So, you might not have write-access to
c:\data\AM\Desktop\tester1
Set file permission on Windows systems:
https://www.online-tech-tips.com/computer-tips/set-file-folder-permissions-windows/
@Sherin Jayanand
One more question: I wanted to try something out with some pieces of your code. This is what I made of it:
import os
import time
from datetime import datetime
#Make source, destination and archive paths.
source = r'c:\data\AM\Desktop\Source'
destination = r'c:\data\AM\Desktop\Destination'
archive = r'c:\data\AM\Desktop\Archive'
# Items whose Content value is multiplied by 10 while copying.
data = {'Power': None}
# os.walk lists only non-directory entries in `files`, so a sub-directory is
# never opened as if it were a file. Each input path is paired with its own
# output path inside the same loop iteration.
for root, dirs, files in os.walk(source):
    # Mirror the sub-directory layout of source under destination.
    dst_dir = os.path.normpath(
        os.path.join(destination, os.path.relpath(root, source))
    )
    os.makedirs(dst_dir, exist_ok=True)
    for f in files:
        pads = os.path.join(root, f)
        dst_path = os.path.join(dst_dir, f)
        print(dst_path)
        with open(pads, 'r') as IN, open(dst_path, 'w') as OUT:
            for line in IN:
                fields = line.strip().split()
                # Rewrite only well-formed rows for a listed item; copy any
                # other line through unchanged.
                if len(fields) == 4 and fields[1] in data:
                    ID, Item, Content, Status = fields
                    Content = str(int(Content) * 10)
                    OUT.write(ID+'\t'+Item+'\t'+Content+'\t'+Status+'\n')
                else:
                    OUT.write(line)
But again I received the same error: Permission denied: 'c:\\data\\AM\\Desktop\\Destination\\C'
How come? Thank you very much!

how to open a gz file and save the file as txt in python

I have a gz file, how can I unzip the file and save the content to a txt in python?
I imported gzip already
# gzip.open returns a file object for reading the decompressed bytes; despite
# its name, file_path is that file object rather than a path.
file_path = gzip.open(file_name, 'rb')
How about opening a second file and writing to it?
import gzip
import shutil

# gzip.open(..., 'rb') yields decompressed bytes, so the output file must be
# opened in binary mode too; writing bytes to a text-mode file raises
# TypeError.
with gzip.open('file.txt.gz', 'rb') as f, open('file.txt', 'wb') as f_out:
    # Stream in chunks instead of loading the whole file into memory.
    shutil.copyfileobj(f, f_out)
Gzip's open method should open the file in such a way that its contents can be read like a normal file:
import gzip
#Define the file's location
file_path = "/path/to/file.gz"
#Open the file and read its contents
with gzip.open(file_path, "rb") as file:
    file_content = file.read()
#Save the new txt file
txt_file_name = "txtFile.txt"
# file_content is bytes because the archive was read in "rb" mode, so the
# output must be opened in binary mode; a text-mode write raises TypeError.
with open(txt_file_name, "wb") as file:
    file.write(file_content)

Categories