Python - Compare filenames in lists - python

I have code which checks whether the contents of two files are the same. The files have the same filename in both folders.
For example:
folder JSON1/test1.txt
folder JSON2/test1.txt
For a single file this works, but I would like to apply it to a list of files.
So in the JSON1 folder I will have:
JSON1 / test.txt
JSON1 / second.txt
JSON1 / result.txt
And in the JSON2 folder I will have the same file list.
Can you help how to change it to handle multiple files?
import os
import sys
filenameODS = "C:/Users/adm/Json1/" + sys.argv[1] + ".txt"
filenameDWH = "C:/Users/adm/Json2/" + sys.argv[1] + ".txt"
file1contents = set(open(filenameODS).readlines())
file2contents = set(open(filenameDWH).readlines())
if file1contents == file2contents:
print("Files are the same!")
else:
print("In file2, not file1:\n")
for diffLine in file2contents - file1contents:
print ("\t", diffLine)
print ("\nIn file1, not file2:\n")
for diffLine in file1contents - file2contents:
print ("\t", diffLine)
with open('C:/Users/adm/Json3/'+ sys.argv[1] + '.txt', 'w') as f:
f.write(diffLine)
f.close()

Related

How do I merge multiple .txt files that are in a .zip file into only one .txt file in Python?

I'm trying to merge multiple .txt files that are in a .zip file into only one .txt file in Python.
My code is the following:
firstfile = Path(r'C:\Users\Viniz\Downloads\devkmbe-5511001_05-12-2022_00_20_09.zip\AudioCaptureMemoryUsage_01_12_2022.txt')
secondfile = Path(r'C:\Users\Viniz\Downloads\devkmbe-5511001_05-12-2022_00_20_09.zip\AudioMatchingMemoryUsage_01_12_2022.txt')
newfile = input("Enter the name of the new file: ")
print()
print("The merged content of the 2 files will be in", newfile)
with open(newfile, "wb") as wfd:
for f in [firstfile, secondfile]:
with open(f, "rb") as fd:
shutil.copyfileobj(fd, wfd, 1024 * 1024 * 10)
print("\nThe content is merged successfully.!")
print("Do you want to view it ? (y / n): ")
check = input()
if check == 'n':
exit()
else:
print()
c = open(newfile, "r")
print(c.read())
c.close()
Thanks.
I tried to merge them into a single file, but it didn't work.
To merge the files, you'll need to first extract the files from the zip file, then merge them, and then write the merged content to a new file. Here is an example of how you can do this using the zipfile module.
Update: If the .txt files are located inside a folder within the zip file, you'll need to include the folder name in the path when opening the files.
import zipfile

zip_file = r'C:\Users\Viniz\Downloads\devkmbe-5511001_05-12-2022_00_20_09.zip'
folder_name = 'myfolder'
# Member names inside a zip always use forward slashes, regardless of the OS.
first_file = folder_name + '/AudioCaptureMemoryUsage_01_12_2022.txt'
second_file = folder_name + '/AudioMatchingMemoryUsage_01_12_2022.txt'

# Read both members straight out of the archive; ZipFile.open() yields bytes.
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    with zip_ref.open(first_file) as f1, zip_ref.open(second_file) as f2:
        first_content = f1.read()
        second_content = f2.read()

# Concatenate the two files
merged_content = first_content + second_content

# Write the merged content to a new file
new_file = input("Enter the name of the new file: ")
with open(new_file, 'wb') as new_f:
    new_f.write(merged_content)
print("The content is merged successfully.!")

print("Do you want to view it ? (y / n): ")
check = input()
if check == 'n':
    exit()
else:
    print()
    # A context manager closes the file even if printing raises.
    with open(new_file, "r") as c:
        print(c.read())
Make sure to replace 'myfolder' with the actual name of the folder containing the .txt files in your zip file.
For multiple files..
import zipfile

zip_file = r'C:\Users\Viniz\Downloads\devkmbe-5511001_05-12-2022_00_20_09.zip'
folder_name = 'myfolder'
file_names = [
    'AudioCaptureMemoryUsage_01_12_2022.txt',
    'AudioMatchingMemoryUsage_01_12_2022.txt',
    'File3.txt',
    'File4.txt',
    # ... list the remaining member names here (a literal `...` in the list
    # would be an Ellipsis object and break the string concatenation below) ...
    'File29.txt',
]

# Collect each member's bytes and join once at the end; repeatedly doing
# `bytes += bytes` re-copies everything read so far on every iteration.
merged_parts = []
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    for file_name in file_names:
        with zip_ref.open(folder_name + '/' + file_name) as f:
            merged_parts.append(f.read())
merged_content = b''.join(merged_parts)

# Write the merged content to a new file
new_file = input("Enter the name of the new file: ")
with open(new_file, 'wb') as new_f:
    new_f.write(merged_content)
print("The content is merged successfully.!")

print("Do you want to view it ? (y / n): ")
check = input()
if check == 'n':
    exit()
else:
    print()
    # A context manager closes the file even if printing raises.
    with open(new_file, "r") as c:
        print(c.read())
import os
import zipfile
import shutil
def extract_txt_files(zip_path, temp_folder):
    """Extract all the .txt members of a zip file into ``temp_folder``.

    Each member is written as ``<n>.txt``, where ``n`` starts one past the
    number of ``.txt`` files already present in ``temp_folder`` and increases
    in the order the members are listed in the archive.

    Args:
        zip_path: Path to the zip archive to read.
        temp_folder: Existing directory that receives the numbered files.
    """
    with zipfile.ZipFile(zip_path, "r") as zip_file:
        i = len([name for name in os.listdir(temp_folder) if name.endswith(".txt")]) + 1
        for member in zip_file.infolist():
            if member.filename.endswith(".txt"):
                target = os.path.join(temp_folder, f"{i}.txt")
                # Stream the member directly to its numbered target. Extracting
                # under the archive name first and renaming afterwards would
                # overwrite an earlier numbered file whenever the archive
                # itself contains a member called e.g. "1.txt", and would leave
                # empty sub-directories behind for nested members.
                with zip_file.open(member) as src, open(target, "wb") as dst:
                    shutil.copyfileobj(src, dst)
                i += 1
def _txt_sort_key(filename):
    """Sort key placing purely numeric stems first, in numeric order."""
    stem = os.path.splitext(filename)[0]
    if stem.isdigit():
        return (0, int(stem), "")
    return (1, 0, stem)


def merge_txt_files(temp_folder, output_path="merged.txt"):
    """Merge all the .txt files in ``temp_folder`` into a single file.

    Files are concatenated in numeric order of their names (``1.txt``,
    ``2.txt``, ..., ``10.txt``); any non-numeric names follow alphabetically.

    Args:
        temp_folder: Directory containing the ``.txt`` files to merge.
        output_path: File to write the merged text to. Defaults to
            ``"merged.txt"`` in the current working directory.
    """
    # os.listdir() returns entries in arbitrary order, so sort explicitly;
    # a plain string sort would put "10.txt" before "2.txt".
    txt_names = sorted(
        (name for name in os.listdir(temp_folder) if name.endswith(".txt")),
        key=_txt_sort_key,
    )
    with open(output_path, "w") as outfile:
        for filename in txt_names:
            with open(os.path.join(temp_folder, filename)) as infile:
                outfile.write(infile.read())
def delete_temp_folder(temp_folder):
    """Delete the given temp folder together with everything inside it.

    Args:
        temp_folder: Directory to remove.
    """
    # os.rmdir() only removes empty directories and would raise OSError on a
    # folder that still holds the extracted files; rmtree removes the contents.
    shutil.rmtree(temp_folder)
# paths to the zip files
zip1_path = "zip1.zip"
zip2_path = "zip2.zip"

# create a temporary folder to extract the .txt files
temp_folder = "temp"
os.makedirs(temp_folder, exist_ok=True)

# extract the .txt files from the zip files
extract_txt_files(zip1_path, temp_folder)
extract_txt_files(zip2_path, temp_folder)

# merge the .txt files
merge_txt_files(temp_folder)

# delete the temporary folder (rmtree also removes the extracted files in it)
shutil.rmtree(temp_folder)
print("The content is merged successfully.!")

print("Do you want to view it ? (y / n): ")
check = input()
if check == 'n':
    exit()
else:
    print()
    # merge_txt_files() writes its output to "merged.txt"; the earlier
    # snippets' `new_file` variable does not exist in this script.
    with open("merged.txt", "r") as c:
        print(c.read())
The zip path in the script is relative, which means that the zip files "zip1.zip" and "zip2.zip" are expected to be in the same directory as the script.
If the zip files contain multiple .txt files, the script will extract all of them to the temporary folder.
The script stores each extracted .txt file under an incremental index plus the .txt extension, so that all the extracted files have unique names and are not overwritten. The numbering follows the order of the .txt files in each zip file.

Python: How make a zip with file, not into folder

Folder contain my files and I want to make a zip with those files, and save zip into a folder.
Here my files
- file:
- file_0.txt
- file_1.txt
- file_2.txt
- zip:
// save zip
script.py
Here my code
from zipfile import ZipFile
zip_name = "Zipfile"
zipObj = ZipFile("zip/{}.zip".format(zip_name), "w")
count = 0
while count < 3:
file_name = "file_"
zipObj.write('file/' + file_name + str(count) + ".txt")
count += 1
This makes a zip file containing a folder named file with all the .txt files inside it. I want to remove the folder and zip only the files.
import os
from zipfile import ZipFile

zip_name = "Zipfile"

# The with-block guarantees the archive is closed; a ZipFile that is never
# closed may be left without its central directory and be unreadable.
with ZipFile("zip/{}.zip".format(zip_name), "w") as zipObj:
    for count in range(3):
        file_name = "file_"
        source_path = os.path.join('file', file_name + str(count) + ".txt")
        # arcname sets the name stored inside the archive. Passing only the
        # base name drops the "file/" folder without having to os.chdir(),
        # which would change the working directory for the whole process.
        zipObj.write(source_path, arcname=os.path.basename(source_path))
This should work for you

How to save all files, not replacing the file?

I have 100 text files and I want to save the results into 100 text files too. Right now, my code can read all the files, but it saves only one file, which holds the latest result. Here is the code.
def nama():
path = "C:/Amar/code/"
infilename = os.listdir(path)
print len(infilename)
for filename in infilename:
print("jumpa dah" + path + "\\"+ filename)
f = open(path + "\\" + filename, "r")
data = f.read()
f.close()
lines = data.split('\n')
outfilename = path + "result.txt"
print outfilename
f = open(outfilename , "a")
Append a string that will act as a unique identifier for each output file. You can use the input filename for this:
# Strip the input file's extension first, so "reports.txt" does not become
# "reports.txt_result.txt".
outfilename = path + os.path.splitext(filename)[0] + "_result.txt"
# e.g reports_result.txt

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are ways to move the new files to another directory after producing them, but what I want to know is whether there is any way to write the new files to another directory at the same time they are produced in the code above.
Is this what you're looking for:
import os
import glob

source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'

# open(..., 'w') does not create missing directories, so make sure the
# destination exists before writing into it.
os.makedirs(output_directory, exist_ok=True)

# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
    # build the output filename: <output_directory>/<name>_tokenized<ext>
    base, ext = os.path.splitext(os.path.basename(input_file))
    output_file = os.path.join(output_directory, base + '_tokenized' + ext)
    with open(input_file) as in_file, open(output_file, 'w') as out_file:
        for line in in_file:
            # split() with no argument splits on any run of whitespace
            out_file.write(','.join(line.split()) + '\n')
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
# os.path.join inserts the path separator; plain `dir_name + file_name` would
# produce e.g. "../some_dirfoo_tokenized.txt" next to the directory instead
# of a file inside it.
out_file_name = os.path.join(dir_name, file_name)
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path

files = ["in.txt", "out.txt"]
base_path = "."           # directory the input files are read from
dir_name = "./some_dir"   # directory the tokenized files are written to

if not os.path.exists(dir_name) : os.makedirs(dir_name)

for file in files:
    # Input comes from base_path, output goes to dir_name: the two paths are
    # independent, which is what sends the results to a different directory.
    with open(path.join(base_path, file)) as in_file, open(path.join(dir_name, "%s_tokenized.txt" % file), "w") as out_file:
        for line in in_file:
            words = line.split()
            str1 = ','.join(words)
            out_file.write(str1)
            out_file.write("\n")

How to replace strings in multiple file using Python

I have two files (say file1 and file2). There are strings in file1 and file2 (equal numbers of strings).
I want to search the content of file1 in a directory(which have multiple sub-directories and XML files) which contains XML files and replace it with the content for file2.
import subprocess
import sys
import os
f_line = f.readlines()
g_line = g.readlines()
f=open("file1.txt")
g=open("file2.txt")
i = 0
for line in f_line:
if line.replace("\r\n", "") != g_line[i].replace("\r\n", "") :
print (line)
print(g_line[i])
cmd = "sed -i 's/" + line.replace("\r\n", "") + "/" + line[i].replace("\r\n","") + "/g' " + "`grep -l -R " + line.replace("\r\n", "") + " *.xml`"
print(cmd)
os.system(cmd)
i = i + 1
But the problem I'm facing is this: the script searches the files and strings and also prints the command (print(cmd)), but when I run this script from inside the directory, I see this error in the CYGWIN window: "no input files for sed".
read two files into a dictionary
walk the directory reading xml files, replacing their contents, backing them up and overwriting the originals
import os
import shutil


def load_replacements(search_path, replace_path):
    """Build a {search string: replacement string} dict from two files.

    Line N of ``search_path`` is paired with line N of ``replace_path``;
    surrounding whitespace (including the line ending) is stripped from both.
    Blank search lines are skipped, because replacing the empty string would
    insert the replacement between every character of the target text.
    """
    with open(search_path) as search_file, open(replace_path) as replace_file:
        pairs = ((s.strip(), r.strip()) for s, r in zip(search_file, replace_file))
        return {search: replacement for search, replacement in pairs if search}


def replace_in_tree(top, replacements, suffix=".xml"):
    """Apply ``replacements`` to every ``suffix`` file under ``top``.

    Each matching file is first copied to ``<name>.bak`` and then overwritten
    with the substituted contents.

    Args:
        top: Root directory to walk (sub-directories included).
        replacements: Mapping of literal search string -> replacement string.
        suffix: Only files whose name ends with this are touched, so the
            ``.bak`` backups are not themselves rewritten on a later run.
    """
    for root, _dirnames, filenames in os.walk(top):
        for name in filenames:
            if not name.endswith(suffix):
                continue
            # Keep the path in its own variable; the file object must not
            # shadow the name that is later passed to os.path.join().
            file_path = os.path.join(root, name)
            with open(file_path, 'r') as handle:
                contents = handle.read()
            # Iterate .items(): looping over the dict itself yields only keys.
            for search, replacement in replacements.items():
                # str.replace matches literally; re.sub would interpret
                # characters such as "." or "(" in the search string as regex
                # syntax.
                contents = contents.replace(search, replacement)
            shutil.copyfile(file_path, file_path + '.bak')
            with open(file_path, 'w') as handle:
                handle.write(contents)


if __name__ == "__main__":
    replaceWith = load_replacements('pathtofile1', 'pathtofile2')
    replace_in_tree('pathtodir', replaceWith)
A limitation is that if some search strings appear in replacements strings, a string may be replaced many times over.

Categories