How to replace strings in multiple file using Python - python

I have two files (say file1 and file2). There are strings in file1 and file2 (equal numbers of strings).
I want to search the content of file1 in a directory(which have multiple sub-directories and XML files) which contains XML files and replace it with the content for file2.
import subprocess
import sys
import os
f_line = f.readlines()
g_line = g.readlines()
f=open("file1.txt")
g=open("file2.txt")
i = 0
for line in f_line:
if line.replace("\r\n", "") != g_line[i].replace("\r\n", "") :
print (line)
print(g_line[i])
cmd = "sed -i 's/" + line.replace("\r\n", "") + "/" + line[i].replace("\r\n","") + "/g' " + "`grep -l -R " + line.replace("\r\n", "") + " *.xml`"
print(cmd)
os.system(cmd)
i = i + 1
But the problem I'm facing is like this. The script searches the files and string and prints also (print(cmd)) but when I sun this script placing in the directory, I see this error in CYGWIN window "no input files for sed".

read two files into a dictionary
walk the directory reading xml files, replacing their contents, backing them up and overwriting the originals
f1 = open('pathtofile1').readlines()
f2 = open('pathtofile2').readlines()
replaceWith = dict()
for i in range(len(f1)):
replaceWith[f1[i].strip()] = f2[i].strip()
for root, dirnames, filenames in os.walk('pathtodir'):
for f in filenames:
f = open(os.path.join(root, f), 'r')
contents = f.read()
for k, v in replaceWith:
contents = re.sub(k, v, contents)
f.close()
shutil.copyfile(os.path.join(root, f), os.path.join(root, f)+'.bak')
f = open(os.path.join(root, f), 'w')
f.write(contents)
f.close()
A limitation is that if some search strings appear in replacements strings, a string may be replaced many times over.

Related

Python - Compare filenames in lists

I have code which check if files contents are the same. Files have the same filename in both folders.
For example:
folder JSON1/test1.txt
folder JSON2/test1.txt
For a single file this works, but I would like to apply it to a list of files.
So in the JSON1 folder I will have:
JSON1 / test.txt
JSON1 / second.txt
JSON1 / result.txt
And in the JSON2 folder I will have the same file list.
Can you help how to change it to handle multiple files?
import os
import sys
filenameODS = "C:/Users/adm/Json1/" + sys.argv[1] + ".txt"
filenameDWH = "C:/Users/adm/Json2/" + sys.argv[1] + ".txt"
file1contents = set(open(filenameODS).readlines())
file2contents = set(open(filenameDWH).readlines())
if file1contents == file2contents:
print("Files are the same!")
else:
print("In file2, not file1:\n")
for diffLine in file2contents - file1contents:
print ("\t", diffLine)
print ("\nIn file1, not file2:\n")
for diffLine in file1contents - file2contents:
print ("\t", diffLine)
with open('C:/Users/adm/Json3/'+ sys.argv[1] + '.txt', 'w') as f:
f.write(diffLine)
f.close()

Python function within a loop iterating over text files only works on the first file

I'm writing a simple script which loops over some text file and uses a function which should replace some string looking in a .csv file (every row has the word to replace and the word which I want there)
Here is my simple code:
import os
import re
import csv
def substitute_tips(table, tree_content):
count = 0
for l in table:
print("element of the table", l[1])
reg_tree = re.search(l[1],tree_content)
if reg_tree is not None:
#print("match in the tree: ",reg_tree.group())
tree_content = tree_content.replace(reg_tree.group(), l[0])
count = count + 1
else:
print("Not found: ",l[1])
tree_content = tree_content
print("Substitutions done: ",count)
return(tree_content)
path=os.getcwd()
table_name = "162_table.csv"
table = open(table_name)
csv_table = csv.reader(table, delimiter='\t')
for root, dirs, files in os.walk(path, topdown=True):
for name in files:
if name.endswith(".tree"):
print(Fore.GREEN + "Working on treefile", name)
my_tree = open(name, "r")
my_tree_content = my_tree.read()
output_tree = substitute_tips(csv_table, my_tree_content)
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
output_file.write(output_tree)
output_file.close()
else:
print(Fore.YELLOW + name ,Fore.RED + "doesn't end in .tree")
It's probably very easy, but I'm a newbie.
Thanks!
The files list returned by os.walk contains only the file names rather than the full path names. You should join root with the file names instead to be able to open them:
Change:
my_tree = open(name, "r")
...
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
to:
my_tree = open(os.path.join(root, name), "r")
...
output_file = open(os.path.join(root, name.rstrip("tree") + "SPECIES_NAME.tre"), "w")

Finding Files in File Tree Based on List of Extensions

I'm working on a small python 3 utility to build a zip file based on a list of file extensions. I have a text file of extensions and I'm passing a folder into the script:
working_folder = sys.argv[1]
zip_name = sys.argv[2]
#Open the extension file
extensions = []
with open('CxExt.txt') as fp:
lines = fp.readlines()
for line in lines:
extensions.append(line)
#Now get the files in the directory. If they have the right exttension add them to the list.
files = os.listdir(working_folder)
files_to_zip = []
for ext in extensions:
results = glob.glob(working_folder + '**/' + ext, recursive=True)
print(str(len(results)) + " results for " + working_folder + '**/*' + ext)
#search = "*"+ext
#results = [y for x in os.walk(working_folder) for y in glob(os.path.join(x[0], search))]
#results = list(Path(".").rglob(search))
for result in results:
files_to_zip.append(result)
if len(files_to_zip) == 0:
print("No Files Found")
sys.exit()
for f in files:
print("Checking: " + f)
filename, file_extension = os.path.splitext(f)
print(file_extension)
if file_extension in extensions:
print(f)
files_to_zip.append(file)
ZipFile = zipfile.ZipFile(zip_name, "w" )
for z in files_to_zip:
ZipFile.write(os.path.basename(z), compress_type=zipfile.ZIP_DEFLATED)
I've tried using glob, os.walk, and Path.rglob and I still can't get a list of files. There's got to be something just obvious that I'm missing. I built a test directory that has some directories, py files, and a few zip files. It returns 0 for all file types. What am I overlooking?
This is my first answer, so please don't expect it to be perfect.
I notice you're using file.readlines(). According to the Python docs here, file.readlines() returns a list of lines including the newline at the end. If your text file has the extensions separated by newlines, maybe try using file.read().split("\n") instead. Besides that, your code looks okay. Tell me if this fix doesn't work.

How to save all files, not replacing the file?

I have 100 text files and I want to save it into 100 text files too. Right now, my coding can read all the files but it save only one file, which is the latest result. Here I attached the code.
def nama():
path = "C:/Amar/code/"
infilename = os.listdir(path)
print len(infilename)
for filename in infilename:
print("jumpa dah" + path + "\\"+ filename)
f = open(path + "\\" + filename, "r")
data = f.read()
f.close()
lines = data.split('\n')
outfilename = path + "result.txt"
print outfilename
f = open(outfilename , "a")
Append a string that will act as a unique identifier for each output file. You can use the input filename for this:
outfilename = path + filename + "_result.txt"
# e.g reports_result.txt

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are some ways to move those new files to other directory after producing them, but what I wanna know is that if there is anyway to output new files to other directory at the same time they are produced in above code?
Is this what you're looking for:
import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'
# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
# build the output filename
base,ext = os.path.splitext(os.path.basename(input_file))
output_file = os.path.join(output_directory,base + '_tokenized' + ext)
with open(input_file) as in_file, open(output_file,'w') as out_file:
for line in in_file:
out_file.write(','.join(line.split()) + '\n')
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + file_name
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path
f1 = open("in.txt")
f2 = open("out.txt")
files = ["in.txt", "out.txt"]
base_path = "."
dir_name = "./some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
for file in files:
with open (file) as in_file, open(path.join(dir_name,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")

Categories