Python: Issue when trying to read and write multiple files

Python: Issue when trying to read and write multiple files - python

This script reads and writes all the individual html files in a directory. The script reiterates, highlight and write the output.The issue is, after highlighting the last instance of the search item, the script removes all the remaining contents after the last search instance in the output of each file. Any help here is appreciated.
import os
import sys
import re
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = os.path.join(source+'\\'+f)
infile = open(filepath, 'r+')
source_content = infile.read()
color = ('red')
regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
i = 0; output = ""
for m in regex.finditer(source_content):
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
outfile = open(filepath, 'w')
outfile.seek(0, 2)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()

After your for loop is over, you need to include whatever is left after the last match:
...
i = m.end()
output += source_content[i:]) # Here's the end of your file
outfile = open(filepath, 'w')
...

Related

How to replace two different lines of text?

I need to create a file that changes the date and name of a .txt, but I can only change one or the other with this code I found on the internet, can anyone give me any tips?
Print
import os
from ast import Str
file = open("example.txt", "r")
replacement = ""
data = "02/07/2022"
name = "Alan"
for line in file:
line = line.strip()
changes = line.replace("__/__/____", data)
replacement = replacement + changes + "\n"
file.close()
fout = open("final.txt", "w")
fout.write(replacement)
fout.close()

You don't need to do this a line a time. You can replace that entire program with this:
data = "02/07/2022"
name = "Alan"
text = open("example.txt", "r").read().replace("__/__/____", data)
open("final.txt", "w").write(text)

How to add for loop in python?

I'm creating new files from originally existing ones in the mdp folder by changing a couple of lines in those files using python. I need to do this for 1000 files. Can anyone suggest a for loop which reads all files and changes them and creates new in one go?
This way I have to change the the number followed by 'md_' in the path and it's tedious because there are a 1000 files here.
I tried using str() but there was a 'could not read file error'
fin = open("/home/abc/xyz/mdp/md_1.mdp", "rt")
fout = open("/home/abc/xyz/middle/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('integrator = md', 'integrator
= md-vv'))
fin = open("/home/abc/xyz/middle/md_1.mdp", "rt")
fout = open("/home/abc/xyz/mdb/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('dt = 0.001', 'dt
= -0.001'))
fin.close()
fout.close()

os.listdir(path) is your friend:
import os
sourcedir = "/home/abc/xyz/mdp"
destdir = "/home/abc/xyz/middle"
for filename in os.listdir(sourcedir):
if not filename.endswith(".mdp"):
continue
source = os.path.join(sourcedir, filename)
dest = os.path.join(destdir, filename)
# with open(xxx) as varname makes sure the file(s)
# will be closed whatever happens in the 'with' block
# NB text mode is the default, and so is read mode
with open(source) as fin, open(dest, "w") as fout:
# python files are iterable... avoids reading
# the whole file in memory at once
for line in fin:
# will only work for those exact strings,
# you may want to use regexps if number of
# whitespaces vary etc
line = line.replace("dt = 0.001", "dt = -0.001")
line = line.replace(
'integrator = md',
'integrator = md-vv'
)
fout.write(line)

Assuming you want to edit all files that are located in the mdp folder you could do something like this.
import os
dir = "/home/abc/xyz/mdp/"
for filename in os.listdir(dir):
with open(dir + filename, "r+") as file:
text = file.read()
text = text.replace("dt = 0.001", "dt = -0.001")
file.seek(0)
file.write(text)
file.truncate()
This will go through every file and change it using str.replace().
If there are other files in the mdp folder that you do not want to edit, you could use and if-statement to check for the correct file name. Add something like this to encase the with open statement.
if filename.startswith("md_")

Search string and replace value

Here is my code,
import os, os.path
import collections
import sys
import re
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
data = []
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
input_file = 'axcfgpasww-from-server.dat'
element = sys.argv[1]
output_value = sys.argv[2]
with open(input_file) as infile, open('axcfgpasww-modified.dat', "w") as outfile:
if element in open(input_file).read():
regex = re.findall("\s*([\S\s]+)", element)
outfile.write(regex[0])
print(regex[0])
else:
print('No match found')
The input_file :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=2590552
I execute my script this way : modify_file.py NUM_COMM:nNN0.7 Hello world !
So if NUM_COMM:nNN0.7 exists in the file, it writes "NUM_COMM:nNN0.7" in a new axcfgpasww-modified.dat file.
But what I want to do, is execute my command as written above. And the result is the input file, with only the new value.
So my output file would be :
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello world !
Can anyone help me on this ?
Thanks !

I have made some refactoring to your original code, and made it produce the output you seek,
import os, os.path
import collections
import sys
import re
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
data = []
found = False
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
input_file = 'axcfgpasww-from-server.dat'
element = sys.argv[1]
output_value = sys.argv[2]
with open(input_file) as infile:
for line in infile.readlines():
if element in line:
old_value = line.split("=")[1]
data.append(line.replace(old_value, output_value))
found = True
else:
data.append(line)
if not found:
print('No match found')
with open(input_file, 'w') as outfile:
for line in data:
outfile.write(line)
output:
CMD_VERS=2
CMD_TRNS=O
CMD_REINIT=N
CMD_ORDER=MAJECR
CMD_COMM=2590552
NUM_COMM:nNN0.7=Hello World!
Hope this helps

How to split text file by id in python

I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have.
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(readpath+filename, 'r') as fh:
lines = fh.readlines()
lastid = 0
f = open(writepath+'checkme.txt', 'w')
f.write(filename)
for line in lines:
thisid = line.split("\t")[1]
if int(thisid) <> lastid:
f.close()
f = open(writepath+thisid+'-'+filename,'w')
lastid = int(thisid)
f.write(line)
f.close()
What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if
thisid = line.split("\t")[1]
is only done once in the loop. Any clue to what is going on?
EDIT
The problem was my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and swapping != for <>):
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(readpath+filename, 'rU') as fh:
lines = fh.readlines()
lastid = 0
f = open(writepath+'checkme.txt', 'w')
f.write(filename)
for line in lines:
thisid = line.split("\t")[1]
if int(thisid) != lastid:
f.close()
f = open(writepath+thisid+'-'+filename,'w')
lastid = int(thisid)
f.write(line)
f.close()

If you're dealing with tab delimited files, then you can use the csv module, and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also utilise os.path.join to make sure your filenames end up joining correctly.
Untested:
import os
import csv
from itertools import groupby
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(os.path.join(readpath, filename)) as fin:
tabin = csv.reader(fin, delimiter='\t')
for file_id, rows in groupby(tabin, lambda L: L[1]):
with open(os.path.join(writepath, file_id + '-' + filename), 'w') as fout:
tabout = csv.writer(fout, delimiter='\t')
tabout.writerows(rows)

How to search and replace text in a file?

How do I search and replace text in a file using Python 3?
Here is my code:
import os
import sys
import fileinput
print ("Text to search for:")
textToSearch = input( "> " )
print ("Text to replace it with:")
textToReplace = input( "> " )
print ("File to perform Search-Replace on:")
fileToSearch = input( "> " )
#fileToSearch = 'D:\dummy1.txt'
tempFile = open( fileToSearch, 'r+' )
for line in fileinput.input( fileToSearch ):
if textToSearch in line :
print('Match Found')
else:
print('Match Not Found!!')
tempFile.write( line.replace( textToSearch, textToReplace ) )
tempFile.close()
input( '\n\n Press Enter to exit...' )
Input file:
hi this is abcd hi this is abcd
This is dummy text file.
This is how search and replace works abcd
When I search and replace 'ram' by 'abcd' in above input file, it works as a charm. But when I do it vice-versa i.e. replacing 'abcd' by 'ram', some junk characters are left at the end.
Replacing 'abcd' by 'ram'
hi this is ram hi this is ram
This is dummy text file.
This is how search and replace works rambcd

As pointed out by michaelb958, you cannot replace in place with data of a different length because this will put the rest of the sections out of place. I disagree with the other posters suggesting you read from one file and write to another. Instead, I would read the file into memory, fix the data up, and then write it out to the same file in a separate step.
# Read in the file
with open('file.txt', 'r') as file :
filedata = file.read()
# Replace the target string
filedata = filedata.replace('abcd', 'ram')
# Write the file out again
with open('file.txt', 'w') as file:
file.write(filedata)
Unless you've got a massive file to work with which is too big to load into memory in one go, or you are concerned about potential data loss if the process is interrupted during the second step in which you write data to the file.

fileinput already supports inplace editing. It redirects stdout to the file in this case:
#!/usr/bin/env python3
import fileinput
with fileinput.FileInput(filename, inplace=True, backup='.bak') as file:
for line in file:
print(line.replace(text_to_search, replacement_text), end='')

As Jack Aidley had posted and J.F. Sebastian pointed out, this code will not work:
# Read in the file
filedata = None
with file = open('file.txt', 'r') :
filedata = file.read()
# Replace the target string
filedata.replace('ram', 'abcd')
# Write the file out again
with file = open('file.txt', 'w') :
file.write(filedata)`
But this code WILL work (I've tested it):
f = open(filein,'r')
filedata = f.read()
f.close()
newdata = filedata.replace("old data","new data")
f = open(fileout,'w')
f.write(newdata)
f.close()
Using this method, filein and fileout can be the same file, because Python 3.3 will overwrite the file upon opening for write.

You can do the replacement like this
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
for line in f1:
f2.write(line.replace('old_text', 'new_text'))
f1.close()
f2.close()

You can also use pathlib.
from pathlib2 import Path
path = Path(file_to_search)
text = path.read_text()
text = text.replace(text_to_search, replacement_text)
path.write_text(text)

(pip install python-util)
from pyutil import filereplace
filereplace("somefile.txt","abcd","ram")
Will replace all occurences of "abcd" with "ram".
The function also supports regex by specifying regex=True
from pyutil import filereplace
filereplace("somefile.txt","\\w+","ram",regex=True)
Disclaimer: I'm the author (https://github.com/MisterL2/python-util)

Open the file in read mode. Read the file in string format. Replace the text as intended. Close the file. Again open the file in write mode. Finally, write the replaced text to the same file.
try:
with open("file_name", "r+") as text_file:
texts = text_file.read()
texts = texts.replace("to_replace", "replace_string")
with open(file_name, "w") as text_file:
text_file.write(texts)
except FileNotFoundError as f:
print("Could not find the file you are trying to read.")

Late answer, but this is what I use to find and replace inside a text file:
with open("test.txt") as r:
text = r.read().replace("THIS", "THAT")
with open("test.txt", "w") as w:
w.write(text)
DEMO

With a single with block, you can search and replace your text:
with open('file.txt','r+') as f:
filedata = f.read()
filedata = filedata.replace('abc','xyz')
f.truncate(0)
f.write(filedata)

Your problem stems from reading from and writing to the same file. Rather than opening fileToSearch for writing, open an actual temporary file and then after you're done and have closed tempFile, use os.rename to move the new file over fileToSearch.

My variant, one word at a time on the entire file.
I read it into memory.
def replace_word(infile,old_word,new_word):
if not os.path.isfile(infile):
print ("Error on replace_word, not a regular file: "+infile)
sys.exit(1)
f1=open(infile,'r').read()
f2=open(infile,'w')
m=f1.replace(old_word,new_word)
f2.write(m)

Using re.subn it is possible to have more control on the substitution process, such as word splitted over two lines, case-(in)sensitive match. Further, it returns the amount of matches which can be used to avoid waste of resources if the string is not found.
import re
file = # path to file
# they can be also raw string and regex
textToSearch = r'Ha.*O' # here an example with a regex
textToReplace = 'hallo'
# read and replace
with open(file, 'r') as fd:
# sample case-insensitive find-and-replace
text, counter = re.subn(textToSearch, textToReplace, fd.read(), re.I)
# check if there is at least a match
if counter > 0:
# edit the file
with open(file, 'w') as fd:
fd.write(text)
# summary result
print(f'{counter} occurence of "{textToSearch}" were replaced with "{textToReplace}".')
Some regex:
add the re.I flag, short form of re.IGNORECASE, for a case-insensitive match
for multi-line replacement re.subn(r'\n*'.join(textToSearch), textToReplace, fd.read()), depending on the data also '\n{,1}'. Notice that for this case textToSearch must be a pure string, not a regex!

Besides the answers already mentioned, here is an explanation of why you have some random characters at the end:
You are opening the file in r+ mode, not w mode. The key difference is that w mode clears the contents of the file as soon as you open it, whereas r+ doesn't.
This means that if your file content is "123456789" and you write "www" to it, you get "www456789". It overwrites the characters with the new input, but leaves any remaining input untouched.
You can clear a section of the file contents by using truncate(<startPosition>), but you are probably best off saving the updated file content to a string first, then doing truncate(0) and writing it all at once.
Or you can use my library :D

I got the same issue. The problem is that when you load a .txt in a variable you use it like an array of string while it's an array of character.
swapString = []
with open(filepath) as f:
s = f.read()
for each in s:
swapString.append(str(each).replace('this','that'))
s = swapString
print(s)

I tried this and used readlines instead of read
with open('dummy.txt','r') as file:
list = file.readlines()
print(f'before removal {list}')
for i in list[:]:
list.remove(i)
print(f'After removal {list}')
with open('dummy.txt','w+') as f:
for i in list:
f.write(i)

you can use sed or awk or grep in python (with some restrictions). Here is a very simple example. It changes banana to bananatoothpaste in the file. You can edit and use it. ( I tested it worked...note: if you are testing under windows you should install "sed" command and set the path first)
import os
file="a.txt"
oldtext="Banana"
newtext=" BananaToothpaste"
os.system('sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
#print(f'sed -i "s/{oldtext}/{newtext}/g" {file}')
print('This command was applied: sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
if you want to see results on the file directly apply: "type" for windows/ "cat" for linux:
####FOR WINDOWS:
os.popen("type " + file).read()
####FOR LINUX:
os.popen("cat " + file).read()

I have done this:
#!/usr/bin/env python3
import fileinput
import os
Dir = input ("Source directory: ")
os.chdir(Dir)
Filelist = os.listdir()
print('File list: ',Filelist)
NomeFile = input ("Insert file name: ")
CarOr = input ("Text to search: ")
CarNew = input ("New text: ")
with fileinput.FileInput(NomeFile, inplace=True, backup='.bak') as file:
for line in file:
print(line.replace(CarOr, CarNew), end='')
file.close ()

I modified Jayram Singh's post slightly in order to replace every instance of a '!' character to a number which I wanted to increment with each instance. Thought it might be helpful to someone who wanted to modify a character that occurred more than once per line and wanted to iterate. Hope that helps someone. PS- I'm very new at coding so apologies if my post is inappropriate in any way, but this worked for me.
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
n = 1
# if word=='!'replace w/ [n] & increment n; else append same word to
# file2
for line in f1:
for word in line:
if word == '!':
f2.write(word.replace('!', f'[{n}]'))
n += 1
else:
f2.write(word)
f1.close()
f2.close()

def word_replace(filename,old,new):
c=0
with open(filename,'r+',encoding ='utf-8') as f:
a=f.read()
b=a.split()
for i in range(0,len(b)):
if b[i]==old:
c=c+1
old=old.center(len(old)+2)
new=new.center(len(new)+2)
d=a.replace(old,new,c)
f.truncate(0)
f.seek(0)
f.write(d)
print('All words have been replaced!!!')

I have worked this out as an exercise of a course: open file, find and replace string and write to a new file.
class Letter:
def __init__(self):
with open("./Input/Names/invited_names.txt", "r") as file:
# read the list of names
list_names = [line.rstrip() for line in file]
with open("./Input/Letters/starting_letter.docx", "r") as f:
# read letter
file_source = f.read()
for name in list_names:
with open(f"./Output/ReadyToSend/LetterTo{name}.docx", "w") as f:
# replace [name] with name of the list in the file
replace_string = file_source.replace('[name]', name)
# write to a new file
f.write(replace_string)
brief = Letter()

Like so:
def find_and_replace(file, word, replacement):
with open(file, 'r+') as f:
text = f.read()
f.write(text.replace(word, replacement))

def findReplace(find, replace):
import os
src = os.path.join(os.getcwd(), os.pardir)
for path, dirs, files in os.walk(os.path.abspath(src)):
for name in files:
if name.endswith('.py'):
filepath = os.path.join(path, name)
with open(filepath) as f:
s = f.read()
s = s.replace(find, replace)
with open(filepath, "w") as f:
f.write(s)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python: Issue when trying to read and write multiple files - python

After your for loop is over, you need to include whatever is left after the last match: ... i = m.end() output += source_content[i:]) # Here's the end of your file outfile = open(filepath, 'w') ...

Related

How to replace two different lines of text?

How to add for loop in python?

Search string and replace value

How to split text file by id in python

How to search and replace text in a file?

Categories

Resources