Python equivalent to sed

Is there a way, without a double loop, to accomplish what the following sed command does?
Input:
Time
Banana
spinach
turkey
sed -i "/Banana/ s/$/Toothpaste/" file
Output:
Time
BananaToothpaste
spinach
turkey
What I have so far is a double loop, which would take a long time to get through both lists.
List A has a bunch of numbers; list B has the same bunch of numbers but in a different order. For each entry in A, I want to find the line in B with that same number and append value C to the end of it.
Hope this makes sense, even if my example doesn't.
I was doing the following in Bash and it was working, however it was super slow...
for line in $(cat DATSRCLN.txt.utf8); do
srch=$(echo $line | awk -F'^' '{print $1}');
rep=$(echo $line | awk -F'^' '{print $2}');
sed -i "/$(echo $srch)/ s/$/^$(echo $rep)/" tmp.1;
done
Thanks!
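For the pair-file case described above, one single-pass approach (my sketch, not taken from the answers below) is to load the search^replace pairs into a dict and stream through the target file once; dict lookup makes each line O(1) instead of rescanning the file per pair. File names are the ones from the question:
replacements = {}
with open('DATSRCLN.txt.utf8') as pairs:
    for line in pairs:
        srch, sep, rep = line.rstrip('\n').partition('^')
        if sep:
            replacements[srch] = rep

with open('tmp.1') as fin, open('tmp.1.new', 'w') as fout:
    for line in fin:
        key = line.rstrip('\n')
        # assumes each line of tmp.1 is exactly one key; adjust the
        # lookup if the keys are only substrings of the line
        if key in replacements:
            fout.write(key + '^' + replacements[key] + '\n')
        else:
            fout.write(line)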

Using re.sub():
newstring = re.sub('(Banana)', r'\1Toothpaste', oldstring)
This captures one group (between the first parentheses) and replaces it with ITSELF (the \number part) followed by the desired suffix. You need to use r'' (a raw string) so that the backslash escape is interpreted correctly.
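Applied line by line, the same call reproduces the sed example from the question; a minimal sketch (the file name is assumed):
import re

# Turns 'Banana' into 'BananaToothpaste' on every matching line; for the
# sample input this matches sed "/Banana/ s/$/Toothpaste/" file
with open('file') as f:
    lines = [re.sub('(Banana)', r'\1Toothpaste', line) for line in f]
with open('file', 'w') as f:
    f.writelines(lines)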

A latecomer to the race, here is my implementation of sed in Python:
import re
import shutil
from tempfile import mkstemp


def sed(pattern, replace, source, dest=None, count=0):
    """Reads a source file and writes the destination file.

    In each line, replaces pattern with replace.

    Args:
        pattern (str): pattern to match (can be a re pattern)
        replace (str): replacement str
        source  (str): input filename
        count   (int): number of occurrences to replace; 0 means all
        dest    (str): destination filename; if not given, source is overwritten
    """
    fin = open(source, 'r')
    num_replaced = 0

    if dest:
        fout = open(dest, 'w')
    else:
        fd, name = mkstemp()
        fout = open(name, 'w')

    for line in fin:
        out = re.sub(pattern, replace, line)
        fout.write(out)

        if out != line:
            num_replaced += 1
        if count and num_replaced >= count:
            break

    # copy the rest of the file unchanged once the replacement limit is hit
    fout.writelines(fin.readlines())

    fin.close()
    fout.close()

    if not dest:
        shutil.move(name, source)
Examples:
sed('foo', 'bar', 'foo.txt')
will replace all 'foo' with 'bar' in foo.txt.
sed('foo', 'bar', 'foo.txt', 'foo.updated.txt')
will replace all 'foo' with 'bar' in foo.txt and save the result in foo.updated.txt.
sed('foo', 'bar', 'foo.txt', count=1)
will replace only the first occurrence of 'foo' with 'bar' and save the result in the original file foo.txt.

If you are using Python 3, the following module will help you:
https://github.com/mahmoudadel2/pysed
wget https://raw.githubusercontent.com/mahmoudadel2/pysed/master/pysed.py
Place the module file in your Python 3 module path, then:
import pysed
pysed.replace(<Old string>, <Replacement String>, <Text File>)
pysed.rmlinematch(<Unwanted string>, <Text File>)
pysed.rmlinenumber(<Unwanted Line Number>, <Text File>)
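For the file from the question, the calls would look something like this (the argument values are mine; the function signatures are as listed above):
import pysed

pysed.replace('Banana', 'BananaToothpaste', 'file.txt')  # s/Banana/BananaToothpaste/
pysed.rmlinematch('spinach', 'file.txt')                 # delete lines containing 'spinach'
pysed.rmlinenumber(4, 'file.txt')                        # delete line 4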

You can actually call sed from Python. There are many ways to do this, but I like to use the sh module. (yum -y install python-sh)
The output of my example program is as follows.
[me@localhost sh]$ cat input
Time
Banana
spinich
turkey
[me@localhost sh]$ python test_sh.py
[me@localhost sh]$ cat input
Time
Toothpaste
spinich
turkey
[me@localhost sh]$
Here is test_sh.py
import sh
sh.sed('-i', 's/Banana/Toothpaste/', 'input')
This will probably only work under Linux.
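If the sh module is not available, the standard library's subprocess module can make the same call; a minimal sketch (same caveat about needing a sed binary on the system):
import subprocess

# Passing the arguments as a list avoids shell quoting issues.
subprocess.run(['sed', '-i', 's/Banana/Toothpaste/', 'input'], check=True)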

It's possible to do this with a temporary file and low memory requirements, in a single pass, without copying the whole file into memory:
#!/usr/bin/python
import tempfile
import shutil
import os

# mkstemp() returns an open descriptor and the path of a new temp file
fd, newfile = tempfile.mkstemp()
os.close(fd)
oldfile = 'stack.txt'

f = open(oldfile)
n = open(newfile, 'w')

for i in f:
    if i.find('Banana') == -1:
        n.write(i)
        continue
    # Last row may have no trailing newline
    if i.find('\n') == -1:
        i += 'ToothPaste'
    else:
        i = i.rstrip('\n')
        i += 'ToothPaste\n'
    n.write(i)

f.close()
n.close()
os.remove(oldfile)
shutil.move(newfile, oldfile)

I found the answer supplied by Oz123 to be great, but it didn't seem to work 100%. I'm new to Python, but I modified it and wrapped it up to run as a shell command. This works on OS X, using Python 2.7.
# Replace 1 occurrence in file /tmp/1
$ search_replace "Banana" "BananaToothpaste" /tmp/1
# Replace 5 occurrences and save in /tmp/2
$ search_replace "Banana" "BananaToothpaste" /tmp/1 /tmp/2 5
search_replace
#!/usr/bin/env python
import sys
import re
import shutil
from tempfile import mkstemp

total = len(sys.argv) - 1
cmdargs = str(sys.argv)

if total < 3:
    print ("Usage: SEARCH_FOR REPLACE_WITH IN_FILE {OUT_FILE} {COUNT}")
    print ("by default, the input file is replaced")
    print ("and the number of times to replace is 1")
    sys.exit(1)

# Parsing args one by one
search_for = str(sys.argv[1])
replace_with = str(sys.argv[2])
file_name = str(sys.argv[3])

if total < 4:
    file_name_dest = file_name
else:
    file_name_dest = str(sys.argv[4])

if total < 5:
    count = 1
else:
    count = int(sys.argv[5])


def sed(pattern, replace, source, dest=None, count=0):
    """Reads a source file and writes the destination file.

    In each line, replaces pattern with replace.

    Args:
        pattern (str): pattern to match (can be a re pattern)
        replace (str): replacement str
        source  (str): input filename
        count   (int): number of occurrences to replace
        dest    (str): destination filename; if not given, source is overwritten
    """
    fin = open(source, 'r')
    num_replaced = 0
    fd, name = mkstemp()
    fout = open(name, 'w')

    for line in fin:
        if count and num_replaced < count:
            out = re.sub(pattern, replace, line)
            fout.write(out)
            if out != line:
                num_replaced += 1
        else:
            fout.write(line)

    fin.close()
    fout.close()

    if file_name == file_name_dest:
        shutil.move(name, file_name)
    else:
        shutil.move(name, file_name_dest)


sed(search_for, replace_with, file_name, file_name_dest, count)

With thanks to Oz123 above, here is a sed that works on the whole file rather than line by line, so your replacement can span newlines. Larger files could be a problem, since the whole file is read into memory.
import re
import shutil
from tempfile import mkstemp


def sed(pattern, replace, source, dest=None):
    """Reads a source file and writes the destination file.

    Replaces pattern with replace globally through the file.
    This is not line-by-line, so the pattern can span newlines.

    Args:
        pattern (str): pattern to match (can be a re pattern)
        replace (str): replacement str
        source  (str): input filename
        dest    (str): destination filename; if not given, source is overwritten
    """
    if dest:
        fout = open(dest, 'w')
    else:
        fd, name = mkstemp()
        fout = open(name, 'w')

    with open(source, 'r') as file:
        data = file.read()

    p = re.compile(pattern)
    new_data = p.sub(replace, data)

    fout.write(new_data)
    fout.close()

    if not dest:
        shutil.move(name, source)

massedit
You can use it as a command-line tool:
# Will change all test*.py in subdirectories of tests.
massedit.py -e "re.sub('failIf', 'assertFalse', line)" -s tests test*.py
You can also use it as a library:
import massedit
filenames = ['massedit.py']
massedit.edit_files(filenames, ["re.sub('Jerome', 'J.', line)"])

You can use sed, awk, or grep in Python (with some restrictions). Here is a very simple example: it changes Banana to BananaToothpaste in the file. You can edit and use it. (I tested it and it worked. Note: if you are testing under Windows, you should install the sed command and set the path first.)
import os
file="a.txt"
oldtext="Banana"
newtext=" BananaToothpaste"
os.system('sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
#print(f'sed -i "s/{oldtext}/{newtext}/g" {file}')
print('This command was applied: sed -i "s/{}/{}/g" {}'.format(oldtext,newtext,file))
If you want to see the results in the file directly, apply "type" for Windows or "cat" for Linux:
####FOR WINDOWS:
os.popen("type " + file).read()
####FOR LINUX:
os.popen("cat " + file).read()

Related

Use Python Regex to search files and return filename

Please help.
I'm searching several .txt files, in several directories, for a pattern. If there is a match, I would like to print the filename and the location of the match.
Here is my code:
a = ('Z:/rodney/020year/2020-04/')
b = []
for y in os.listdir(a):
    b.append(a+y+'/')
for filename in b:
    path = filename
    for filenames in listdir(path):
        with open(path+filenames) as currentfile:
            text = currentfile.read()
            loan = re.compile(r'2 NNN \d LOANS')
            bb = loan.search(text)
            with open('z:/rodney/results.txt','a') as f:
                f.write(os.path.dirname(path)+' ')
                f.write(filenames[:-4]+'\n')
                f.write(bb)
Error message = "TypeError: write() argument must be str, not None"
If there is a match, I would like to see only the filename and location of a match. I do not need to see "None" in every file where there is no match.
You have:
bb = loan.search(text)
But if the string you are looking for is not found in text, bb will be None and consequently f.write(bb) will raise an exception (you did not indicate which line of code was raising the exception, so this is an educated guess).
You need to modify your code to be:
bb = loan.search(text)
if bb:
    with open('z:/rodney/results.txt','a') as f:
        f.write(os.path.dirname(path)+' ')
        f.write(filenames[:-4]+'\n')
As an aside:
You have the statement loan = re.compile(r'2 NNN \d LOANS') in a loop. There is no need for that to be in a loop since it is invariant.
You can avoid string slicing and a bunch of functions for parsing file paths by using pathlib, where most of the needed cases are already implemented. You can also optimize your code by moving re.compile() out of the loop (create once, use many times). The same goes for writing the result: you don't need to reopen the file every time; just open it once before the loop starts.
Optimized code:
from pathlib import Path
import re

src_dir = Path(r"Z:\rodney\020year\2020-04")
res_fn = r"z:\rodney\results.txt"

with open(res_fn, "w+") as res_f:
    search_re = re.compile(r"2\sN{3}\s{28}\d\sLOANS")
    for directory in src_dir.iterdir():
        if directory.is_dir():
            for file in directory.iterdir():
                if file.is_file():
                    with open(file) as of:
                        bb = search_re.search(of.read())
                        if bb:
                            print(file.parent, file.stem, file=res_f)
                            print(bb.group(), file=res_f)
                            # res_f.write(file.parent + " " + file.stem + "\n" + bb.group())
Based on your source code, I optimized it.
I use os.walk to reach each .txt file, read it line by line, and keep the lines with enumerate. Each line is then checked against the regex (I referenced Olvin Roght's pattern). If there is a match, it prints the exact file location and line number for you.
import os
import re

extension = [".txt"]
regex = r"2\sN{3}\s{28}\d\sLOANS"
re_Search = re.compile(regex)
path = r"Z:\rodney\020year\2020-04"  # raw string, so the backslashes stay literal

for subdir, dirs, files in os.walk(path):
    for file in files:
        file_path = os.path.join(subdir, file)
        ext = os.path.splitext(file)[-1].lower()
        if ext in extension:
            with open(file_path, "r") as f:
                f_content = []
                try:
                    f_content = f.readlines()
                except Exception as e:
                    print(e)
                for l_idx, line in enumerate(f_content):
                    if re_Search.search(line):
                        print(file_path)
                        print("At line: {l_idx}".format(l_idx=l_idx + 1))
                    else:
                        print("Nothing!!")

Running python mapreduce for multiple files

I am trying to implement a Python MapReduce for multiple files in a directory, so that it takes a folder and a string as arguments and lists the files with the frequency of that string within each file. The output should look like this:
Filename Output
-------- --------------
x.txt 8
y.txt 12
I have tried to implement it, but when I run it with the command below:
cat /home/habil/Downloads/hadoop_test/*.txt | python mapper.py "AA" | python reducer.py
it gives me "AA 479", which is the total frequency across all 5 files.
This is my mapper.py
#!/usr/bin/env python
import sys
import textwrap
from os import listdir
from os.path import isfile, join

#Argument of the path
#folderPath = sys.argv[2]
#onlyfiles = [f for f in listdir(sys.argv[2]) if isfile(join(sys.argv[2], f))]

# Get the string sequence from the user
searchWord = sys.argv[1]
# Length of the word
searchWordLength = len(sys.argv[1])

# helper function
def locations_of_substring(string, substring):
    """Return a list of locations of a substring."""
    substring_length = len(substring)

    def recurse(locations_found, start):
        location = string.find(substring, start)
        if location != -1:
            return recurse(locations_found + [location], location + substring_length)
        else:
            return locations_found

    return recurse([], 0)

#--- get all lines from stdin ---
for line in sys.stdin:
    #--- remove leading and trailing whitespace ---
    line = line.strip()
    temp = locations_of_substring(line, searchWord)
    if len(temp) != 0:
        for count in temp:
            print '%s\t%s' % (line[count:count+searchWordLength], "1")
And below is my reducer:
#!/usr/bin/env python
import sys

# maps words to their counts
word2count = {}

# input comes from STDIN
for line in sys.stdin:
    # remove leading and trailing whitespace
    line = line.strip()
    # parse the input we got from mapper.py
    word, count = line.split('\t', 1)
    # convert count (currently a string) to int
    try:
        count = int(count)
    except ValueError:
        continue
    try:
        word2count[word] = word2count[word] + count
    except:
        word2count[word] = count

# write the tuples to stdout
# Note: they are unsorted
for word in word2count.keys():
    print '%s\t%s' % (word, word2count[word])
How can I get the desired result, so that it runs for each file in the directory and prints a separate result per file? Any help or hint is appreciated. Thanks in advance.
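One way to get per-file counts (this is my suggestion, not from the thread): run the existing mapper | reducer pipeline once per file from a small driver, so each file's counts stay separate. A minimal sketch, assuming mapper.py and reducer.py sit in the current directory:
import glob
import subprocess

# Run the pipeline separately for each file, so counts are reported
# per file instead of summed over all of them.
for path in sorted(glob.glob('/home/habil/Downloads/hadoop_test/*.txt')):
    with open(path) as f:
        mapper = subprocess.Popen(['python', 'mapper.py', 'AA'],
                                  stdin=f, stdout=subprocess.PIPE)
        reducer = subprocess.Popen(['python', 'reducer.py'],
                                   stdin=mapper.stdout, stdout=subprocess.PIPE)
        mapper.stdout.close()  # let the mapper see SIGPIPE if the reducer exits
        out, _ = reducer.communicate()
    print(path, out.decode().strip())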

How to assign the elements of a list as file names in python?

I am trying to assign the elements of a list as names for some files that live in a directory. So far I have created a function that recovers the name of each file from a directory and returns them in a list:
def retrive(directory_path):
    path_names = []
    for filename in sorted(glob.glob(os.path.join(directory_path, '*.pdf'))):
        retrieved_files = filename.split('/')[-1]
        path_names.append(retrieved_files)
    print (path_names)
The above function returns the name of each file in a list; I then write the files into another directory as follows:
path = os.path.join(new_dir_path, "list%d.txt" % i)
#This is the path of each new file:
#print(path)
with codecs.open(path, "w", encoding='utf8') as filename:
    for item in [a_list]:
        filename.write(item+"\n")
Finally, my question is: how can I assign each element of path_names as the name of the corresponding file? Something like this line:
path = os.path.join(new_dir_path, "list%d.txt" % i)
I also tried to use the format() function. However, I still can't assign the correct name to each file.
Here's the full script:
def transform_directoy(input_directory, output_directory):
    import codecs, glob, os
    from tika import parser
    all_texts = []
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        texts = parsed['content']
        all_texts.append(texts)
    for i, a_list in enumerate(all_texts):
        new_dir_path = output_directory
        #print(new_dir_path)
        path = os.path.join(new_dir_path, "list%d.txt" % i)
        with codecs.open(path, "w", encoding='utf8') as filename:
            for item in [a_list]:
                filename.write(item+"\n")
The desired output will consist of the actual names of each processed file.
You’re almost there:
for path_name in path_names:
    path = os.path.join(new_dir_path, "list%s.txt" % path_name)
    #This is the path of each new file:
    #print(path)
    with codecs.open(path, "w", encoding='utf8') as f:
        for item in [a_list]:
            f.write(item+"\n")
Update based on updated code sample. You are using different loops here, and that is not ideal unless you are doing processing in between the two loops. Since I am going to keep that structure, we have to make sure to associate each block of content with the original filename. The best structure for that is a dict, and in case order is important, we use an OrderedDict.
Now, when we loop over the filename/content pairs in the OrderedDict, we want to change the extension of the file to match the new file type. Luckily, Python has some nice utilities for file/path manipulation in the os.path module: os.path.basename can be used to strip off the directory from a file, and os.path.splitext will strip off an extension from a filename. We use both of those to get just the filename without the extension and then append .txt to designate the new file type. Putting it all together, we get:
def transform_directoy(input_directory, output_directory):
    import codecs, glob, os
    from collections import OrderedDict
    from tika import parser
    all_texts = OrderedDict()
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        filename = os.path.basename(filename)
        texts = parsed['content']
        all_texts[filename] = texts
    for i, (original_filename, a_list) in enumerate(all_texts.items()):
        new_filename, _ = os.path.splitext(original_filename)
        new_filename += '.txt'
        new_dir_path = output_directory
        #print(new_dir_path)
        path = os.path.join(new_dir_path, new_filename)
        # Print out the name of the file we are processing
        print('Transforming %s => %s' % (original_filename, path,))
        with codecs.open(path, "w", encoding='utf8') as filename:
            for item in [a_list]:
                filename.write(item+"\n")
Second update: OP asked how I would write this code if this was all that there was, so here goes:
# move imports to top of file: PEP 8
import codecs, glob, os
from tika import parser

def transform_directoy(input_directory, output_directory):
    for filename in sorted(glob.glob(os.path.join(input_directory, '*.pdf'))):
        parsed = parser.from_file(filename)
        parsed_content = parsed['content']
        original_filename = os.path.basename(filename)
        new_filename, _ = os.path.splitext(original_filename)
        new_filename += '.txt'
        path = os.path.join(output_directory, new_filename)
        # Print out the name of the file we are processing
        print('Transforming %s => %s' % (original_filename, path,))
        # no need for a second loop since we can piggyback off the first loop
        with codecs.open(path, "w", encoding='utf8') as filename:
            # No need for a for loop here since our list only has one item
            filename.write(parsed_content)
            filename.write("\n")

Python: Issue when trying to read and write multiple files

This script reads and writes all the individual HTML files in a directory. The script iterates over the files, highlights the search terms, and writes the output. The issue is, after highlighting the last instance of the search item, the script removes all the remaining content after the last search instance in the output of each file. Any help here is appreciated.
import os
import sys
import re

source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
    filepath = os.path.join(source+'\\'+f)
    infile = open(filepath, 'r+')
    source_content = infile.read()
    color = ('red')
    regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
    i = 0; output = ""
    for m in regex.finditer(source_content):
        output += "".join([source_content[i:m.start()],
                           "<strong><span style='color:%s'>" % color[0:],
                           source_content[m.start():m.end()],
                           "</span></strong>"])
        i = m.end()
    outfile = open(filepath, 'w')
    outfile.seek(0, 2)
    outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
...
        i = m.end()
    output += source_content[i:]  # Here's the end of your file
    outfile = open(filepath, 'w')
...

How to search and replace text in a file?

How do I search and replace text in a file using Python 3?
Here is my code:
import os
import sys
import fileinput

print ("Text to search for:")
textToSearch = input( "> " )
print ("Text to replace it with:")
textToReplace = input( "> " )
print ("File to perform Search-Replace on:")
fileToSearch = input( "> " )
#fileToSearch = 'D:\dummy1.txt'

tempFile = open( fileToSearch, 'r+' )

for line in fileinput.input( fileToSearch ):
    if textToSearch in line :
        print('Match Found')
    else:
        print('Match Not Found!!')
    tempFile.write( line.replace( textToSearch, textToReplace ) )
tempFile.close()

input( '\n\n Press Enter to exit...' )
Input file:
hi this is abcd hi this is abcd
This is dummy text file.
This is how search and replace works abcd
When I search and replace 'ram' with 'abcd' in the above input file, it works like a charm. But when I do it vice versa, i.e. replacing 'abcd' with 'ram', some junk characters are left at the end.
Replacing 'abcd' with 'ram':
hi this is ram hi this is ram
This is dummy text file.
This is how search and replace works rambcd
As pointed out by michaelb958, you cannot replace in place with data of a different length because this will put the rest of the sections out of place. I disagree with the other posters suggesting you read from one file and write to another. Instead, I would read the file into memory, fix the data up, and then write it out to the same file in a separate step.
# Read in the file
with open('file.txt', 'r') as file:
    filedata = file.read()

# Replace the target string
filedata = filedata.replace('abcd', 'ram')

# Write the file out again
with open('file.txt', 'w') as file:
    file.write(filedata)
This works well unless you've got a massive file that is too big to load into memory in one go, or you are concerned about potential data loss if the process is interrupted during the second step, in which you write data to the file.
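For that big-file case, a line-by-line rewrite through a temporary file keeps memory use flat and leaves the original intact until the new copy is complete; a minimal sketch (not part of the original answer):
import os
import tempfile

# Stream the file line by line into a temp file in the same directory,
# then atomically replace the original, so an interruption never
# leaves a half-written file behind.
with open('file.txt') as src, tempfile.NamedTemporaryFile(
        'w', dir='.', delete=False) as dst:
    for line in src:
        dst.write(line.replace('abcd', 'ram'))
os.replace(dst.name, 'file.txt')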
fileinput already supports in-place editing. It redirects stdout to the file in this case:
#!/usr/bin/env python3
import fileinput

with fileinput.FileInput(filename, inplace=True, backup='.bak') as file:
    for line in file:
        print(line.replace(text_to_search, replacement_text), end='')
As Jack Aidley had posted and J.F. Sebastian pointed out, this code will not work:
# Read in the file
filedata = None
with file = open('file.txt', 'r') :
    filedata = file.read()

# Replace the target string
filedata.replace('ram', 'abcd')

# Write the file out again
with file = open('file.txt', 'w') :
    file.write(filedata)
But this code WILL work (I've tested it):
f = open(filein,'r')
filedata = f.read()
f.close()
newdata = filedata.replace("old data","new data")
f = open(fileout,'w')
f.write(newdata)
f.close()
Using this method, filein and fileout can be the same file, because opening a file in write mode truncates it before the new contents are written.
You can do the replacement like this:
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
for line in f1:
    f2.write(line.replace('old_text', 'new_text'))
f1.close()
f2.close()
You can also use pathlib (the import below uses the pathlib2 backport; on Python 3, from pathlib import Path works the same way).
from pathlib2 import Path
path = Path(file_to_search)
text = path.read_text()
text = text.replace(text_to_search, replacement_text)
path.write_text(text)
(pip install python-util)
from pyutil import filereplace
filereplace("somefile.txt","abcd","ram")
Will replace all occurrences of "abcd" with "ram".
The function also supports regex by specifying regex=True
from pyutil import filereplace
filereplace("somefile.txt","\\w+","ram",regex=True)
Disclaimer: I'm the author (https://github.com/MisterL2/python-util)
Open the file in read mode. Read the file in string format. Replace the text as intended. Close the file. Again open the file in write mode. Finally, write the replaced text to the same file.
try:
    with open(file_name, "r+") as text_file:
        texts = text_file.read()
        texts = texts.replace("to_replace", "replace_string")
    with open(file_name, "w") as text_file:
        text_file.write(texts)
except FileNotFoundError as f:
    print("Could not find the file you are trying to read.")
Late answer, but this is what I use to find and replace inside a text file:
with open("test.txt") as r:
text = r.read().replace("THIS", "THAT")
with open("test.txt", "w") as w:
w.write(text)
With a single with block, you can search and replace your text:
with open('file.txt', 'r+') as f:
    filedata = f.read()
    filedata = filedata.replace('abc', 'xyz')
    f.seek(0)      # rewind before truncating, or null padding can be left behind
    f.truncate(0)
    f.write(filedata)
Your problem stems from reading from and writing to the same file. Rather than opening fileToSearch for writing, open an actual temporary file and then after you're done and have closed tempFile, use os.rename to move the new file over fileToSearch.
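A sketch of that approach (variable names reuse the question's; whether the final rename can atomically replace the destination depends on the platform and filesystem):
import os
import tempfile

# Write the modified copy to a temp file in the same directory,
# then rename it over the original once it is complete.
dir_name = os.path.dirname(os.path.abspath(fileToSearch))
fd, temp_path = tempfile.mkstemp(dir=dir_name)
with os.fdopen(fd, 'w') as tmp, open(fileToSearch) as src:
    for line in src:
        tmp.write(line.replace(textToSearch, textToReplace))
os.rename(temp_path, fileToSearch)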
My variant, one word at a time on the entire file.
I read it into memory.
import os
import sys

def replace_word(infile, old_word, new_word):
    if not os.path.isfile(infile):
        print ("Error on replace_word, not a regular file: " + infile)
        sys.exit(1)

    f1 = open(infile, 'r').read()
    f2 = open(infile, 'w')
    m = f1.replace(old_word, new_word)
    f2.write(m)
    f2.close()
Using re.subn it is possible to have more control over the substitution process, such as a word split over two lines, or a case-(in)sensitive match. Further, it returns the number of matches, which can be used to avoid wasting resources if the string is not found.
import re

file = # path to file

# they can also be raw strings and regexes
textToSearch = r'Ha.*O'  # here an example with a regex
textToReplace = 'hallo'

# read and replace
with open(file, 'r') as fd:
    # sample case-insensitive find-and-replace
    text, counter = re.subn(textToSearch, textToReplace, fd.read(), flags=re.I)

# check if there is at least a match
if counter > 0:
    # edit the file
    with open(file, 'w') as fd:
        fd.write(text)

# summary result
print(f'{counter} occurrences of "{textToSearch}" were replaced with "{textToReplace}".')
Some regex notes:
add the re.I flag (short form of re.IGNORECASE) for a case-insensitive match
for a multi-line replacement, use re.subn(r'\n*'.join(textToSearch), textToReplace, fd.read()); depending on the data, '\n{,1}' may fit better. Notice that in this case textToSearch must be a plain string, not a regex!
Besides the answers already mentioned, here is an explanation of why you have some random characters at the end:
You are opening the file in r+ mode, not w mode. The key difference is that w mode clears the contents of the file as soon as you open it, whereas r+ doesn't.
This means that if your file content is "123456789" and you write "www" to it, you get "www456789". It overwrites the characters with the new input, but leaves any remaining input untouched.
You can clear a section of the file contents by using truncate(<startPosition>), but you are probably best off saving the updated file content to a string first, then doing truncate(0) and writing it all at once.
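A short demonstration of that overwrite behavior (the file name is illustrative):
# Demonstrates why r+ leaves trailing characters behind.
with open('demo.txt', 'w') as f:
    f.write('123456789')

with open('demo.txt', 'r+') as f:
    f.write('www')               # overwrites in place, does not truncate

print(open('demo.txt').read())   # -> 'www456789'

# The fix: read, modify, then truncate and rewrite from the start.
with open('demo.txt', 'r+') as f:
    data = f.read().replace('www', 'abc')
    f.seek(0)
    f.truncate(0)
    f.write(data)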
Or you can use my library :D
I got the same issue. The problem is that when you load a .txt file into a variable and iterate over it, you are working with an array of characters, not an array of strings.
swapString = []
with open(filepath) as f:
    s = f.read()
for each in s:
    swapString.append(str(each).replace('this','that'))
s = swapString
print(s)
I tried this, using readlines instead of read:
with open('dummy.txt','r') as file:
    list = file.readlines()

print(f'before removal {list}')
for i in list[:]:
    list.remove(i)
print(f'After removal {list}')

with open('dummy.txt','w+') as f:
    for i in list:
        f.write(i)
I have done this:
#!/usr/bin/env python3
import fileinput
import os

Dir = input("Source directory: ")
os.chdir(Dir)

Filelist = os.listdir()
print('File list: ', Filelist)

NomeFile = input("Insert file name: ")
CarOr = input("Text to search: ")
CarNew = input("New text: ")

with fileinput.FileInput(NomeFile, inplace=True, backup='.bak') as file:
    for line in file:
        print(line.replace(CarOr, CarNew), end='')
I modified Jayram Singh's post slightly in order to replace every instance of a '!' character with a number which I wanted to increment with each instance. Thought it might be helpful to someone who wants to modify a character that occurs more than once per line and wants to iterate. Hope that helps someone. PS - I'm very new at coding, so apologies if my post is inappropriate in any way, but this worked for me.
f1 = open('file1.txt', 'r')
f2 = open('file2.txt', 'w')
n = 1

# if word == '!' replace w/ [n] & increment n; else append same word to file2
for line in f1:
    for word in line:
        if word == '!':
            f2.write(word.replace('!', f'[{n}]'))
            n += 1
        else:
            f2.write(word)
f1.close()
f2.close()
def word_replace(filename, old, new):
    c = 0
    with open(filename, 'r+', encoding='utf-8') as f:
        a = f.read()
        b = a.split()
        for i in range(0, len(b)):
            if b[i] == old:
                c = c + 1
        old = old.center(len(old) + 2)
        new = new.center(len(new) + 2)
        d = a.replace(old, new, c)
        f.truncate(0)
        f.seek(0)
        f.write(d)
    print('All words have been replaced!!!')
I have worked this out as an exercise of a course: open file, find and replace string and write to a new file.
class Letter:
    def __init__(self):
        with open("./Input/Names/invited_names.txt", "r") as file:
            # read the list of names
            list_names = [line.rstrip() for line in file]

        with open("./Input/Letters/starting_letter.docx", "r") as f:
            # read letter
            file_source = f.read()

        for name in list_names:
            with open(f"./Output/ReadyToSend/LetterTo{name}.docx", "w") as f:
                # replace [name] with name of the list in the file
                replace_string = file_source.replace('[name]', name)
                # write to a new file
                f.write(replace_string)

brief = Letter()
Like so:
def find_and_replace(file, word, replacement):
    with open(file, 'r+') as f:
        text = f.read()
        f.seek(0)      # rewind; otherwise the write appends after the old contents
        f.truncate()
        f.write(text.replace(word, replacement))
def findReplace(find, replace):
    import os
    src = os.path.join(os.getcwd(), os.pardir)
    for path, dirs, files in os.walk(os.path.abspath(src)):
        for name in files:
            if name.endswith('.py'):
                filepath = os.path.join(path, name)
                with open(filepath) as f:
                    s = f.read()
                s = s.replace(find, replace)
                with open(filepath, "w") as f:
                    f.write(s)
