How to add quotes in the file - python

I have file below
123,PEN BOOK
124,BALL
125,BOOK
126,PENCIL BOOK
I need to add quotes
Expected output
"123","PEN BOOK"
"124","BALL"
"125","BOOK"
"126","PENCIL BOOK"

Assume you have a file test.txt with the following content:
123,PEN
124,BALL
125,BOOK
126,PENCIL
You can use a code like the following, to create a temp file with the content with quotes and replace the original file:
import os

def quote_line(line):
    """Return a CSV line ('code,name') with both fields wrapped in double quotes.

    Splits only on the first comma so a name that itself contains commas is
    kept intact (the original split dropped everything after a second comma).
    """
    code, name = line.rstrip('\n').split(',', 1)
    return '"{}","{}"\n'.format(code, name)

if __name__ == "__main__":
    # Write the quoted copy to a temp file, then swap it over the original.
    with open("test.txt") as i, open("temp.txt", "w") as o:  # i = input, o = output
        for l in i:
            o.write(quote_line(l))
    # os.replace is atomic on POSIX and overwrites on Windows, so it replaces
    # the original remove()+rename() pair safely.
    os.replace('temp.txt', 'test.txt')
Output:
"123","PEN"
"124","BALL"
"125","BOOK"
"126","PENCIL"

I've updated my answer to cover additional case of text containing spaces.
Seeing as you have a regex tag in your question, you can use something like this:
import re

text = """123,PEN
124,BALL
125,BOOK
126,PENCIL
123,PEN BOOK"""

# Quote each "digits,words" pair; re.M anchors $ at every end of line so one
# match is produced per line, including names containing spaces.
_pattern = re.compile(r'(\d+),([\w\s]+)$', re.M)
new_text = _pattern.sub(r'"\1","\2"', text)

Related

Reading a text file that contains Japanese sentences

I am working with a Japanese dataset. I have a text file that contains Japanese sentences.
I wrote the following program to read the lines:
import os

# Absolute path to the raw sentence file expected in the current directory.
wikiCreated_senteces_dir = os.path.join(os.getcwd(), "sentences_jp_spaceSeperated_20210622_part1_notBeautiful.txt")
# Name of the cleaned-up output file (not used in this snippet yet).
wikiCreated_senteces_dir_corrected_name = "sentences_jp_spaceSeperated_20210622_part1_Beautiful.txt"

# Removed the unused `all_sentences` list and `cnt` counter -- nothing in this
# snippet read them.
# utf8 decodes the Japanese text correctly.
with open(wikiCreated_senteces_dir, "r", encoding='utf8') as fp:
    print("success")
    for line in fp:
        print(line)
And the output is:
success []
My text file looks like this:
How should I change my program to be able to read it?
Here is the link to my txt file
problem solved. I just had to write
"./sentences_jp_spaceSeperated_20210622_part1_notBeautiful.txt"
instead of
"sentences_jp_spaceSeperated_20210622_part1_notBeautiful.txt
"

Python: file write and iterating re.sub from dict only writes last occurrence

I'm unable to figure out how to write and save all re.sub iteration from a dict. Only the last occurrence is saved to the file. I have a translation-worksheet.csv formatted as:
locale, lang, Foo-v2, Bar-v2
de_DE, German, German-Foo-v2, German-Bar-v2
zh_CN, Chinese, 零件-Foo-v2, 零件-Bar-v2
There's a folder with a file for each language: target/de_DE_v123.xml
The contents of a file:
<trans-unit id="14_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Foo-v2</source>
<target xml:lang="de-DE">German-Foo-v1</target>
</trans-unit>
<trans-unit id="1759_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Bar-v2</source>
<target xml:lang="de-DE">German-Bar-v1</target>
</trans-unit>
The goal is to go into each translation file and update all target text. Regex must be used because the target translation text must be over-written regardless of what's currently.
import glob
import pandas as pd
import re

# translation-worksheet.csv layout: locale, lang, then one column per English source string.
data = pd.read_csv('translate-worksheet.csv', sep=',', header=0)
englishTranslation = data.columns[2:]  # English source column names
for k, v in data.iterrows():
    locale = v[0]
    docGroup = v[2:]
    findnreplace = dict(zip(englishTranslation, docGroup))  # {english source: translated target}
    print("Working on language:" + locale)
    for propFile in glob.glob('target\\*' + locale + '*.xml'):
        print(" xliff file:" + propFile)
        with open(propFile, 'r+', encoding='utf-8') as f:
            content = f.read()
            for source, target in findnreplace.items():
                print(" Replacing:" + source + ", with:" + target)
                # BUGFIX: substitute back into `content` itself. The original
                # assigned the result to a separate variable, so every iteration
                # started again from the unmodified file text and only the LAST
                # replacement survived.
                # (Also dropped the needless \/ \> \< escapes, which raise
                # invalid-escape warnings and change nothing in the regex.)
                content = re.sub(r'(?<=' + source + r'</source>)[\r\n]+([^\r\n]+)>(.*?)<',
                                 r"\1" + ">" + target + "<", content, flags=re.MULTILINE)
            f.seek(0)
            f.write(content)
            f.truncate()  # BUGFIX: drop stale bytes when the new text is shorter
            print(content)
Expected output:
<trans-unit id="14_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Foo-v2</source>
<target xml:lang="de-DE">German-Foo-v2</target>
</trans-unit>
<trans-unit id="1759_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Bar-v2</source>
<target xml:lang="de-DE">German-Bar-v2</target>
</trans-unit>
Actual output:
<trans-unit id="14_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Foo-v2</source>
<target xml:lang="de-DE">German-Foo-v1</target>
</trans-unit>
<trans-unit id="1759_de_DE" resname="documentGroup.translation">
<source xml:lang="en-GB">Bar-v2</source> <target xml:lang="de-DE">German-Bar-v2</target>
</trans-unit>
I'm new to Python and welcome all critiquing to improve the code overall.
UPDATE with solution:
This may be very inefficient code because it opens the file, modifies it, and closes it each time, but it works and my files are only 15 kB each. I changed it from "open the file and for every source and target in the dict, do something" to "for every source and target in the dict, open the file and do something".
for propFile in glob.glob('target\\*' + locale + '*.xml'):
    print(" xliff file:" + propFile)
    # One pass per dictionary entry: the file is reopened, read, patched and
    # rewritten for every (source, target) pair -- inefficient but simple.
    for source, target in findnreplace.items():
        with open(propFile, 'r+', encoding='utf-8') as f:
            content = f.read()
            f.seek(0)
            print(" Replacing:" + source + ", with:" + target)
            content = re.sub(r'(?<=' + source + '<\/source>)[\r\n]+([^\r\n]+)\>(.*?)\<',
                             r"\1" + ">" + target + "<", content, flags=re.MULTILINE)
            f.write(content)
            f.truncate()  # trim leftovers if the new text is shorter
            print(content)
Based on your code, it looks like you want to replace a block of text within an existing text file using regex. For this the basic logic is:
Find the text you want to replace
Store the existing file text before this text
Store the existing file text after this text
Create the replacement text to be used in the updated file
Rewrite the file with the 'before' text, the replacement text, and the 'after' text
Without your actual data, I can't confirm this updated code works, but it should be close:
for source, target in findnreplace.items():
    print(" Replacing:" + source + ", with:" + target)
    # pattern: the line following `source</source>` whose tag text is replaced
    pattern = r'(?<=' + source + r'</source>)[\r\n]+([^\r\n]+)>(.*?)<'
    # find the text to be replaced
    # BUGFIX: re.search takes (pattern, string, flags=...); the original passed
    # the replacement text as the subject string, and called ".span()" on
    # nothing at all (a SyntaxError).
    srch = re.search(pattern, content, flags=re.MULTILINE)
    if srch is None:
        continue  # this source string does not occur in the file
    # get the updated file text with the target translation substituted
    content = re.sub(pattern, r"\1" + ">" + target + "<", content, flags=re.MULTILINE)
    # BUGFIX: rewrite the whole file instead of splicing by offsets; the
    # original's f.read(endidx) reads *endidx bytes*, not "from endidx to EOF",
    # which silently corrupted the tail of the file.
    f.seek(0)      # restart from beginning
    f.truncate(0)  # clear file contents
    f.write(content)
    print(content)
This may be very inefficient code because it opens the file, modifies it, and closes it each time, but it works and my files are only 15 kB each. I changed it from "open the file and for every source and target in the dict, do something" to "for every source and target in the dict, open the file and do something".
for xliff_path in glob.glob('target\\*' + locale + '*.xml'):
    print(" xliff file:" + xliff_path)
    for source, target in findnreplace.items():
        # Reopen per replacement: read, rewind, substitute, write back, trim.
        with open(xliff_path, 'r+', encoding='utf-8') as handle:
            original = handle.read()
            handle.seek(0)
            print(" Replacing:" + source + ", with:" + target)
            updated = re.sub(r'(?<=' + source + '<\/source>)[\r\n]+([^\r\n]+)\>(.*?)\<',
                             r"\1" + ">" + target + "<", original, flags=re.MULTILINE)
            handle.write(updated)
            handle.truncate()
            print(updated)

How to modify specific line in sql file?

I am trying to modify a line in sql file in a directory.
Currently have written the code which navigates to files directory according to user input .
each sql file in that dir has these 2 lines:
--liquibase formatted sql
--changeset Jack:1 runOnChange:true splitStatements:false stripComments:false
I am trying to do is loop through all files and I want to change-set every-time the script runs.
so those 2 lines will look like this :
--liquibase formatted sql
--changeset Ryan:2 runOnChange:true splitStatements:false stripComments:false
The part I want to change in line is constant but the content is different for each file like in other file it will be Jie:6 I want to replace that with Priyal:7. So the name part is the person who is running the script and the number after : is incremented
Is there a cleaner way to achieve this :
This is just sample code I have which configures path and everything :
anbpath = os.path.abspath("copy_views.py")
print(anbpath)
sqldir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'database'))
path_to_views = sqldir+"/sql/edm/changes/"+source_release_version+"/mutable/view"
print(path_to_views)
destdir = os.path.abspath(os.path.join(os.path.dirname( __file__ ),'..', 'database'))
path_to_dest_rel_ver = destdir+"/sql/edm/changes/"+dest_release_version
path_to_dest = path_to_dest_rel_ver+"/mutable/view"
I will traverse all files in path_to_dest using os.walk
If your file is called "file.txt" and if the name is constant then what you are looking for is
def bump_changeset(line):
    """Return a changeset header line with the name suffix and revision bumped.

    Assumes the second whitespace-separated token looks like 'name<N>:<M>'
    (i.e. the author name is literally 'name'); see the regex answers below
    for arbitrary names.
    """
    words = line.split()
    tokens = words[1].split(":")
    # identify the numbers within the token and increment both
    words[1] = "name{0}:{1}".format(int(tokens[0][4:]) + 1, int(tokens[1]) + 1)
    # BUGFIX: str.split() discards the trailing newline, so the original
    # ' '.join() merged this line with the next one on write-back.
    trailing = '\n' if line.endswith('\n') else ''
    return ' '.join(words) + trailing

if __name__ == "__main__":
    # Read the contents of file
    with open("file.txt", 'r') as fp:
        lines = fp.readlines()
    # Update the second line (the --changeset header)
    lines[1] = bump_changeset(lines[1])
    # Write back the updated lines
    with open("file.txt", 'w') as fp:
        fp.writelines(lines)
Initial content of file
"--liquibase formatted sql",
"--changeset name3:21 runOnChange:true splitStatements:false stripComments:false"]
Afer modification
"--liquibase formatted sql",
"--changeset name4:22 runOnChange:true splitStatements:false stripComments:false"]
However, if the name is not constant then we can still split the token at ":" to identify the number after ":", but to identify the number ending the name we will have to use a regex.
You can use regex to solve this:
The following code will replace the string "name1:1" by "name2:2" if it finds the correct sequence and preserve the rest of the line:
import re
# read the file, line by line and then:
# NOTE(review): `line` is assumed to be one line read from the file above.
# \1 restores the captured "--changeset " prefix and \2 the rest of the line;
# only the literal "name1:1" between them is replaced by "name2:2".
line_fixed = re.sub(r"(--changeset )name1:1(.*)", r"\1name2:2\2", line)
# then save the fixed line to a temporary file until you read all file
Here's a way to do it with regular expressions:
import re

lines = [
    "--liquibase formatted sql",
    "--changeset Jack:2 runOnChange:true splitStatements:false stripComments:false",
    "--liquibase formatted sql",
    "--changeset Ryan:6 runOnChange:true splitStatements:false stripComments:false",
    # etc ...
]

# Compiled once at module level instead of on every update_line() call.
_CHANGESET = re.compile(r'--changeset (.+):(\d+) runOnChange')

def update_line(line):
    """Return *line* with its changeset revision number incremented by one.

    Lines that are not a changeset header are returned unchanged.
    NOTE(review): the captured name is re-used inside a sub() replacement
    string, so a name containing backslashes would need escaping -- confirm
    that cannot happen with real data.
    """
    matches = _CHANGESET.match(line)
    if not matches:
        return line
    replacement = '--changeset {}:{} runOnChange'.format(matches.group(1),
                                                         int(matches.group(2)) + 1)
    return _CHANGESET.sub(replacement, line)

for line in lines:
    updated_line = update_line(line)
    print(repr(updated_line))
Output:
'--liquibase formatted sql'
'--changeset Jack:3 runOnChange:true splitStatements:false stripComments:false'
'--liquibase formatted sql'
'--changeset Ryan:7 runOnChange:true splitStatements:false stripComments:false'

Search,replace text and save as based on text in document in Python

All, I am just getting started with Python and I thought this may be a good time to see if it can help me automate a lot of repetitive tasks I have to complete.
I am using a script I found on GitHub that will search and replace and then write a new file with the name output.txt. It works fine, but since I have lots of these files I need to be able to give them different names based on the text in the final modified document.
To make this a little more difficult, the name of the file is based on the text I will be modifying the document with.
So, basically after I run this script, I have a file that sits at C:\Program Files (x86)\Python35-32\Scripts\Text_Find_and_Replace\Result with the name of output.txt in this Modified new file I would like to name it based on what text is in a particular line of the file. So in the modified file of output.txt I would like to have it rename the file to the plain text in line 35.
I have figured out how to read the line within the file using
import linecache
line = linecache.getline("readme.txt", 1)
line
>>> line
'This is Python version 3.5.1\n'
I just need to figure out how to rename the file based on the variable "line"
Any Ideas?
#!/usr/bin/python
import os
import sys
import string
import re

# information/replacingvalues.txt: the values you want in your final document
# Field/values.txt: the words you will be replacing
# Result/output.txt: the final document
# Test/remedy line 1.ahk: the original doc you want modified
with open("C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\information/replacingvalues.txt", 'r') as information, \
     open("C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\Field/values.txt", 'r') as field, \
     open("C:\Program Files (x86)\Python35-32\Scripts\Text_Find_and_Replace\Result\output.txt", 'w') as output:
    with open("C:\Program Files (x86)\Python35- 32\Scripts\Text_Find_and_Replace\Test/remedy line 1.ahk", 'r') as myfile:
        inline = myfile.read()

    informations = [line.splitlines() for line in information]
    fields = [lines.split() for lines in field]

    if len(fields) != len(informations):
        print("replacing values and values have different numbers")
        sys.exit()  # BUGFIX: bare exit() is meant for the interactive shell

    dictionary = {}
    for i in range(len(fields)):
        # unwrap the list-repr the same way the original did
        rightvalue = str(informations[i]).strip('[]')[1:-1]
        leftvalue = str(fields[i]).strip('[]').strip("'")
        dictionary[leftvalue] = rightvalue

    # BUGFIX: escape the keys -- they are literal strings to replace, not regexes
    robj = re.compile('|'.join(map(re.escape, dictionary.keys())))
    result = robj.sub(lambda m: dictionary[m.group(0)], inline)
    output.write(result)
# BUGFIX: the original ended with "information.close;" etc., which only
# *references* the close method and never calls it; the with-blocks above
# close all four files reliably.
I figured out how...
import os
import linecache

linecache.clearcache()  # make sure we read the file fresh, not a cached copy
# line 37 of the source document holds the desired new file name
new_name = linecache.getline("C:\python 3.5/remedy line 1.txt", 37)
# strip() drops the trailing newline returned by getline()
os.rename("C:\python 3.5/output.ahk", new_name.strip())
linecache.clearcache()

How to search and replace text in a file?

How do I search and replace text in a file using Python 3?
Here is my code:
import os
import sys
import fileinput

print("Text to search for:")
textToSearch = input( "> " )
print("Text to replace it with:")
textToReplace = input( "> " )
print("File to perform Search-Replace on:")
fileToSearch = input( "> " )
#fileToSearch = 'D:\dummy1.txt'

# BUGFIX: the original wrote the replaced lines back into the very file it was
# still iterating (opened 'r+'), so a shorter replacement left stale junk
# characters at the end. Read the whole file first, then rewrite it in full.
with open(fileToSearch, 'r') as f:
    contents = f.read()

if textToSearch in contents:
    print('Match Found')
else:
    print('Match Not Found!!')

with open(fileToSearch, 'w') as f:
    f.write(contents.replace(textToSearch, textToReplace))

input( '\n\n Press Enter to exit...' )
Input file:
hi this is abcd hi this is abcd
This is dummy text file.
This is how search and replace works abcd
When I search and replace 'ram' by 'abcd' in above input file, it works as a charm. But when I do it vice-versa i.e. replacing 'abcd' by 'ram', some junk characters are left at the end.
Replacing 'abcd' by 'ram'
hi this is ram hi this is ram
This is dummy text file.
This is how search and replace works rambcd
As pointed out by michaelb958, you cannot replace in place with data of a different length because this will put the rest of the sections out of place. I disagree with the other posters suggesting you read from one file and write to another. Instead, I would read the file into memory, fix the data up, and then write it out to the same file in a separate step.
# Read the entire file into memory.
with open('file.txt', 'r') as fh:
    contents = fh.read()

# Substitute every occurrence of the target string.
contents = contents.replace('abcd', 'ram')

# Write the updated text back over the original file.
with open('file.txt', 'w') as fh:
    fh.write(contents)
Unless you've got a massive file to work with which is too big to load into memory in one go, or you are concerned about potential data loss if the process is interrupted during the second step in which you write data to the file.
fileinput already supports inplace editing. It redirects stdout to the file in this case:
#!/usr/bin/env python3
import fileinput

# NOTE(review): filename, text_to_search and replacement_text are placeholders
# expected to be defined by the caller.
# inplace=True redirects print() output into the file; a .bak backup is kept.
with fileinput.FileInput(filename, inplace=True, backup='.bak') as fh:
    for current in fh:
        print(current.replace(text_to_search, replacement_text), end='')
As Jack Aidley had posted and J.F. Sebastian pointed out, this code will not work:
# NOTE(review): intentionally broken example -- kept as-is to illustrate the bugs.
# Read in the file
filedata = None
# SyntaxError: a with-statement needs "open(...) as file", not an assignment.
with file = open('file.txt', 'r') :
filedata = file.read()
# Replace the target string
# Bug: str.replace returns a NEW string; the result is discarded here.
filedata.replace('ram', 'abcd')
# Write the file out again
with file = open('file.txt', 'w') :
file.write(filedata)`
But this code WILL work (I've tested it):
# Read the source file fully into memory.
with open(filein, 'r') as src:
    original = src.read()

# Apply the substitution; filein and fileout may be the same path, because the
# write below reopens (and truncates) the file only after reading finished.
updated = original.replace("old data", "new data")

with open(fileout, 'w') as dst:
    dst.write(updated)
You can do the replacement like this
# Copy file1 to file2 line by line, replacing the target text on the way.
with open('file1.txt', 'r') as src, open('file2.txt', 'w') as dst:
    for row in src:
        dst.write(row.replace('old_text', 'new_text'))
You can also use pathlib.
# BUGFIX: pathlib is in the standard library on Python 3; the original
# imported the third-party pathlib2 backport, which is only needed on Python 2.
from pathlib import Path

path = Path(file_to_search)
text = path.read_text()
text = text.replace(text_to_search, replacement_text)
path.write_text(text)
(pip install python-util)
# NOTE(review): pyutil is a third-party helper (pip install python-util), not stdlib.
from pyutil import filereplace
# replace every "abcd" in somefile.txt with "ram", in place
filereplace("somefile.txt","abcd","ram")
Will replace all occurrences of "abcd" with "ram".
The function also supports regex by specifying regex=True
# NOTE(review): pyutil is a third-party helper (pip install python-util), not stdlib.
from pyutil import filereplace
# regex=True treats the search pattern as a regular expression (\w+ = any word)
filereplace("somefile.txt","\\w+","ram",regex=True)
Disclaimer: I'm the author (https://github.com/MisterL2/python-util)
Open the file in read mode. Read the file in string format. Replace the text as intended. Close the file. Again open the file in write mode. Finally, write the replaced text to the same file.
# Read the file, apply the replacement, then rewrite it from scratch.
try:
    with open("file_name", "r") as text_file:
        texts = text_file.read()
    texts = texts.replace("to_replace", "replace_string")
    # BUGFIX: the original reopened with open(file_name, ...) -- an undefined
    # *variable* -- while it read from the string literal "file_name".
    with open("file_name", "w") as text_file:
        text_file.write(texts)
except FileNotFoundError:
    print("Could not find the file you are trying to read.")
Late answer, but this is what I use to find and replace inside a text file:
# Slurp the file and swap the text in a single expression.
with open("test.txt") as src:
    updated = src.read().replace("THIS", "THAT")
# Write the modified text back to the same file.
with open("test.txt", "w") as dst:
    dst.write(updated)
DEMO
With a single with block, you can search and replace your text:
# Search and replace within a single with-block.
with open('file.txt', 'r+') as f:
    filedata = f.read()
    filedata = filedata.replace('abc', 'xyz')
    # BUGFIX: rewind before rewriting. After read() the cursor sits at EOF, so
    # the original truncate(0)+write() left the file NUL-padded up to the old
    # offset with the new text appended after it.
    f.seek(0)
    f.truncate()
    f.write(filedata)
Your problem stems from reading from and writing to the same file. Rather than opening fileToSearch for writing, open an actual temporary file and then after you're done and have closed tempFile, use os.rename to move the new file over fileToSearch.
My variant, one word at a time on the entire file.
I read it into memory.
def replace_word(infile, old_word, new_word):
    """Replace every occurrence of *old_word* with *new_word* in *infile*, in place.

    Exits the process with status 1 if *infile* is not a regular file.
    """
    if not os.path.isfile(infile):
        print("Error on replace_word, not a regular file: "+infile)
        sys.exit(1)
    # BUGFIX: the original leaked both file handles (open().read() with no
    # close, and the 'w' handle was never closed/flushed explicitly).
    # Read first, THEN reopen for writing: mode 'w' truncates the file.
    with open(infile, 'r') as src:
        contents = src.read()
    with open(infile, 'w') as dst:
        dst.write(contents.replace(old_word, new_word))
Using re.subn it is possible to have more control on the substitution process, such as word splitted over two lines, case-(in)sensitive match. Further, it returns the amount of matches which can be used to avoid waste of resources if the string is not found.
import re

file = "path/to/file"  # path to file (the original line had no value at all)

# they can also be a raw string and regex
textToSearch = r'Ha.*O'  # here an example with a regex
textToReplace = 'hallo'

# read and replace
with open(file, 'r') as fd:
    # sample case-insensitive find-and-replace
    # BUGFIX: re.subn's 4th positional parameter is `count`, not `flags`.
    # Passing re.I positionally capped the substitutions at 2 instead of
    # enabling case-insensitive matching; it must be passed as flags=re.I.
    text, counter = re.subn(textToSearch, textToReplace, fd.read(), flags=re.I)

# check if there is at least a match
if counter > 0:
    # edit the file
    with open(file, 'w') as fd:
        fd.write(text)

# summary result
print(f'{counter} occurence of "{textToSearch}" were replaced with "{textToReplace}".')
Some regex:
add the re.I flag, short form of re.IGNORECASE, for a case-insensitive match
for multi-line replacement re.subn(r'\n*'.join(textToSearch), textToReplace, fd.read()), depending on the data also '\n{,1}'. Notice that for this case textToSearch must be a pure string, not a regex!
Besides the answers already mentioned, here is an explanation of why you have some random characters at the end:
You are opening the file in r+ mode, not w mode. The key difference is that w mode clears the contents of the file as soon as you open it, whereas r+ doesn't.
This means that if your file content is "123456789" and you write "www" to it, you get "www456789". It overwrites the characters with the new input, but leaves any remaining input untouched.
You can clear a section of the file contents by using truncate(<startPosition>), but you are probably best off saving the updated file content to a string first, then doing truncate(0) and writing it all at once.
Or you can use my library :D
I got the same issue. The problem is that when you load a .txt file into a variable and iterate over it character by character, you are treating it as a sequence of characters rather than a list of strings.
swapped = []
with open(filepath) as fh:
    data = fh.read()
# NOTE(review): this walks the text one CHARACTER at a time, so the
# 'this' -> 'that' replacement can never match a multi-character word;
# behaviour preserved exactly from the original snippet.
for ch in data:
    swapped.append(str(ch).replace('this', 'that'))
data = swapped
print(data)
I tried this and used readlines instead of read
with open('dummy.txt', 'r') as src:
    items = src.readlines()
print(f'before removal {items}')
# NOTE(review): removing every element while iterating a copy empties the list
for entry in items[:]:
    items.remove(entry)
print(f'After removal {items}')
# The list is empty here, so this write effectively truncates dummy.txt.
with open('dummy.txt', 'w+') as dst:
    for entry in items:
        dst.write(entry)
you can use sed or awk or grep in python (with some restrictions). Here is a very simple example. It changes banana to bananatoothpaste in the file. You can edit and use it. ( I tested it worked...note: if you are testing under windows you should install "sed" command and set the path first)
import os
import subprocess

file = "a.txt"
oldtext = "Banana"
newtext = " BananaToothpaste"

# Run sed with an argument list (shell=False) so oldtext/newtext cannot be
# interpreted by a shell; the original built a shell command string for
# os.system, which breaks (or worse) on quotes and metacharacters.
# NOTE: sed itself still treats '/' and '&' inside the values specially.
subprocess.run(['sed', '-i', 's/{}/{}/g'.format(oldtext, newtext), file])

print('This command was applied: sed -i "s/{}/{}/g" {}'.format(oldtext, newtext, file))
if you want to see results on the file directly apply: "type" for windows/ "cat" for linux:
####FOR WINDOWS:
os.popen("type " + file).read()  # 'type' prints a file's contents on Windows
####FOR LINUX:
os.popen("cat " + file).read()  # 'cat' prints a file's contents on Linux
I have done this:
#!/usr/bin/env python3
import fileinput
import os

# Ask for the working directory and show its contents.
Dir = input("Source directory: ")
os.chdir(Dir)
Filelist = os.listdir()
print('File list: ', Filelist)

# Ask which file to edit and what to replace.
NomeFile = input("Insert file name: ")
CarOr = input("Text to search: ")
CarNew = input("New text: ")

# inplace=True redirects print() into the file; a .bak backup copy is kept.
with fileinput.FileInput(NomeFile, inplace=True, backup='.bak') as file:
    for line in file:
        print(line.replace(CarOr, CarNew), end='')
# BUGFIX: removed the trailing "file.close()" -- the with-statement has
# already closed the stream by the time that line ran.
I modified Jayram Singh's post slightly in order to replace every instance of a '!' character to a number which I wanted to increment with each instance. Thought it might be helpful to someone who wanted to modify a character that occurred more than once per line and wanted to iterate. Hope that helps someone. PS- I'm very new at coding so apologies if my post is inappropriate in any way, but this worked for me.
# Copy file1 to file2, replacing each '!' with an incrementing [n] marker and
# copying every other character through unchanged.
counter = 1
with open('file1.txt', 'r') as src, open('file2.txt', 'w') as dst:
    for row in src:
        # iterate character by character so every '!' gets its own number
        for ch in row:
            if ch == '!':
                dst.write(ch.replace('!', f'[{counter}]'))
                counter += 1
            else:
                dst.write(ch)
def word_replace(filename, old, new):
    """Replace whole-word occurrences of *old* with *new* inside *filename*.

    Words are matched by surrounding single spaces, so occurrences at the very
    start or end of the text (or next to punctuation) are left untouched --
    the same contract as the original snippet.
    """
    with open(filename, 'r+', encoding='utf-8') as handle:
        text = handle.read()
        # count exact whitespace-delimited matches to cap the replacements
        hits = sum(1 for token in text.split() if token == old)
        # pad one space on each side (equivalent to str.center(len + 2))
        updated = text.replace(' ' + old + ' ', ' ' + new + ' ', hits)
        handle.truncate(0)
        handle.seek(0)
        handle.write(updated)
        print('All words have been replaced!!!')
I have worked this out as an exercise of a course: open file, find and replace string and write to a new file.
class Letter:
    """Generate one personalised letter per invited guest.

    Reads the guest list and the letter template, then writes a ready-to-send
    copy for each guest with the '[name]' placeholder substituted.
    """

    def __init__(self):
        with open("./Input/Names/invited_names.txt", "r") as names_file:
            # one invitee per line; rstrip drops the trailing newline
            invitees = [entry.rstrip() for entry in names_file]
        with open("./Input/Letters/starting_letter.docx", "r") as template_file:
            template = template_file.read()
        for invitee in invitees:
            with open(f"./Output/ReadyToSend/LetterTo{invitee}.docx", "w") as out:
                # substitute the placeholder and write the personalised copy
                out.write(template.replace('[name]', invitee))

brief = Letter()
Like so:
def find_and_replace(file, word, replacement):
    """Replace every occurrence of *word* in *file* with *replacement*, in place."""
    with open(file, 'r+') as f:
        text = f.read()
        # BUGFIX: after read() the cursor is at EOF, so the original write()
        # APPENDED the replaced text instead of overwriting the file.
        f.seek(0)
        f.write(text.replace(word, replacement))
        f.truncate()  # drop leftover bytes when the result is shorter
def findReplace(find, replace):
    """Walk every .py file under the parent of the CWD and replace *find* with *replace*.

    CAUTION: this rewrites source files in place across a whole directory tree.
    """
    import os
    src = os.path.join(os.getcwd(), os.pardir)
    for path, dirs, files in os.walk(os.path.abspath(src)):
        for name in files:
            if not name.endswith('.py'):
                continue
            filepath = os.path.join(path, name)
            # Close the read handle before reopening for write (the original
            # opened the file for writing while the read handle was still open).
            with open(filepath) as f:
                s = f.read()
            replaced = s.replace(find, replace)
            # BUGFIX: skip files with no match; the original rewrote (and
            # re-timestamped) every single .py file even when nothing changed.
            if replaced != s:
                with open(filepath, "w") as f:
                    f.write(replaced)

Categories