Python find, replace or add string in file - python

Hi and thanks for looking :)
I have a text file that is over 2500 lines long; each line contains information about a video file. One of the tags (so to speak) is for watched status. I am looking for a way of changing it from one value to another, or adding a new value if it is not set. The code below works, but it has to open and close the file for each search value, which means it's very slow. Can anyone suggest a way of opening the file once and doing all the searches in one pass?
Thanks
# For every item in `y`, rewrite the file in place, flipping the watched flag
# from 0 to 1 (or appending it) on lines that mention the item's movie id.
# NOTE: the whole file is read and rewritten once per item, which is the
# slowness the question is about.
for x in y:
    print(' --> ' + x['title'].encode('utf-8'))
    searchValue = x['movieid']
    addValue = "\t_w\t1\t"
    checkvalue = "\t_w\t0\t"
    # With inplace=1, whatever is written to stdout replaces the file content.
    for line in fileinput.input(file, inplace=1):
        if searchValue in line:
            if checkvalue in line:
                line = line.replace(checkvalue, addValue)
            elif addValue not in line:
                line = line + addValue
        sys.stdout.write(line)
This is what i ended up with, thanks to everyone for your input.
# Single pass: read all lines once, update the watched flag on every line that
# mentions one of the movie ids in `y`, then write everything back.
# `checkvalue` and `addValue` are the "\t_w\t0\t" / "\t_w\t1\t" tags defined earlier.
with open(file) as inref:
    myfile_list = inref.readlines()
newList = []
for line in myfile_list:
    for x in y:
        if x['movieid'] in line:
            print(' --> ' + x['title'].encode('utf-8'))
            if checkvalue in line:
                line = line.replace(checkvalue, addValue)
            elif addValue not in line:
                # insert the tag before the line ending rather than after it
                line = line.replace('\n', addValue + '\n')
    newList.append(line)
with open(file, 'w') as outref:
    outref.writelines(newList)
Edit
I have come across an issue with the encoding, The file is encoded in utf-8 but it errors out or just does not find a match when the search value is
'Hannibal - S01E01 - Ap\xe9ritif.mkv'
the matching line in the file looks like
_F /share/Storage/NAS/Videos/Tv/Hannibal/Season 01/Hannibal - S01E01 - Apéritif.mkv _rt 43 _r 8.4 _s 1 _v c0=h264,f0=24,h0=720,w0=1280 _IT 717ac9d _id 1671 _et Apéritif _DT 7142d53 _FT 7142d53 _A 4212,4211,2533,4216 _C T _G j|d|h|t _R GB:TV-MA _T Hannibal _U thetvdb:259063 imdb:tt2243973 _V HDTV _W 4210 _Y 71 _ad 2013-04-04 _e 1 _ai Apéritif _m 1117
I have tried codecs.open and decode().encode() options but it always errors out. I believe it's the accented letters in the line that are the issue, as the `if searchValue in line:` test works when the line does not have an accented letter. This is what I am currently trying, but I am open to other methods.
# Flip or add the watched flag for `searchValue` in the Oversight index file.
# NOTE(review): lines are compared as read from the file; if searchValue is
# encoded differently from the file's UTF-8 text (e.g. 'Ap\xe9ritif'),
# presumably the `in` test cannot match -- confirm how searchValue is encoded.
if os.path.isfile("/share/Apps/oversight/index.db"):
    #searchValueFix = searchValue.decode('latin-1', 'replace').encode('utf8', 'replace')
    #print searchValueFix
    #print searchValue
    addValue = "\t_w\t1\t"
    replacevalue = "\t_w\t0\t"
    updated = []
    with open("/share/Apps/oversight/index.db", "r") as file:
        for line in file:
            if searchValue in line:
                if replacevalue in line:
                    line = line.replace(replacevalue, addValue)
                elif addValue not in line:
                    # no watched tag yet: insert it right after the file name
                    line = line.replace(searchValue+"\t", searchValue+addValue)
            updated.append(line)
    with open("/share/Apps/oversight/index.db", "w") as file:
        file.write("".join(updated))
    newfile = ""

Similar to the method proposed by PyNEwbie, you can write lines 1 by 1:
# Read every line up front, then rewrite the same file one line at a time.
with open(file) as inref:
    myfile_list = inref.readlines()
with open(file, 'w') as outref:
    for line in myfile_list:
        # do something to line
        outref.write(line)

Yes read in your file to a list
# Read the file into a list, build the modified lines, then write them back.
with open(file) as inref:
    myfile_list = inref.readlines()
newList = []
for line in myfile_list:
    # ... make your changes to `line` here ...
    newList.append(line)  # this is the line after you have made your changes
with open(file, 'w') as outref:
    outref.writelines(newList)

Related

Lines missing in python

I am writing a code in python where I am removing all the text after a specific word but in output lines are missing. I have a text file in unicode which have 3 lines:
my name is test1
my name is
my name is test 2
What I want is to remove text after word "test" so I could get the output as below
my name is test
my name is
my name is test
I have written a code but it does the task but also removes the second line "my name is"
My code is below
# Cut each line of test.txt after the word "test" and write the result back.
# NOTE: only lines where find() returns a positive index are added to `txt`,
# so lines without the keyword are left out (the problem described above).
txt = ""
with open(r"test.txt", 'r') as fp:
    for line in fp.readlines():
        splitStr = "test"
        index = line.find(splitStr)
        if index > 0:
            txt += line[:index + len(splitStr)] + "\n"
with open(r"test.txt", "w") as fp:
    fp.write(txt)
It looks like if there is no keyword found the index become -1.
So you are avoiding the lines w/o keyword.
I would modify your if by adding the condition as follows:
# Cut each line of test.txt after the word "test"; lines without the keyword
# are kept unchanged.
splitStr = "test"
txt = ""
with open(r"test.txt", 'r') as fp:
    for line in fp.readlines():
        index = line.find(splitStr)
        if index >= 0:
            # find() returns 0 when the line starts with the keyword, so 0 is a match
            txt += line[:index + len(splitStr)] + "\n"
        elif index < 0:
            # keyword absent (find() returned -1): keep the line as it is
            txt += line
with open(r"test.txt", "w") as fp:
    fp.write(txt)
No need to add \n because the line already contains it.
Your code does not append the line if splitStr is not found in it.
# Cut each line of test.txt after the word "test"; lines without the keyword
# are copied through unchanged.
splitStr = "test"
kept = []
with open(r"test.txt", 'r') as fp:
    for line in fp:
        index = line.find(splitStr)
        if index != -1:
            kept.append(line[:index + len(splitStr)] + "\n")
        else:
            kept.append(line)
txt = "".join(kept)
with open(r"test.txt", "w") as fp:
    fp.write(txt)
In my solution I simulate the input file via io.StringIO. Compared to your code, my solution removes the else branch and uses only one += operator. Also, splitStr is set only once and not on each iteration. This makes the code clearer and reduces possible sources of error.
import io
# simulates a file for this example
the_file = io.StringIO("""my name is test1
my name is
my name is test 2""")
txt = ""
splitStr = "test"
with the_file as fp:
    # each line
    for line in fp.readlines():
        # cut something?
        if splitStr in line:
            # find index
            index = line.find(splitStr)
            # cut after 'splitStr' and add newline
            line = line[:index + len(splitStr)] + "\n"
        # append line to output (cut or untouched)
        txt += line
print(txt)
When handling with files in Python 3 it is recommended to use pathlib for that like this.
import pathlib
file_path = pathlib.Path("test.txt")
# read from file
with file_path.open('r') as fp:
    # do something
    pass
# write back to the file
with file_path.open('w') as fp:
    # do something
    pass
Suggestion:
for line in fp.readlines():
    i = line.find('test')
    if i != -1:
        # keep everything up to and including 'test', then restore the newline
        line = line[:i + len('test')] + '\n'

Reading a Python File to EOF while performing if statments

I am working on creating a program to concatenate rows within a file. Each file has a header, datarows labeled DAT001 to DAT113 and a trailer. Each line of concatenated rows will have DAT001 to DAT100 and 102-113 is optional. I need to print the header, concatenating DAT001-113 and when the file finds a row with DAT001 I need to start a new line concatenating DAT001-113 again. After that is all done, I will print the trailer. I have an IF statement started but it only writes the header and skips all other logic. I apologize that this is very basic - but I am struggling with reading rows over and over again without knowing how long the file might be.
I have tried the below code but it won't read or print after the header.
import pandas as pd
destinationFile = "./destination-file.csv"
sourceFile = "./TEST.txt"
header = "RHR"
data = "DPSPOS"
beg_data = "DAT001"
data2 = "DAT002"
data3 = "DAT003"
data4 = "DAT004"
data5 = "DAT005"
data6 = "DAT006"
data7 = "DAT007"
data8 = "DAT008"
data100 = "DAT100"
data101 = "DAT101"
data102 = "DAT102"
data103 = "DAT103"
data104 = "DAT104"
data105 = "DAT105"
data106 = "DAT106"
data107 = "DAT107"
data108 = "DAT108"
data109 = "DAT109"
data110 = "DAT110"
data111 = "DAT111"
data112 = "DAT112"
data113 = "DAT113"
req_data = ''
opt101 = ''
opt102 = ''
# Walk the source file and write header, data and trailer rows to the destination.
# NOTE(review): every open(destinationFile, "w+") truncates the file, so each
# write replaces what was written before.
# NOTE(review): `trailer` is not defined in the code shown, and pd.concat is
# being handed plain strings.
with open(sourceFile) as Tst:
    for line in Tst.read().split("\n"):
        if header in line:
            with open(destinationFile, "w+") as dst:
                dst.write(line)
        elif data in line:
            if beg_data in line:
                req_data = line+line+line+line+line+line+line+line+line
            if data101 in line:
                opt101 = line
            if data102 in line:
                opt102 = line
            new_line = pd.concat(req_data,opt101,opt102)
            with open(destinationFile, "w+") as dst:
                dst.write(new_line)
        else:
            if trailer in line:
                with open(destinationFile, "w+") as dst:
                    dst.write(line)
Just open the output file once for the whole loop, not every time through the loop.
Check whether the line begins with DAT001. If it does, write the trailer to the current line and start a new line by printing the header.
Then for every line that begins with DAT, write it to the file in the current line.
# Concatenate DAT rows into output lines, opening the destination only once.
# NOTE(review): `trailer` is not defined in the code shown; it must be set
# before this runs.
first_line = True
with open(sourceFile) as Tst, open(destinationFile, "w+") as dst:
    for line in Tst.read().split("\n"):
        # start a new output line when reading a row that begins with beg_data (DAT001)
        if line.startswith(beg_data):
            if not first_line:  # need to end the current line
                dst.write(trailer + '\n')
            first_line = False
            dst.write(header)
        # copy all the lines that begin with `DAT`
        if line.startswith('DAT'):
            dst.write(line)
    # end the last line, but only if an output line was actually started
    if not first_line:
        dst.write(trailer + '\n')
See if the following code helps make progress. It was not tested because no
Minimum Runnable Example is provided.
# Both files are opened once; `with` is a context manager that keeps each file
# open for the indented code and closes it automatically afterwards.
# NOTE(review): `trailer` is not defined in the code shown.
with open(destinationFile, "a") as dst:
    with open(sourceFile) as Tst:
        for line in Tst.read().split("\n"):
            if header in line:
                dst.write(line)
            elif data in line:
                if beg_data in line:
                    req_data = line + line + line + line + line + line + line + line + line
                if data101 in line:
                    opt101 = line
                if data102 in line:
                    opt102 = line
                # these are plain strings, so join them with + (pd.concat only
                # accepts pandas objects)
                new_line = req_data + opt101 + opt102
                dst.write(new_line)
            else:
                if trailer in line:
                    dst.write(line)

How to replace line breaks except if a character appears in that line in Python 3

I am trying to create a python code that will delete line break (\n) if a character (<) does not appear in that line. Currently I cannot see any effects. This is the code i wrote so far:
# Question code: tries to drop line breaks from lines that contain no "<".
protein=open(r"string.txt","r+")
def main():
    print(protein.readlines())
    print()
    rewrite()
def rewrite():
    # NOTE(review): main() has already consumed the file with readlines(), and
    # the result of line.replace() is discarded -- hence "no effects".
    for line in protein:
        if line == "<":
            print (line)
        if line != "<":
            line.replace("\n"," ")
    print(protein)
    protein.close
main()
Input Example:
Name1<<
Data1
Data1
Data1
Name2<<
Data2
Data2
Data2
Expected Output Example:
Name1<<
Data1Data1Data1
Name2<<
Data2Data2Data2
This is the below code you asked
# Keep lines containing "<" as they are and glue all other lines together
# without their line breaks.
with open("string.txt", "r") as file_handle:
    protein = file_handle.readlines()
final_string = ""
for line in protein:
    if "<" in line:
        final_string += line
    else:
        final_string += line.strip()
print(final_string)
But this produces the result as
Name1<<
Data1Data1Data1Name2<<
Data2Data2Data2
But if you want the desired output, use the below code
# Put every line containing "<" on its own line and glue the data lines that
# follow it together without line breaks.
with open("string.txt", "r") as file_handle:
    protein = file_handle.readlines()
parts = []
for line in protein:
    if "<" in line:
        # a name line always starts on a fresh line
        parts.append("\n" + line)
    else:
        parts.append(line.strip())
# strip the blank line the first name line would otherwise leave at the top
final_string = "".join(parts).strip()
print(final_string)
# if you want to update the file content
with open("string.txt", "w") as file_handle:
    # write the same text that was printed, ending with a newline
    file_handle.write(final_string + "\n")
Output of above code will be
Name1<<
Data1Data1Data1
Name2<<
Data2Data2Data2
Use FileInput
# inplace=1 sends whatever is printed inside the loop back into the file.
for line in fileinput.FileInput("+some_file+", inplace=1):
    ...
You can use the fileinput module to perform in-place editing of the file data. Assuming the first line will be a name line, just print it; then, for each additional line, if there is no << at the end, print the line without a newline character; otherwise print a newline character and then the line.
import fileinput
# Edit query4.txt in place: with inplace=True everything printed inside the
# block is written to the file.
with fileinput.input('query4.txt', inplace=True) as protein:
    # the first line is assumed to be a name line; copy it through unchanged
    print(protein.readline(), end='')
    for line in protein:
        line = line.rstrip()
        if line.endswith('<<'):
            # a name line ends the glued data and starts on its own line
            print("\n" + line)
        else:
            # data line: print without a line break so the next one is appended
            print(line, end='')
This seems to work
# Print string.txt with data lines glued together under their name lines.
protein=open(r"string.txt","r+")
def main():
    rewrite()
def rewrite():
    s = ""
    nl = False  # True while glued data is pending a closing line break
    for line in protein.readlines():
        if "<" in line:
            if nl:
                # finish the glued data line before the next name line
                s += '\n'
                nl = False
            s += line
        else:
            s += line.replace('\n', '')
            nl = True
    print(s)
    protein.close()
main()
Here is another way to achieve your goal.
def rewrite(protein):
    """Print the lines of *protein* with consecutive data lines joined.

    Lines containing "<" are printed on their own line; the lines that follow,
    up to the next "<" line, are concatenated without line breaks.

    Args:
        protein: iterable of text lines (with or without trailing newlines).
    """
    results = []
    start_new = True  # the next data line opens a new output line
    for line in protein:
        text = line.replace("\n", "")
        if "<" in line:
            results.append(text)
            start_new = True
        elif start_new:
            results.append(text)
            start_new = False
        else:
            results[-1] += text
    # joining avoids the leading blank line a per-name-line "\n" prefix produces
    print("\n".join(results))
def main():
    """Read string.txt and print it through rewrite()."""
    # the context manager closes the file even if reading fails
    with open(r"string.txt","r+") as f:
        protein = f.readlines()
    rewrite(protein)
main()
Output:
Name1<<
Data1Data1Data1
Name2<<
Data2Data2Data2

How do I match two plain text files line by line using Python

As per my requirement, I wish to match two text files line by line in Python on Windows platform. for example I have the following text files:
File1:
My name is xxx
command completed successfully.
My mother's name is yyy
My mobile number is 12345
the heavy lorry crashed into the building at midnight
lorry eat in the faculty a red apple
File2:
My name is xxx
command . successfully.
The name of my mother is
what a heavy lorry it is that crashed into the building
lorry eat an apple in the faculty
I apologize for not being clear enough. My problem is: how can I align a movie script with its subtitles? I wrote the following code in Python, but it's not enough to get the alignment from the two text files:
# Open file for reading in text mode (default mode)
f1 = open('F:/CONTRIBUTION 2017/SCRIPT-SUBTITLES CODES/Script Alignement Papers/f1.txt','r')
f2 = open('F:/CONTRIBUTION 2017/SCRIPT-SUBTITLES CODES/Script Alignement Papers/f2.txt','r')
#Print confirmation
# print("-----------------------------------")
#print("Comparing files ", " > " + fname1, " < " +fname2, sep='\n')
# print("-----------------------------------")
# Read the first line from the files
f1_line = f1.readline()
f2_line = f2.readline()
# Initialize counter for line number
line_no = 1
# Loop if either file1 or file2 has not reached EOF
# Compare the two files line by line until both have reached EOF
# (readline() returns '' only at end of file).
while f1_line != '' or f2_line != '':
    # Strip the trailing whitespace (including the line break)
    f1_line = f1_line.rstrip()
    f2_line = f2_line.rstrip()
    # Compare the lines from both file
    if f1_line != f2_line:
        # If a line does not exist on file2 then mark the output with + sign
        if f2_line == '' and f1_line != '':
            print("=================================================================")
            print("=================================================================")
            print("line does not exist on File 2 ====================")
            print("=================================================================")
            print(">+", "Line-%d" % line_no, f1_line)
        # otherwise output the line on file1 and mark it with > sign
        elif f1_line != '':
            print("=================================================================")
            print("=================================================================")
            print("otherwise output the line on file1 ====================")
            print("=================================================================")
            print(">", "Line-%d" % line_no, f1_line)
        # If a line does not exist on file1 then mark the output with + sign
        if f1_line == '' and f2_line != '':
            print("=================================================================")
            print("=================================================================")
            print("=line does not exist on File 1 ====================")
            print("=================================================================")
            print("<+", "Line-%d" % line_no, f2_line)
        # otherwise output the line on file2 and mark it with < sign
        elif f2_line != '':
            print("=================================================================")
            print("=================================================================")
            print("otherwise output the line on file2 ====================")
            print("=================================================================")
            print("<", "Line-%d" % line_no, f2_line)
        # Print a blank line
        print()
    #Read the next line from the file
    f1_line = f1.readline()
    f2_line = f2.readline()
    #Increment line counter
    line_no += 1
# Close the files
f1.close()
f2.close()
If can anyone help to do this matching, i would be very grateful.
It would be good to post code you tried writting. This feels like we are doing your homework and makes you look lazy. That being said, take a look at the following:
# Report when both files contain exactly the same lines.
with open(file1, 'r') as f1, open(file2, 'r') as f2:
    if f1.readlines() == f2.readlines():
        print('Files {} & {} are identical!'.format(file1, file2))
PS: This checks whether the files are identical. If you want something like a logical comparison you have to do some research first.
One possible way is to store the lines of the file in a list and then compare the lists.
# Read both files into lists of lines and compare the lists.
with open("file1.txt", "r") as file:
    lines_of_file1 = file.readlines()
with open("file2.txt", "r") as file:
    lines_of_file2 = file.readlines()
# List equality compares line i of one file with line i of the other and also
# requires the same number of lines; comparing every line against every line
# would report a difference for any file with two distinct lines.
same = lines_of_file1 == lines_of_file2
if same:
    print("Files are same")
else:
    print("Files are not same")
Hope that helps.

How to find a specific line of text in a text file with python?

def match_text(raw_data_file, concentration):
    """Collect six lines starting at each line that matches the search text.

    Args:
        raw_data_file: path of the text file to search.
        concentration: text appended to ' WITH A CONCENTRATION IN ' to form
            the pattern; it is passed to re.search, so it is treated as a
            regular expression.
    """
    lines = ""
    print("Testing")
    w = ' WITH A CONCENTRATION IN ' + concentration
    with open(raw_data_file, 'r') as file:
        for num, line in enumerate(file.readlines(), 0):
            if re.search(w, line):
                for i in range(0, 6):
                    # linecache is 1-based: num + 1 is the matching line and
                    # adding i walks through the lines that follow it
                    lines += linecache.getline(raw_data_file, num + 1 + i)
                try:
                    # NOTE(review): `write` is not defined in the code shown.
                    write(lines, "lines.txt")
                    print("Lines Data Created...")
                except Exception:
                    print("Could not print Line Data")
            else:
                print("Didn't Work")
I am trying to open a .txt file and search for a specific string.
If you are simply trying to write all of the lines that hold your string to a file, this will do.
def match_text(raw_data_file, concentration):
    """Copy every line mentioning the concentration into lines.txt.

    Args:
        raw_data_file: path of the text file to search.
        concentration: text appended to ' WITH A CONCENTRATION IN ' to form
            the plain substring looked for in each line.
    """
    look_for = ' WITH A CONCENTRATION IN ' + concentration
    with open(raw_data_file) as fin, open('lines.txt', 'w') as fout:
        fout.writelines(line for line in fin if look_for in line)
Fixed my own issue. The following works to find a specific line and get the lines following the matched line.
def match_text(raw_data_file, match_this_text, lines_after=5):
    """Return each matching line together with the lines that follow it.

    Args:
        raw_data_file: path of the text file to search.
        match_this_text: plain substring to look for in each line.
        lines_after: how many lines after a matching line to include.

    Returns:
        One multi-line string with every matched line and its following lines;
        an empty string when nothing matches.
    """
    w = match_this_text
    lines = ""
    with open(raw_data_file, 'r') as inF:
        for line in inF:
            if w in line:
                lines += line  # add the matched line itself
                for i in range(0, lines_after):
                    # next() advances the same iterator the for loop uses; the
                    # "" default avoids StopIteration near the end of the file
                    lines += next(inF, "")
    return lines
This will return multiple lines plus the matched string that the user wants. I apologize if the question was confusing.

Categories