Removing complete lines of text in a text file (on Python) - python

I am working with an output log file that has 12 thousand lines of code, most of which include something that looks like this:
"760.0132 EXP window1: blendMode = 'avg'"
My goal is to entirely remove any line that has "EXP window1: blendMode = 'avg'". I can remove that text bit from all of the lines where it is found, but not the number. This is the code I have used to delete the text bits (borrowed from another stack overflow question/answer):
infile = "01_Day1_run1.txt"
outfile = "01_Day1_run1_cleaned.txt"
delete_list = [" EXP window1: blendMode = 'avg'"]
fin = open(infile)
fout = open(outfile, "w+")
for line in fin:
for word in delete_list:
line = line.replace(word, "")
fout.write(line)
fin.close()
fout.close()
I was hoping that I would be able to add something like
delete_list = ["1**.**** EXP window1: blendMode = 'avg'"]
in order to delete any number that includes all of the text, and also any number in that line, but it does not seem to work. Any advice on how to best clean up the log file would be greatly appreciated.
Thanks very much,
Simon

Why do you want to do this using Python? You can do this with a simple grep -v or findstr /V, as in following example:
Prompt>grep -v "blendmode" input.txt >output.txt

infile = "01_Day1_run1.txt"
outfile = "01_Day1_run1_cleaned.txt"
delete_list = [" EXP window1: blendMode = 'avg'"]
fin = open(infile)
fout = open(outfile, "a")
for line in fin:
for word in delete_list:
if word in line:
wordCheck = False
break
else:
wordCheck = True
if wordCheck:
fout.write(line)
fin.close()
fout.close()
Maybe cleaner:
with open("01_Day1_run1.txt", "r") as infile, open("01_Day1_run1_cleaned.txt", "a") as outfile:
for line in infile:
if not any(filter in line for filter in delete_list ):
outfile.write(line)

infile = "01_Day1_run1.txt"
outfile = "01_Day1_run1_cleaned.txt"
delete_string = "EXP window1: blendMode = 'avg'"
fin = open(infile)
fout = open(outfile, "a")
for line in fin.readLines():
if delete_list not in line:
fout.write(line)
fin.close()
fout.close()

Related

does not print into the original file

in the last 2 lines the file1 stays blanks even with the write function. the rest of the code works flawlessly
def modQuantity(filepath: str,):
model = input("Model: ")
size = input("size")
newquantity = input("New Quantity: ")
file = open(filepath, 'r')
tempfile = open(filepath+"temp", 'w')
for line in file:
sep = line.split()
if sep[0] == model and sep[1] == size:
tempfile.write(f"{sep[0]} {sep[1]} {newquantity}\n")
else:
tempfile.write(f"{line}")
tempfile.close()
file.close()
tempfile1 = open(filepath+"temp", 'r')
file1 = open(filepath, 'w')
for line1 in tempfile1:
file1.write(f"{line1}")
You didn't close the file so it hadn't chance to flush the content. You can use that method by yourself or close the file.
I recommend to use contextmanager so you can be sure file is flushed and closed:
with open(filepath, 'w') as file1:
for line1 in tempfile1:
file1.write(f"{line1}")

I want to delete just a one searched row from my test file

This is my code and file data given bellow. I have file with the name of test. In which I’m trying find a num in first column. If that number is found in any line, I want to delete that row.
def deleteline():
n=5
outfile=open('test.txt','r+')
line = outfile.readline()
while line !='':
lines= line.rstrip('\n')
listline=lines.split(',')
num=int(listline[0])
if n==num:
print(listline[1])
outfile.write(lines)
else:
print("no")
line= outfile.readline()
outfile.close()
This should solve your problem:
def deleteline():
n = 5
outfile = open('test.txt', 'r')
lines = outfile.readlines()
outfile.close()
outfile = open('test.txt', 'w')
for line in lines:
lineStrip = line.rstrip('\n')
listLine = lineStrip.split(',')
num = int(listLine[0])
if(num == n):
outfile.write('')
else:
outfile.write(line)
outfile.close()
As suggested by Simon in above comment, from this answer, modify the check condition like this:
with open("test.txt", "r") as f:
lines = f.readlines()
with open("test.txt", "w") as f:
for line in lines:
tempLine=line.strip("\n")
tempLine=tempLine.split(',')
if int(tempLine[0]) != 4:
f.write(line)

Python reading and writing to a .txt file

I need to read the names from the babynames2014.txt file and then create two new files, separating the boys and girls names. The resulting files should be called boynames2014.txt and girlnames.txt. The babynames2014.txt files looks like this:
1 Noah Emma
2 Liam Olivia
3 Mason Sophia
4 Jacob Isabella
and continues until it reaches 100 boy and girls names.
The code I have written so far creates both of the new text files but the boynames2014 contains nothing and the girlnames2014 contains only the name Noah with the number 1 before it like this: 1Noah.
I think that I will need to use readline() and line.split()
somewhere, I'm just not sure where and how to use them correctly. I also need to use a try/except block to handle the exception in case the babynames2014.txt file is not found.
infile = open("babynames2014.txt", "r")
outfile = open("boynames2014.txt", "w")
outfile = open("girlnames2014.txt", "w")
line = infile.readline()
datafield = line.split()
boyname2014 = datafield[0]
girlname2014 = datafield[1]
outfile.write(boyname2014)
outfile.write(girlname2014)
infile.close()
outfile.close()
I have only studied Python for 2-3 months and really appreciate any advice to help me learn more!
I've noticed one thing that is logically not correct i.e., outfile for both boynames2014.txt and girlnames2014.txt
You should've done like this.
infile = open("babynames2014.txt", "r")
outfile_boys = open("boynames2014.txt", "w")
outfile_girls = open("girlnames2014.txt", "w")
Then, you have to read the infile and split by new line for required data as following.
lines = infile.read().split("\n")
Then iterate over the lines as below and split by space(default).
for line in lines:
datafield = line.split()
boyname2014 = datafield[1]
girlname2014 = datafield[2]
outfile_boys.write(boyname2014 + '\n')
outfile_girls.write(girlname2014 + '\n')
I've selected 1 and 2 index for data field because your file contains data like :
1 boy_name girl_name
Splitting by space delivers boy_name to 1st index and girl_name to 2nd index
Then close your files as usual.
infile.close()
outfile_boys.close()
outfile_girls.close()
Hope it helps!
You need to have seperate pointers for output files.
`
infile = open("babynames2014.txt", "r")
outfileboy = open("boynames2014.txt", "w")
outfilegirl = open("girlnames2014.txt", "w")
for line in infile.readlines():
names = line.split(" ")
outfileboy.write(str(names[1]+"\n")
outfilegirl.write(str(names[2]+"\n")
outfileboy.close()
outfilegirl.close()
`
outfile1 = open("boynames2014.txt", "w")
outfile2 = open("girlnames2014.txt", "w")
with open('babynames2014.txt') as infile:
for line in infile:
datafield = line.split()
boyname2014 = datafield[0]
girlname2014 = datafield[1]
outfile1.write(boyname2014)
outfile2.write(girlname2014)
outfile1.close()
outfile2.close()
readline() only reads a single line (as the name might suggest)
so only the first line get read (1 Noah Emma )
To read the all the lines and split them and write them to a file try:
# use two different names for the files
# you had one name `outfile` which was being
# overwritten so tht why boy file was empty
infile = open("babynames2014.txt", "r")
boyfile = open("boynames2014.txt", "w")
girlfile = open("girlnames2014.txt", "w")
with open('babynames2014', 'r') as f:
for l in f.readlines(): # notice readlines instead of readline
_, boy, girl = l.split() # assumes the separator is a space
print(boy, file=boyfile)
print(girl, file=girlfile)
# don't forget to close your file desciptors
boyfile.close()
girlfile.close()
Here you go,
#! /usr/bin/python
import sys
boy_file = str(sys.argv[1])
girl_file = str(sys.argv[2])
all_records = [line.strip() for line in open('babynames2014', 'r')]
f1 = open(boy_file, "w")
f2 = open(girl_file, "w")
for record in all_records:
split_record = record.split(' ')
boy_name = split_record[1]
girl_name = split_record[2]
f1.write(boy_name+"\n")
f2.write(girl_name+"\n")
f1.close()
f2.close()
You have specified the same variable name both the output files. outfile.
infile = open("babynames2014.txt", "r")
outfileb = open("boynames2014.txt", "w")
outfileg = open("girlnames2014.txt", "w")
line = infile.readline()
datafield = line.split()
boyname2014 = datafield[0]
girlname2014 = datafield[1]
outfileb.write(boyname2014)
outfileg.write(girlname2014)
infile.close()
outfileb.close()
outfileg.close()
and you need to loop through the input file in order to get all the names.
You can use ''.join([i for i in s if not i.isdigit()]) to remove the number from the names.
infile = open("babynames2014.txt", "r")
outfileb = open("boynames2014.txt", "w")
outfileg = open("girlnames2014.txt", "w")
tmp = infile.readline()
line=''.join([i for i in tmp if not i.isdigit()])
datafield = line.split()
boyname2014 = datafield[0]
girlname2014 = datafield[1]
outfileb.write(boyname2014)
outfileg.write(girlname2014)
infile.close()
outfileb.close()
outfileg.close()
Want to consider a regex solution?
with open("babynames2014.txt", "r") as f1,open("boynames2014.txt", "w") as boys,open("girlnames2014.txt","w") as girls:
# Note this will not work for name which has speacial charecters like `-,$,etc`
boy_regex = re.compile(r"^\d\s?([a-zA-z0-9]+)\s[a-zA-z0-9]+$",re.MULTILINE)
girl_regex = re.compile(r"^\d\s?[a-zA-z0-9]+\s([a-zA-z0-9]+)$",re.MULTILINE)
boys.write('\n'.join(boy_regex.findall(f1.read())))
girls.write('\n'.join(girl_regex.findall(f1.read())))

Python: Search for keywords in a txt file that was converted from ebook and replace keywords.

The book I converted to a txt file is about 400 pages long. I want to loop through about a 1000 keywords and add a prefix to each keyword in the book. I am having trouble searching the whole book.
lines = open('list.txt', 'r').read().split("\n")
inpot = open('in.txt').read()
fout = open('in.txt', 'w')
def wr(lines):
with open('in.txt', 'r') as inF:
for line in inF:
if lines in line:
fout.write(line.replace(lines, "$"+lines))
for i in range(len(lines)):
wr(lines[i])
I think this is what you're trying to achieve:
import os
import re
# Get keywords.
with open('list.txt', 'r') as f:
keywords = f.read().split("\n")
rekeywords = '(' + '|'.join(keywords) + ')'
# Write new file with '$' in front of keywords.
with open('in.txt', 'r') as f_in, open('out.txt', 'w') as f_out:
for line in f_in:
f_out.write(re.sub(rekeywords, r'$\1', line))
# Replace old file with new.
os.remove('in.txt')
os.rename('out.txt', 'in.txt')
import re
def replace_keywords(input_file, keywords):
for line in input_file.readlines():
tmp_line = line
for keyword in keywords:
i = 0
while i >= 0:
m = keyword.search(tmp_line, match_index)
if m:
tmp_line = tmp_line[i:] + '$' + tmp_line[:i]
yield tmp_line
with open('lines.txt', 'r') as f:
regex_keywords = [re.compile('\\b{0}\\b'.format(k), re.I) for k in f.readlines()]
with open('output.txt', 'w') as output:
with open('input.txt', 'r') as input:
print >> output, '\n'.join(replace_keywords(input, regex_keywords))

How to add a copy of each line in a txt (containing two parts separated by a particular symbol) with the order of the two parts inverted?

I have txt with a number of lines (x#y). Each file has two parts (x, y) separated by a particular symbol (#). How would a python script that reads each line in a txt and adds a new line under each existing line, where the order of the two parts (x#y) is inverted (y#x).
What I'm trying to do presented as input/output:
INPUT:
x1#y1
x2#y2
x3#y3
OUTPUT:
x1#y1
y1#x1
x2#y2
y2#x2
x3#y3
y3#x3
How can this be done with python?
Here's one way:
infilename = 'in.dat'
outfilename = 'out.dat'
sep = '#'
with open(infilename) as infile, open(outfilename,'w') as outfile:
for line in infile:
split = line.strip().partition(sep)
outfile.write(line)
outfile.write(''.join(reversed(split)) + '\n')
and then
~/coding$ cat in.dat
x1#y1
x2#y2
x3#y3
~/coding$ python inverter.py
~/coding$ cat out.dat
x1#y1
y1#x1
x2#y2
y2#x2
x3#y3
y3#x3
Assumes the name of your file is bar.txt, and that you want to write it back to bar.txt. It also does no error checking nor cares about memory usage.
if __name__ == "__main__":
myfile = open("bar.txt", "rb")
lines = myfile.readlines()
myfile.close()
myfile = open("bar.txt", "wb")
for l in lines:
ls = l.strip()
myfile.write(ls + "\n")
lsplit = ls.split("#")
myfile.write(lsplit[1] + "#" + lsplit[0] + "\n")
myfile.close()
There are cleaner ways to do this, but you could use something like:
f = open('my_file.txt', 'r')
lines = f.readlines()
f.close()
outfile = open('my_file2.txt', 'w')
# write each line, followed by flipped line
for line in lines:
outfile.write('%s\n' % line)
parts = line.split('#')
outfile.write('%s#%s\n' % [parts[1], parts[0]])
outfile.close()
You can use open and read function to read your file and than use this function,
>>> st = "x1#y1"
>>> def myfunc(string):
... mylist = re.split(r'(#)',string)
... mylist.reverse()
... print "".join(mylist), string
...
>>> myfunc(st)
y1#x1 x1#y1
and than use write to write the strings into your new file.
def swap(delimiter="#", input="input.txt", ouput="output.txt"):
with open(input, "r") as input_file, open(ouput, "w") as output_file:
for line in input_file:
line = line.strip()
output_line = delimiter.join(reversed(line.split(delimiter)))
output_file.write(line+"\n")
output_file.write(output_line+"\n")
swap()
Riffing on #DSM:
with open(infilename) as infile, open(outfilename, 'w') as outfile:
lines = [line.rstrip() for line in infile]
outfile.write("\n".join("%s\n%s%s%s" (line, y, sep, x)
for line in lines
for x, y in line.split(sep)) + "\n")
lines could also be a generator statement instead of a list comprehension:
lines = (line.rstrip() for line in infile)
Later: I did not realize until now that OP wanted the original line followed by the reversed line. Adjusted accordingly.

Categories