Insert line at middle of file with Python? - python

Is there a way to do this? Say I have a file that's a list of names that goes like this:
Alfred
Bill
Donald
How could I insert the third name, "Charlie", at line x (in this case 3), and automatically send all others down one line? I've seen other questions like this, but they didn't get helpful answers. Can it be done, preferably with either a method or a loop?

This is a way of doing the trick.
with open("path_to_file", "r") as f:
contents = f.readlines()
contents.insert(index, value)
with open("path_to_file", "w") as f:
contents = "".join(contents)
f.write(contents)
index and value are the line and value of your choice, lines starting from 0.

If you want to search a file for a substring and add a new text to the next line, one of the elegant ways to do it is the following:
import os, fileinput
old = "A"
new = "B"
for line in fileinput.FileInput(file_path, inplace=True):
if old in line :
line += new + os.linesep
print(line, end="")

There is a combination of techniques which I found useful in solving this issue:
with open(file, 'r+') as fd:
contents = fd.readlines()
contents.insert(index, new_string) # new_string should end in a newline
fd.seek(0) # readlines consumes the iterator, so we need to start over
fd.writelines(contents) # No need to truncate as we are increasing filesize
In our particular application, we wanted to add it after a certain string:
with open(file, 'r+') as fd:
contents = fd.readlines()
if match_string in contents[-1]: # Handle last line to prevent IndexError
contents.append(insert_string)
else:
for index, line in enumerate(contents):
if match_string in line and insert_string not in contents[index + 1]:
contents.insert(index + 1, insert_string)
break
fd.seek(0)
fd.writelines(contents)
If you want it to insert the string after every instance of the match, instead of just the first, remove the else: (and properly unindent) and the break.
Note also that the and insert_string not in contents[index + 1]: prevents it from adding more than one copy after the match_string, so it's safe to run repeatedly.

You can just read the data into a list and insert the new record where you want.
names = []
with open('names.txt', 'r+') as fd:
for line in fd:
names.append(line.split(' ')[-1].strip())
names.insert(2, "Charlie") # element 2 will be 3. in your list
fd.seek(0)
fd.truncate()
for i in xrange(len(names)):
fd.write("%d. %s\n" %(i + 1, names[i]))

The accepted answer has to load the whole file into memory, which doesn't work nicely for large files. The following solution writes the file contents with the new data inserted into the right line to a temporary file in the same directory (so on the same file system), only reading small chunks from the source file at a time. It then overwrites the source file with the contents of the temporary file in an efficient way (Python 3.8+).
from pathlib import Path
from shutil import copyfile
from tempfile import NamedTemporaryFile
sourcefile = Path("/path/to/source").resolve()
insert_lineno = 152 # The line to insert the new data into.
insert_data = "..." # Some string to insert.
with sourcefile.open(mode="r") as source:
destination = NamedTemporaryFile(mode="w", dir=str(sourcefile.parent))
lineno = 1
while lineno < insert_lineno:
destination.file.write(source.readline())
lineno += 1
# Insert the new data.
destination.file.write(insert_data)
# Write the rest in chunks.
while True:
data = source.read(1024)
if not data:
break
destination.file.write(data)
# Finish writing data.
destination.flush()
# Overwrite the original file's contents with that of the temporary file.
# This uses a memory-optimised copy operation starting from Python 3.8.
copyfile(destination.name, str(sourcefile))
# Delete the temporary file.
destination.close()
EDIT 2020-09-08: I just found an answer on Code Review that does something similar to above with more explanation - it might be useful to some.

You don't show us what the output should look like, so one possible interpretation is that you want this as the output:
Alfred
Bill
Charlie
Donald
(Insert Charlie, then add 1 to all subsequent lines.) Here's one possible solution:
def insert_line(input_stream, pos, new_name, output_stream):
inserted = False
for line in input_stream:
number, name = parse_line(line)
if number == pos:
print >> output_stream, format_line(number, new_name)
inserted = True
print >> output_stream, format_line(number if not inserted else (number + 1), name)
def parse_line(line):
number_str, name = line.strip().split()
return (get_number(number_str), name)
def get_number(number_str):
return int(number_str.split('.')[0])
def format_line(number, name):
return add_dot(number) + ' ' + name
def add_dot(number):
return str(number) + '.'
input_stream = open('input.txt', 'r')
output_stream = open('output.txt', 'w')
insert_line(input_stream, 3, 'Charlie', output_stream)
input_stream.close()
output_stream.close()

Parse the file into a python list using file.readlines() or file.read().split('\n')
Identify the position where you have to insert a new line, according to your criteria.
Insert a new list element there using list.insert().
Write the result to the file.

location_of_line = 0
with open(filename, 'r') as file_you_want_to_read:
#readlines in file and put in a list
contents = file_you_want_to_read.readlines()
#find location of what line you want to insert after
for index, line in enumerate(contents):
if line.startswith('whatever you are looking for')
location_of_line = index
#now you have a list of every line in that file
context.insert(location_of_line, "whatever you want to append to middle of file")
with open(filename, 'w') as file_to_write_to:
file_to_write_to.writelines(contents)
That is how I ended up getting whatever data I want to insert to the middle of the file.
this is just pseudo code, as I was having a hard time finding clear understanding of what is going on.
essentially you read in the file to its entirety and add it into a list, then you insert your lines that you want to that list, and then re-write to the same file.
i am sure there are better ways to do this, may not be efficient, but it makes more sense to me at least, I hope it makes sense to someone else.

A simple but not efficient way is to read the whole content, change it and then rewrite it:
line_index = 3
lines = None
with open('file.txt', 'r') as file_handler:
lines = file_handler.readlines()
lines.insert(line_index, 'Charlie')
with open('file.txt', 'w') as file_handler:
file_handler.writelines(lines)

I write this in order to reutilize/correct martincho's answer (accepted one)
! IMPORTANT: This code loads all the file into ram and rewrites content to the file
Variables index, value may be what you desire, but pay attention to making value string and end with '\n' if you don't want it to mess with existing data.
with open("path_to_file", "r+") as f:
# Read the content into a variable
contents = f.readlines()
contents.insert(index, value)
# Reset the reader's location (in bytes)
f.seek(0)
# Rewrite the content to the file
f.writelines(contents)
See the python docs about file.seek method: Python docs

Below is a slightly awkward solution for the special case in which you are creating the original file yourself and happen to know the insertion location (e.g. you know ahead of time that you will need to insert a line with an additional name before the third line, but won't know the name until after you've fetched and written the rest of the names). Reading, storing and then re-writing the entire contents of the file as described in other answers is, I think, more elegant than this option, but may be undesirable for large files.
You can leave a buffer of invisible null characters ('\0') at the insertion location to be overwritten later:
num_names = 1_000_000 # Enough data to make storing in a list unideal
max_len = 20 # The maximum allowed length of the inserted line
line_to_insert = 2 # The third line is at index 2 (0-based indexing)
with open(filename, 'w+') as file:
for i in range(line_to_insert):
name = get_name(i) # Returns 'Alfred' for i = 0, etc.
file.write(F'{i + 1}. {name}\n')
insert_position = file.tell() # Position to jump back to for insertion
file.write('\0' * max_len + '\n') # Buffer will show up as a blank line
for i in range(line_to_insert, num_names):
name = get_name(i)
file.write(F'{i + 2}. {name}\n') # Line numbering now bumped up by 1.
# Later, once you have the name to insert...
with open(filename, 'r+') as file: # Must use 'r+' to write to middle of file
file.seek(insert_position) # Move stream to the insertion line
name = get_bonus_name() # This lucky winner jumps up to 3rd place
new_line = F'{line_to_insert + 1}. {name}'
file.write(new_line[:max_len]) # Slice so you don't overwrite next line
Unfortunately there is no way to delete-without-replacement any excess null characters that did not get overwritten (or in general any characters anywhere in the middle of a file), unless you then re-write everything that follows. But the null characters will not affect how your file looks to a human (they have zero width).

Related

filtering a weird text file in python

I have a text file in which each ID line starts with > and the next line(s) are the a sequence of characters. And the next line after the sequence of characters would be an other ID line starting with >. but in some of them, instead of sequence I have “Sequence unavailable”. The sequence after the ID line can be one or more lines.
like this example:
>ENSG00000173153|ENST00000000442|64073050;64074640|64073208;64074651
AAGCAGCCGGCGGCGCCGCCGAGTGAGGGGACGCGGCGCGGTGGGGCGGCGCGGCCCGAGGAGGCGGCGGAGGAGGGGCCGCCCGCGGCCCCCGGCTCACTCCGGCACTCCGGGCCGCTC
>ENSG00000004139|ENST00000003834
Sequence unavailable
I want to filter out those IDs with “Sequence unavailable”. The output should look like this:
output:
>ENSG00000173153|ENST00000000442|64073050;64074640|64073208;64074651
AAGCAGCCGGCGGCGCCGCCGAGTGAGGGGACGCGGCGCGGTGGGGCGGCGCGGCCCGAGGAGGCGGCGGAGGAGGGGCCGCCCGCGGCCCCCGGCTCACTCCGGCACTCCGGGCCGCTC
do you know how to do that in python?
Unlike the other answers, I’d strongly recommand against parsing the FASTA format manually. It’s not too hard but there are pitfalls, and it’s completely unnecessary since efficient, well-tested implementations exist:
Use Bio.SeqIO from BioPython; for example:
from Bio import SeqIO
for record in SeqIO.parse(filename, 'fasta'):
if record.seq != 'Sequenceunavailable':
SeqIO.write(record, outfile, 'fasta')
Note the missing space in 'Sequenceunavailable': reading the sequences in FASTA format will omit spaces.
How about this:
with open(filename, 'r+') as f:
data = f.read()
data = data.split('>')
result = ['>{}'.format(item) for item in data if item and 'Sequence unavailable' not in item]
f.seek(0)
for line in result:
f.write(line)
def main():
filename = open('text.txt', 'rU').readlines()
filterFile(filename)
def filterFile(SequenceFile):
outfile = open('outfile', 'w')
for line in SequenceFile:
if line.startswith('>'):
sequence = line.next()
if sequence.startswith('Sequence unavailable'):
//nothing should happen I suppose?
else:
outfile.write(line + "\n" + sequence + "\n")
main()
I unfortunately can't test this code right now but I made this out of the top of my head! Please test it and let me know what the outcome is so I can adjust the code :-)
So I don't exactly know how large these files will get, just in case, I'm doing it without mapping the file in memory:
with open(filename) as fh:
with open(filename+'.new', 'w+') as fh_new:
for idline, geneseq in zip(*[iter(fh)] * 2):
if geneseq.strip() != 'Sequence unavailable':
fh_new.write(idline)
fh_new.write(geneseq)
It works by creating a new file, then the zip thing is some magic to read the 2 lines of the file, the idline will be the first part and the geneseq the second part.
This solution should be relatively cheap in computer power but will create an extra output file.

I want to replace a certain column of a file with a list - Python

I have a stock file which looks like this:
12334232:seat belt:2.30:12:10:30
14312332:toy card:3.40:52:10:30
12512312:xbox one:5.30:23:10:30
12543243:laptop:1.34:14:10:30
65478263:banana:1.23:23:10:30
27364729:apple:4.23:42:10:30
28912382:orange:1.12:16:10:30
12892829:elephant:6.45:14:10:30
I want to replace the items in the fourth column if they are below the numbers in the fifth column after a certain transaction to the numbers in the sxith column. How would I replace the items in the fourth column?
Everytime I use the following lines of code below, it overwrites the whole file with nothing (deletes everything)
for line in stockfile:
c=line.split(":")
print("pass")
if stock_order[i] == User_list[i][0]:
stockfile.write(line.replace(current_stocklevel_list[i], reorder_order[i] ) )
else:
i = i + 1
I want the stockfile to look like this after it has replaced the necessary items in the column:
12334232:seat belt:2.30:30:10:30
14312332:toy card:3.40:30:10:30
12512312:xbox one:5.30:30:10:30
12543243:laptop:1.34:30::10:30
65478263:banana:1.23:30:10:30
27364729:apple:4.23:30:10:30
28912382:orange:1.12:30:10:30
12892829:elephant:6.45:30:10:30
If you are opening file after some time, you should use "a" (append) as a mode so that file doesn't get truncated.
Write pointer will automatically be on the end of file.
So:
f = open("filename", "a")
f.seek(0) # To start from beginning
But if you want to read and write, then add "+" to the mode and file wouldn't be truncated as well.
f = open("filename", "r+")
Both read and write pointers will be at the beginning of file, you'll need to seek only onto position where you wish to start writing/reading.
But you are doing it wrong.
See, file's content will be overwritten, not inserted automatically.
If you are in writable mode and at the end of file content will be added.
So, you either need to load whole file, make changes you need and write everything back.
Or, you have to write changes at some point and shift remaining content truncating the file if the content is shorter than before.
mmap module can help you to treat file as a string,. You will be able to efficiently shift data and to resize the file.
But, if you really want to change file in place, you should have the file with fixed length of columns. So, when you want to change a value, you do not need to shift anything back and forth. Just find the right row and col, seek there, write new value over the old one (making sure to delete all of the old) and that is just that.
You should try to read in the data first:
with open('inputfile', 'r') as infile:
data = infile.readlines()
Then you can loop over the data and edit as needed and write it out:
with open('outputfile', 'w') as outfile:
for line in data:
c = line.split(":")
if random.randint(1,3) == 1:
# update fourth column based on some good reason
c[3] += 2
outfile.write(':'.join(c) + '\n')
Or you could do it on go with something like:
with open('inputfile', 'r') as infile, open('outputfile', 'w') as outfile:
line = infile.readline()
c = line.split(":")
if random.randint(1,3) == 1:
# update fourth column based on some good reason
c[3] += 2
outfile.write(':'.join(c) + '\n')
os.rename('outputfile', 'inputfile')

Python3 - dumping a JSON data into penultimate line of a file [duplicate]

Is there a way to do this? Say I have a file that's a list of names that goes like this:
Alfred
Bill
Donald
How could I insert the third name, "Charlie", at line x (in this case 3), and automatically send all others down one line? I've seen other questions like this, but they didn't get helpful answers. Can it be done, preferably with either a method or a loop?
This is a way of doing the trick.
with open("path_to_file", "r") as f:
contents = f.readlines()
contents.insert(index, value)
with open("path_to_file", "w") as f:
contents = "".join(contents)
f.write(contents)
index and value are the line and value of your choice, lines starting from 0.
If you want to search a file for a substring and add a new text to the next line, one of the elegant ways to do it is the following:
import os, fileinput
old = "A"
new = "B"
for line in fileinput.FileInput(file_path, inplace=True):
if old in line :
line += new + os.linesep
print(line, end="")
There is a combination of techniques which I found useful in solving this issue:
with open(file, 'r+') as fd:
contents = fd.readlines()
contents.insert(index, new_string) # new_string should end in a newline
fd.seek(0) # readlines consumes the iterator, so we need to start over
fd.writelines(contents) # No need to truncate as we are increasing filesize
In our particular application, we wanted to add it after a certain string:
with open(file, 'r+') as fd:
contents = fd.readlines()
if match_string in contents[-1]: # Handle last line to prevent IndexError
contents.append(insert_string)
else:
for index, line in enumerate(contents):
if match_string in line and insert_string not in contents[index + 1]:
contents.insert(index + 1, insert_string)
break
fd.seek(0)
fd.writelines(contents)
If you want it to insert the string after every instance of the match, instead of just the first, remove the else: (and properly unindent) and the break.
Note also that the and insert_string not in contents[index + 1]: prevents it from adding more than one copy after the match_string, so it's safe to run repeatedly.
You can just read the data into a list and insert the new record where you want.
names = []
with open('names.txt', 'r+') as fd:
for line in fd:
names.append(line.split(' ')[-1].strip())
names.insert(2, "Charlie") # element 2 will be 3. in your list
fd.seek(0)
fd.truncate()
for i in xrange(len(names)):
fd.write("%d. %s\n" %(i + 1, names[i]))
The accepted answer has to load the whole file into memory, which doesn't work nicely for large files. The following solution writes the file contents with the new data inserted into the right line to a temporary file in the same directory (so on the same file system), only reading small chunks from the source file at a time. It then overwrites the source file with the contents of the temporary file in an efficient way (Python 3.8+).
from pathlib import Path
from shutil import copyfile
from tempfile import NamedTemporaryFile
sourcefile = Path("/path/to/source").resolve()
insert_lineno = 152 # The line to insert the new data into.
insert_data = "..." # Some string to insert.
with sourcefile.open(mode="r") as source:
destination = NamedTemporaryFile(mode="w", dir=str(sourcefile.parent))
lineno = 1
while lineno < insert_lineno:
destination.file.write(source.readline())
lineno += 1
# Insert the new data.
destination.file.write(insert_data)
# Write the rest in chunks.
while True:
data = source.read(1024)
if not data:
break
destination.file.write(data)
# Finish writing data.
destination.flush()
# Overwrite the original file's contents with that of the temporary file.
# This uses a memory-optimised copy operation starting from Python 3.8.
copyfile(destination.name, str(sourcefile))
# Delete the temporary file.
destination.close()
EDIT 2020-09-08: I just found an answer on Code Review that does something similar to above with more explanation - it might be useful to some.
You don't show us what the output should look like, so one possible interpretation is that you want this as the output:
Alfred
Bill
Charlie
Donald
(Insert Charlie, then add 1 to all subsequent lines.) Here's one possible solution:
def insert_line(input_stream, pos, new_name, output_stream):
inserted = False
for line in input_stream:
number, name = parse_line(line)
if number == pos:
print >> output_stream, format_line(number, new_name)
inserted = True
print >> output_stream, format_line(number if not inserted else (number + 1), name)
def parse_line(line):
number_str, name = line.strip().split()
return (get_number(number_str), name)
def get_number(number_str):
return int(number_str.split('.')[0])
def format_line(number, name):
return add_dot(number) + ' ' + name
def add_dot(number):
return str(number) + '.'
input_stream = open('input.txt', 'r')
output_stream = open('output.txt', 'w')
insert_line(input_stream, 3, 'Charlie', output_stream)
input_stream.close()
output_stream.close()
Parse the file into a python list using file.readlines() or file.read().split('\n')
Identify the position where you have to insert a new line, according to your criteria.
Insert a new list element there using list.insert().
Write the result to the file.
location_of_line = 0
with open(filename, 'r') as file_you_want_to_read:
#readlines in file and put in a list
contents = file_you_want_to_read.readlines()
#find location of what line you want to insert after
for index, line in enumerate(contents):
if line.startswith('whatever you are looking for')
location_of_line = index
#now you have a list of every line in that file
context.insert(location_of_line, "whatever you want to append to middle of file")
with open(filename, 'w') as file_to_write_to:
file_to_write_to.writelines(contents)
That is how I ended up getting whatever data I want to insert to the middle of the file.
this is just pseudo code, as I was having a hard time finding clear understanding of what is going on.
essentially you read in the file to its entirety and add it into a list, then you insert your lines that you want to that list, and then re-write to the same file.
i am sure there are better ways to do this, may not be efficient, but it makes more sense to me at least, I hope it makes sense to someone else.
A simple but not efficient way is to read the whole content, change it and then rewrite it:
line_index = 3
lines = None
with open('file.txt', 'r') as file_handler:
lines = file_handler.readlines()
lines.insert(line_index, 'Charlie')
with open('file.txt', 'w') as file_handler:
file_handler.writelines(lines)
I write this in order to reutilize/correct martincho's answer (accepted one)
! IMPORTANT: This code loads all the file into ram and rewrites content to the file
Variables index, value may be what you desire, but pay attention to making value string and end with '\n' if you don't want it to mess with existing data.
with open("path_to_file", "r+") as f:
# Read the content into a variable
contents = f.readlines()
contents.insert(index, value)
# Reset the reader's location (in bytes)
f.seek(0)
# Rewrite the content to the file
f.writelines(contents)
See the python docs about file.seek method: Python docs
Below is a slightly awkward solution for the special case in which you are creating the original file yourself and happen to know the insertion location (e.g. you know ahead of time that you will need to insert a line with an additional name before the third line, but won't know the name until after you've fetched and written the rest of the names). Reading, storing and then re-writing the entire contents of the file as described in other answers is, I think, more elegant than this option, but may be undesirable for large files.
You can leave a buffer of invisible null characters ('\0') at the insertion location to be overwritten later:
num_names = 1_000_000 # Enough data to make storing in a list unideal
max_len = 20 # The maximum allowed length of the inserted line
line_to_insert = 2 # The third line is at index 2 (0-based indexing)
with open(filename, 'w+') as file:
for i in range(line_to_insert):
name = get_name(i) # Returns 'Alfred' for i = 0, etc.
file.write(F'{i + 1}. {name}\n')
insert_position = file.tell() # Position to jump back to for insertion
file.write('\0' * max_len + '\n') # Buffer will show up as a blank line
for i in range(line_to_insert, num_names):
name = get_name(i)
file.write(F'{i + 2}. {name}\n') # Line numbering now bumped up by 1.
# Later, once you have the name to insert...
with open(filename, 'r+') as file: # Must use 'r+' to write to middle of file
file.seek(insert_position) # Move stream to the insertion line
name = get_bonus_name() # This lucky winner jumps up to 3rd place
new_line = F'{line_to_insert + 1}. {name}'
file.write(new_line[:max_len]) # Slice so you don't overwrite next line
Unfortunately there is no way to delete-without-replacement any excess null characters that did not get overwritten (or in general any characters anywhere in the middle of a file), unless you then re-write everything that follows. But the null characters will not affect how your file looks to a human (they have zero width).

Writing certain contents from one file to another in python

I have a file that I need to write certain contents to a new file.
The current contents is as follows:
send from #1373846594 to pool/10.0.68.61#1374451276 estimated size is 7.83G
send from #1374451276 to pool/10.0.68.61#1375056084 estimated size is 10.0G
I need the new file to show:
#1373846594 --> pool/10.0.68.61#1374451276 --> 7.83G
#1374451276 --> pool/10.0.68.61#1375056084 --> 10.0G
I have tried:
with open("file", "r") as drun:
for _,_,snap,_,pool_,_,_,size in zip(*[iter(drun)]*9):
drun.write("{0}\t{1}\t{2}".format(snap,pool,size))
I know I am either way off or just not quite there but I am not sure where to go next with this. Any help would be appreciated.
You want to split your lines using str.split(), and you'll need to write to another file first, then move that back into place; reading and writing to the same file is tricky and should be avoided unless you are working with fixed record sizes.
However, the fileinput module makes in-place file editing easy enough:
import fileinput
for line in fileinput.input(filename, inplace=True):
components = line.split()
snap, pool, size = components[2], components[4], components[-1]
print '\t'.join((snap,pool,size))
The print statement writes to sys.stdout, which fileinput conveniently redirects when inplace=True is set. This means you are writing to the output file (that replaces the original input file), writing a bonus newline on every loop too.
inf = open(file)
outf = open(outfile,'w')
for line in inf:
parts = line.split()
outf.write("{0}-->{1}-->{2}".format(parts[2], parts[4], parts[8]))
inf.close()
outf.close()
Perhaps something simple using a regex pattern match:
with open('output_file', 'w') as outFile:
for line in open('input_file'):
line = line.split()
our_patterns = [i for i in line if re.search('^#', i) or \
re.search('^pool', i) or \
re.search('G$', i)]
outFile.write(' --> '.join(our_patterns) + '\n')
The pattern matching will extract any parts that begin with # or pool, as well as the final size that ends with G. These parts are then joined with the --> and written to file. Hope this helps
SOURCE, DESTINATION, SIZE = 2, 4, 8
with open('file.txt') as drun:
for line in drun:
pieces = line.split()
print(pieces[SOURCE], pieces[DESTINATION], pieces[SIZE], sep=' --> ', file=open('log.txt', 'a'))

Fastest Way to Delete a Line from Large File in Python

I am working with a very large (~11GB) text file on a Linux system. I am running it through a program which is checking the file for errors. Once an error is found, I need to either fix the line or remove the line entirely. And then repeat...
Eventually once I'm comfortable with the process, I'll automate it entirely. For now however, let's assume I'm running this by hand.
What would be the fastest (in terms of execution time) way to remove a specific line from this large file? I thought of doing it in Python...but would be open to other examples. The line might be anywhere in the file.
If Python, assume the following interface:
def removeLine(filename, lineno):
Thanks,
-aj
You can have two file objects for the same file at the same time (one for reading, one for writing):
def removeLine(filename, lineno):
fro = open(filename, "rb")
current_line = 0
while current_line < lineno:
fro.readline()
current_line += 1
seekpoint = fro.tell()
frw = open(filename, "r+b")
frw.seek(seekpoint, 0)
# read the line we want to discard
fro.readline()
# now move the rest of the lines in the file
# one line back
chars = fro.readline()
while chars:
frw.writelines(chars)
chars = fro.readline()
fro.close()
frw.truncate()
frw.close()
Modify the file in place, offending line is replaced with spaces so the remainder of the file does not need to be shuffled around on disk. You can also "fix" the line in place if the fix is not longer than the line you are replacing
import os
from mmap import mmap
def removeLine(filename, lineno):
f=os.open(filename, os.O_RDWR)
m=mmap(f,0)
p=0
for i in range(lineno-1):
p=m.find('\n',p)+1
q=m.find('\n',p)
m[p:q] = ' '*(q-p)
os.close(f)
If the other program can be changed to output the fileoffset instead of the line number, you can assign the offset to p directly and do without the for loop
As far as I know, you can't just open a txt file with python and remove a line. You have to make a new file and move everything but that line to it. If you know the specific line, then you would do something like this:
f = open('in.txt')
fo = open('out.txt','w')
ind = 1
for line in f:
if ind != linenumtoremove:
fo.write(line)
ind += 1
f.close()
fo.close()
You could of course check the contents of the line instead to determine if you want to keep it or not. I also recommend that if you have a whole list of lines to be removed/changed to do all those changes in one pass through the file.
If the lines are variable length then I don't believe that there is a better algorithm than reading the file line by line and writing out all lines, except for the one(s) that you do not want.
You can identify these lines by checking some criteria, or by keeping a running tally of lines read and suppressing the writing of the line(s) that you do not want.
If the lines are fixed length and you want to delete specific line numbers, then you may be able to use seek to move the file pointer... I doubt you're that lucky though.
Update: solution using sed as requested by poster in comment.
To delete for example the second line of file:
sed '2d' input.txt
Use the -i switch to edit in place. Warning: this is a destructive operation. Read the help for this command for information on how to make a backup automatically.
def removeLine(filename, lineno):
in = open(filename)
out = open(filename + ".new", "w")
for i, l in enumerate(in, 1):
if i != lineno:
out.write(l)
in.close()
out.close()
os.rename(filename + ".new", filename)
I think there was a somewhat similar if not exactly the same type of question asked here. Reading (and writing) line by line is slow, but you can read a bigger chunk into memory at once, go through that line by line skipping lines you don't want, then writing this as a single chunk to a new file. Repeat until done. Finally replace the original file with the new file.
The thing to watch out for is when you read in a chunk, you need to deal with the last, potentially partial line you read, and prepend that into the next chunk you read.
#OP, if you can use awk, eg assuming line number is 10
$ awk 'NR!=10' file > newfile
I will provide two alternatives based on the look-up factor (line number or a search string):
Line number
def removeLine2(filename, lineNumber):
with open(filename, 'r+') as outputFile:
with open(filename, 'r') as inputFile:
currentLineNumber = 0
while currentLineNumber < lineNumber:
inputFile.readline()
currentLineNumber += 1
seekPosition = inputFile.tell()
outputFile.seek(seekPosition, 0)
inputFile.readline()
currentLine = inputFile.readline()
while currentLine:
outputFile.writelines(currentLine)
currentLine = inputFile.readline()
outputFile.truncate()
String
def removeLine(filename, key):
with open(filename, 'r+') as outputFile:
with open(filename, 'r') as inputFile:
seekPosition = 0
currentLine = inputFile.readline()
while not currentLine.strip().startswith('"%s"' % key):
seekPosition = inputFile.tell()
currentLine = inputFile.readline()
outputFile.seek(seekPosition, 0)
currentLine = inputFile.readline()
while currentLine:
outputFile.writelines(currentLine)
currentLine = inputFile.readline()
outputFile.truncate()

Categories