I have text that is key-value pairs separated by '='. I would like to replace the line if the key matches. if not, i would like to append it at the bottom. I've tried several ways, including:
def split_command_key_and_value(command):
if '=' in command:
command2 = command.split('=')
return command2
def test(command, path):
command2 = split_command_key_and_value(command)
pattern = command2[0]
myfile = open(path,'r') # open file handle for read
# use r'', you don't need to replace '\' with '/'
result = open(path, 'w') # open file handle for write
for line in myfile:
line = line.strip() # it's always a good behave to strip what you read from files
if pattern in line:
line = command # if match, replace line
result.write(line) # write every line
myfile.close() # don't forget to close file handle
result.close()
I know the above is just to replace text, but it deletes the text in the file, and I can't see why. Could someone point me in the right direction?
Thanks
Update:
I'm almost there, but some of my lines have similar keys, so mutiple lines are matching when only 1 should. I've tried to incorporate a regex boundary in my loop with no luck. My code is below. Does anyone have a suggestion?
There is some text in the file that isn't key-value, so I would like to skip that.
def modify(self, name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
with open('/file/', 'w') as tmpstream:
with open('/file/', 'r') as stream:
for line in stream:
if setting_name in line:
tmpstream.write(comb)
else:
tmpstream.write(line)
I think I got it. See code below.
def modify(self, name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
mylist = []
with open('/file/', 'w') as tmpstream:
with open('/file/', 'r') as stream:
for line in stream:
a = line.split()
b = re.compile('\\b'+name+'\\b')
if len(a) > 0:
if b.search(a[0]):
tmpstream.write(comb)
else:
tmpstream.write(line)
I spoke too soon. It stops at the key-value I provide. So, it only writes one line, and doesn't write the lines that don't match.
def modify(name, value):
comb = name + ' ' + '=' + ' ' + value + '\n'
mylist = []
with open('/file1', 'w') as tmpstream:
with open('/file2', 'r') as stream:
for line in stream:
a = line.split()
b = re.compile('\\b'+name+'\\b')
if len(a) > 0:
if b.search(a[0]):
tmpstream.write(comb)
else:
tmpstream.write(line)
Can anyone see the issue?
Because when you open file for writing
result = open(path, 'w') # open file handle for write
you just erase it content. Try to write in different file and after all work done replace old file with new one. Or read all data into memory and then process it and write to file.
with open(path) as f:
data = f.read()
with open(path, 'w') as f:
for l in data:
# make job here
first of all you are reading an writing the same file ...
you could first read it all and the write line by line
with open(path,'r') as f:
myfile = f.read() # read everything in the variable "myfile"
result = open(path, 'w') # open file handle for write
for line in myfile.splitlines(): # process the original file content 1 line at a time
# as before
I strongly recommend reading python's documentation on how to read and write files.
If you open an existing file in write-mode open(path, 'w'), its content will be erased:
mode can be (...) 'w' for only writing (an existing file with the same name will be erased)
To replace a line in python you can have a look at this: Search and replace a line in a file in Python
Here is one the solutions provided there adapted to your context (tested for python3):
from tempfile import mkstemp
from shutil import move
from os import close
def test(filepath, command):
# Split command into key/value
key, _ = command.split('=')
matched_key = False
# Create a temporary file
fh, tmp_absolute_path = mkstemp()
with open(tmp_absolute_path, 'w') as tmp_stream:
with open(filepath, 'r') as stream:
for line in stream:
if key in line:
matched_key = True
tmp_stream.write(command + '\n')
else:
tmp_stream.write(line)
if not matched_key:
tmp_stream.write(command + '\n')
close(fh)
move(tmp_absolute_path, filepath)
Note that with the code above every line that matches key (key=blob or blob=key) will be replaced.
Related
I need to create a file that changes the date and name of a .txt, but I can only change one or the other with this code I found on the internet, can anyone give me any tips?
Print
import os
from ast import Str
file = open("example.txt", "r")
replacement = ""
data = "02/07/2022"
name = "Alan"
for line in file:
line = line.strip()
changes = line.replace("__/__/____", data)
replacement = replacement + changes + "\n"
file.close()
fout = open("final.txt", "w")
fout.write(replacement)
fout.close()
You don't need to do this a line a time. You can replace that entire program with this:
data = "02/07/2022"
name = "Alan"
text = open("example.txt", "r").read().replace("__/__/____", data)
open("final.txt", "w").write(text)
I want to search for particular text and replace the line if the text is present in that line.
In this code I replace line 125, but want to replace dynamically according to the text:
file = open("config.ini", "r")
lines = file.readlines()
lines[125] = "minimum_value_gain = 0.01" + '\n'
f.writelines(lines)
f.close()
How do I make it that if a line has:
minimum_value_gain =
then replace that line with:
minimum_value_gain = 0.01
There is no reason for you to manually parse a config.ini file textually. You should use configparser to make things much simpler. This library reads the file for you, and in a way converts it to a dict so processing the data is much easier. For your task you can do something like:
import configparser
config = configparser.ConfigParser()
config.read("config.ini")
for section in config:
if config.has_option(section, "minimum_value_gain"):
config.set(section, "minimum_value_gain", "0.01")
with open("config.ini", 'w') as f:
config.write(f)
Since you are replacing complete line so if statement will do the trick for you, no need to replace text
#updated make sure one line doesn't have both values
file = open("config.ini", "r")
lines=file.readlines()
newlines = []
for line in lines:
if "minimum_value_gain" in line:
line = "minimum_value_gain = 0.01" + '\n'
if "score_threshold" in line:
line = "Values you want to add"+'\n'
newlines.append(line)
f.writelines(newlines)
f.close()
Little bit messy and not optimized but get's the job the, first readlines and inserts the next_text to the given pos(line). If the line doesn't exists Raises IndexError, else writes to the file
def replace_in_file(filename: str, search_text: str, string_to_add: str) -> None:
with open(filename, "r+") as file_to_write:
lines = file_to_write.readlines()
file_to_write.seek(0)
file_to_write.truncate()
for line in lines:
if line.startswith(search_text):
line = line.rstrip("\n") + string_to_add + "\n"
file_to_write.write(line)
replace_in_file("sdf.txt", "minimum_value_gain", " = 0.01")
You can use also the regex library of Python.
Here is an example.
It is better not to read and write in the same file, that is not good practice. Write in a different file then eventually rename it.
import re
pattern = 'minimum_value_gain'
string_to_replace = 'minimum_value_gain = 0.01\n'
file = open("config.ini", "r")
fileout = open("new_config.ini", "a")
lines=file.readlines()
newlines = [string_to_replace if re.match(pattern, line) else line for line in lines]
f.close()
fileout.writelines(lines)
fileout.close()
You can rename the file afterwards :
import os
os.remove("config.ini")
os.rename("new_config.ini", "config.ini")
Set the string you would like to look for (match_string = 'example')
Have a list output_list that is empty
Use with open(x,y) as z: (this will automatically close the file after completion)
for each line in file.readlines() - run through each line of the file
The if statement adds your replacement line if the match_string is in the line, else just the adds the line
NOTE: All variables can be any name that is not reserved (don't call something just 'list')
match_string = 'example'
output_list = []
with open("config.ini", "r") as file:
for line in file.readlines():
if match_string in line:
output_list.append('minimum_value_gain = 0.01\n')
else:
output_list.append(line)
Maybe not ideal for the first introduction to Python (or more readable) - But I would have done the problem as follows:
with open('config.ini', 'r') as in_file:
out_file = ['minimum_value_gain = 0.01\n' if 'example' in line else line for line in in_file.readlines()]
To replace a specific text in a string
a = 'My name is Zano'
b = a.replace('Zano', 'Zimmer')
I'm new to python and I'm trying various small things to see how they work:
items = dict()
with open(path) as f:
content = f.readlines()
for line in content:
splitStuff = line.split('!')
if splitStuff[0] in item:
items[splitStuff[0]] += ',' + Results[1]
else:
items[splitStuff[0]] = Results[1]
f.close()
with open(path2, 'a') as f:
for key, value in items.items():
f.write(key + '!' + value)
f.close()
It opens a file with this content:
3!Angel
3!Devil
4!Nasko
4!Pesho
4!Gosho
5!Kalin
6!Gancho
6!Boncho
6!Toncho
6!Moncho
And ends up writing a file with this content:
3!Angel
,Devil
4!Nasko
,Pesho
,Gosho
5!Kalin
6!Gancho
,Boncho
,Toncho
,Moncho
The part I don't understand is where are those new lines appearing from every time I edit a value?
EDIT: This is the desired output.
3!Angel,Devil
4!Nasko,Pesho,Gosho
5!Kalin
6!Gancho,Boncho,Toncho,Moncho
EDIT2: Never mind figured it out. Its because there are new lines in the original file and apparently reading file line by line catches them as well in python, unlike c# where they are ignored.
Lines you read with readlines() have a trailing newline.
for line in content:
line = line.rstrip()
splitStuff = line.split('!')
... etc ...
A solution could look like this:
path = "file1"
path2 = "file2"
items = dict()
with open(path) as f:
content = f.readlines()
for line in content:
splitStuff = line.split('!')
if splitStuff[0] in items:
items[splitStuff[0]] += ',' + splitStuff[1][:-1]
else:
items[splitStuff[0]] = splitStuff[1][:-1]
f.close()
with open(path2, 'a') as f:
for key, value in items.items():
f.write(key + '!' + value)
f.write("\n")
f.close()
You just had to remove the newline from each line of the file by adding [:-1].
I'm just starting to learn python and have a textfile that looks like this:
Hello
World
Hello
World
And I want to add the numbers '55' to the beggining and end of every string that starts with 'hello'
The numbers '66' to the beggining and every of every string that starts with 'World'
etc
So my final file should look like this:
55Hello55
66World66
55Hello55
66World66
I'm reading the file in all at once, storing it in a string, and then trying to append accordingly
fp = open("test.txt","r")
strHolder = fp.read()
print(strHolder)
if 'Hello' in strHolder:
strHolder = '55' + strHolder + '55'
if 'World' in strHolder:
strHolder = '66' + strHolder + '66'
print(strHolder)
fp.close()
However, my string values '55' and '66' are always being added to the front of the file and end of the file, not the front of a certain string and to the end of the string, where I get this output of the string:
6655Hello
World
Hello
World
5566
Any help would be much appreciated.
You are reading the whole file at once with .read().
You can read it line by line in a for loop.
new_file = []
fp = open("test.txt", "r")
for line in fp:
line = line.rstrip("\n") # The string ends in a newline
# str.rstrip("\n") removes newlines at the end
if "Hello" in line:
line = "55" + line + "55"
if "World" in line:
line = "66" + line + "66"
new_file.append(line)
fp.close()
new_file = "\n".join(new_file)
print(new_file)
You could do it all at once, by reading the whole file and splitting by "\n" (newline)
new_file = []
fp = open("text.txt")
fp_read = fp.read()
fp.close()
for line in fp_read.split("\n"):
if "Hello" # ...
but this would load the whole file into memory at once, while the for loop only loads line by line (So this may not work for larger files).
The behaviour of this is that if the line has "Hello" in it, it will get "55" before and after it (even if the line is " sieohfoiHellosdf ") and the same for "World", and if it has both "Hello" and "World" (e.g. "Hello, World!" or "asdifhoasdfhHellosdjfhsodWorldosadh") it will get "6655" before and after it.
Just as a side note: You should use with to open a file as it makes sure that the file is closed later.
new_file = []
with open("test.txt") as fp: # "r" mode is default
for line in fp:
line = line.rstrip("\n")
if "Hello" in line:
line = "55" + line + "55"
if "World" in line:
line = "66" + line + "66"
new_file.append(line)
new_file = "\n".join(new_file)
print(new_file)
You need to iterate over each line of the file in order to get the desired result. In your code you are using .read(), instead use .readlines() to get list of all lines.
Below is the sample code:
lines = []
with open("test.txt", "r") as f:
for line in f.readlines(): # < Iterate over each line
if line.startswith("Hello"): # <-- check if line starts with "Hello"
line = "55{}55".format(line)
elif line.startswith("World"):
line = "66{}66".format(line)
lines.append(line)
print "\n".join(lines)
Why to use with? Check Python doc:
The ‘with‘ statement clarifies code that previously would use try...finally blocks to ensure that clean-up code is executed. In this section, I’ll discuss the statement as it will commonly be used. In the next section, I’ll examine the implementation details and show how to write objects for use with this statement.
The ‘with‘ statement is a control-flow structure whose basic structure is:
with expression [as variable]: with-block
The expression is evaluated, and it should result in an object that supports the context management protocol (that is, has enter() and exit() methods).
once you have read the file:
read_file = read_file.replace('hello','55hello55')
It'll replace all hellos with 55hello55
and use with open(text.txt, 'r' ) as file_hndler:
To read a text file, I recommend the following way which is compatible with Python 2 & 3:
import io
with io.open("test", mode="r", encoding="utf8") as fd:
...
Here, I make the assumption that your file use uft8 encoding.
Using a with statement make sure the file is closed at the end of reading even if a error occurs (an exception). To learn more about context manager, take a look at the Context Library.
There are several ways to read a text file:
read the whole file with: fd.read(), or
read line by line with a loop: for line in fd.
If you read the whole file, you'll need to split the lines (see str.splitlines. Here are the two solutions:
with io.open("test", mode="r", encoding="utf8") as fd:
content = fd.read()
for line in content.splilines():
if "Hello" in line:
print("55" + line + "55")
if "World" in line:
print("66" + line + "66")
Or
with io.open("test", mode="r", encoding="utf8") as fd:
for line in content.splilines():
line = line[:-1]
if "Hello" in line:
print("55" + line + "55")
if "World" in line:
print("66" + line + "66")
If you need to write the result in another file you can open the output file in write mode and use print(thing, file=out) as follow:
with io.open("test", mode="r", encoding="utf8") as fd:
with io.open("test", mode="w", encoding="utf8") as out:
for line in content.splilines():
line = line[:-1]
if "Hello" in line:
print("55" + line + "55", file=out)
if "World" in line:
print("66" + line + "66", file=out)
If you use Python 2, you'll need the following directive to use the print function:
from __future__ import print_function
I'm converting text directly to epub and I'm having a problem automatically splitting the HTML book file into separate header/chapter files. At the moment, the code below partially works but only creates every other chapter file. So half the header/chapter files are missing from the output. Here is the code:
def splitHeaderstoFiles(fpath):
infp = open(fpath, 'rt', encoding=('utf-8'))
for line in infp:
# format and split headers to files
if '<h1' in line:
#-----------format header file names and other stuff ------------#
# create a new file for the header/chapter section
path = os.getcwd() + os.sep + header
with open(path, 'wt', encoding=('utf-8')) as outfp:
# write html top meta headers
outfp = addMetaHeaders(outfp)
# add the header
outfp = outfp.write(line)
# add the chapter/header bodytext
for line in infp:
if '<h1' not in line:
outfp.write(line)
else:
outfp.write('</body>\n</html>')
break
else:
continue
infp.close()
The problem occurs in the second 'for loop' at the bottom of the code, when I look for the next h1 tag to stop the split. I cannot use seek() or tell() to rewind or move back one line so the program can find the next header/chapter on the next iteration. Apparently you cannot use these in python in a for loop containing an implicit iter or next object in operation. Just gives a 'can't do non-zero cur-relative seeks' error.
I've also tried the while line != ' ' + readline() combination in the code which also gives the same error as above.
Does anyone know an easy way to split HTML headers/chapters of varying lengths into separate files in python? Are there any special python modules(such as pickles) that could help make this task easier?
I'm using Python 3.4
My grateful thanks in advance for any solutions to this problem...
I ran into similar problem a while ago, here is a simplified solution:
from itertools import count
chapter_number = count(1)
output_file = open('000-intro.html', 'wb')
with open('index.html', 'rt') as input_file:
for line in input_file:
if '<h1' in line:
output_file.close()
output_file = open('{:03}-chapter'.format(next(chapter_number)), 'wb')
output_file.write(line)
output_file.close()
In this approach, the first block of text leading to the first h1 block is written into 000-intro.html, the first chapter will be written into 001-chapter.html and so on. Please modify it to taste.
The solution is a simple one: Upon encountering the h1 tag, close the last output file and open a new one.
You are looping over your input file twice, which is likely causing your problems:
for line in infp:
...
with open(path, 'wt', encoding=('utf-8')) as outfp:
...
for line in infp:
...
Each for is going to have it's own iterator, so you are going to loop over the file many times.
You might try transforming your for loop into a while so you're not using two different iterators:
while infp:
line = infp.readline()
if '<h1' in line:
with open(...) as outfp:
while infp:
line = infp.readline()
if '<h1' in line:
break
outfp.writeline(...)
Alternatively, you may wish to use an HTML parser (i.e., BeautifulSoup). Then you can do something like what is described here: https://stackoverflow.com/a/8735688/65295.
Update from comment - essentially, read the entire file all at once so you can freely move back or forward as necessary. This probably won't be a performance issue unless you have a really really big file (or very little memory).
lines = infp.readlines() # read the entire file
i = 0
while i < len(lines):
if '<h1' in lines[i]:
with open(...) as outfp:
j = i + 1
while j < len(lines):
if '<h1' in lines[j]:
break
outfp.writeline(lines[j])
# line j has an <h1>, set i to j so we detect the it at the
# top of the next loop iteration.
i = j
else:
i += 1
I eventually found the answer to the above problem. The code below does alot more that just get the file header. It also simultaneously loads two parallel list arrays with formatted file name data(with extension) and pure header name data respectively so I can use these lists to fill in the and formatted filename extension in these html files within a while loop in one hit. The code now works well and is shown below.
def splitHeaderstoFiles(dir, inpath):
count = 1
t_count = 0
out_path = ''
header = ''
write_bodytext = False
file_path_names = []
pure_header_names = []
inpath = dir + os.sep + inpath
with open(inpath, 'rt', encoding=('utf-8')) as infp:
for line in infp:
if '<h1' in line:
#strip html tags, convert to start caps
p = re.compile(r'<.*?>')
header = p.sub('', line)
header = capwords(header)
line_save = header
# Add 0 for count below 10
if count < 10:
header = '0' + str(count) + '_' + header
else:
header = str(count) + '_' + header
# remove all spaces + add extension in header
header = header.replace(' ', '_')
header = header + '.xhtml'
count = count + 1
#create two parallel lists used later
out_path = dir + os.sep + header
outfp = open(out_path, 'wt', encoding=('utf-8'))
file_path_names.insert(t_count, out_path)
pure_header_names.insert(t_count, line_save)
t_count = t_count + 1
# Add html meta headers and write it
outfp = addMainHeaders(outfp)
outfp.write(line)
write_bodytext = True
# add header bodytext
elif write_bodytext == True:
outfp.write(line)
# now add html titles and close the html tails on all files
max_num_files = len(file_path_names)
tmp = dir + os.sep + 'temp1.tmp'
i = 0
while i < max_num_files:
outfp = open(tmp, 'wt', encoding=('utf-8'))
infp = open(file_path_names[i], 'rt', encoding=('utf-8'))
for line in infp:
if '<title>' in line:
line = line.strip(' ')
line = line.replace('<title></title>', '<title>' + pure_header_names[i] + '</title>')
outfp.write(line)
else:
outfp.write(line)
# add the html tail
if '</body>' in line or '</html>' in line:
pass
else:
outfp.write(' </body>' + '\n</html>')
# clean up
infp.close()
outfp.close()
shutil.copy2(tmp, file_path_names[i])
os.remove(tmp)
i = i + 1
# now rename just the title page
if os.path.isfile(file_path_names[0]):
title_page_name = file_path_names[0]
new_title_page_name = dir + os.sep + '01_Title.xhtml'
os.rename(title_page_name, new_title_page_name)
file_path_names[0] = '01_Title.xhtml'
else:
logmsg27(DEBUG_FLAG)
os._exit(0)
# xhtml file is no longer needed
if os.path.isfile(inpath):
os.remove(inpath)
# returned list values are also used
# later to create epub opf and ncx files
return(file_path_names, pure_header_names)
#Hai Vu and #Seth -- Thanks for all your help.