How to replace two different lines of text? - python

I need to create a script that replaces the date and the name in a .txt file, but with this code I found on the internet I can only change one or the other. Can anyone give me any tips?
import os
from ast import Str

file = open("example.txt", "r")
replacement = ""
data = "02/07/2022"
name = "Alan"
for line in file:
    line = line.strip()
    changes = line.replace("__/__/____", data)
    replacement = replacement + changes + "\n"
file.close()

fout = open("final.txt", "w")
fout.write(replacement)
fout.close()

You don't need to do this a line at a time. You can replace that entire program with this:
data = "02/07/2022"
name = "Alan"
text = open("example.txt", "r").read().replace("__/__/____", data)
open("final.txt", "w").write(text)
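If the name also has a placeholder in example.txt, you can chain a second replace() call. Here is a minimal sketch, assuming the name placeholder is the literal text NAME_HERE (the actual placeholder in your file may be different):
data = "02/07/2022"
name = "Alan"

# Read the whole file once, replace both placeholders, and write the result out.
text = open("example.txt", "r").read()
text = text.replace("__/__/____", data).replace("NAME_HERE", name)  # NAME_HERE is an assumed placeholder
open("final.txt", "w").write(text)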

Related

Eliminate specific number in a data file using Python

I have a large file and I want to delete all the values '24' within the data file. I have used this code but it doesn't do what I want. Suggestions please. Thanks
This is the data file
24,24,24,24,24,24,1000,1000,24,24,24,1000,1000,1000,1000,24,24,24,24,24,24,24,24,24,24,1000,1000,1000,1000,1000,1000,1000,1000,24,24,24,24,1000,1000,1000,1000,24,1000,24,24,24,24,1000,1000,1000,1000,1000,24,24,24,24,24,24,1000,24,24,24,24,1000,1000,1000,1000,1000,1000,1000,1000,1000,24,24,24,24,1000,1000,1000,1000,24,1000,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,1000,1000,24,24,24,24,24,24,1000,1000,1000,24,24,24,24,1000,1000,1000,1000,1000,1000,1000,1000,1000,24,24,24,24,24,24,24,24,24,24,24,24,24,1000,1000,24,24,24,24,24,24,24,24,24,1000,1000,1000,24,24,24,1000,24,24,1000,1000,24,24,24,24,1000,1000,1000,1000,1000,1000,1000,24,24,24,1000,1000,1000,1000,1000,1000,24,24,24,1000,1000,1000,1000,1000,1000,1000,24,24,24,24,1000,1000,24,1000,1000,24,24,1000,1000,1000,1000,1000,1000,1000,24,24,24,1000,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,1000,1000,24,24,24,1000,1000,1000,1000,1000,24,24,24,24,24,24,24,24,1000,1000,1000,1000,1000,24,24,24,24,24,24,1000,24,24,24,24,24,24,24,24,24,1000,1000,1000,1000,1000,1000,24,24,24,24,24,24,24,24,24,24,1000,1000,1000,24,1000,1000,1000,1000,24,24,1000,1000,24,24,24,24,24,24,24,1000,24,24,24,24,24,24,1000,1000,1000,1000,1000,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,1000,1000,1000,1000,1000
Code
content = open('txt1.txt', 'r').readlines()
cleandata = []
for line in content:
    line = {i: None for i in line.replace("\n", "").split()}
    for value in line.copy():
        if value == "24":
            line.pop(value)
    cleandata.append(" ".join(line) + "\n")
open('txt2.txt', 'w').writelines(cleandata)
This should do it:
content = open('txt1.txt', 'r').readlines()
cleandata = []
for line in content:
    line = line.replace('24', '')
    cleandata.append(line)
open('txt2.txt', 'w').writelines(cleandata)
You could use a regex for it, to match the 24 and delete it.
import re
regex24 = re.compile(r"\b24,?\b")
f = open('txt1.txt', 'r')
cleans = [regex24.sub("", line) for line in f.readlines()]
open('txt2.txt', 'w').writelines(cleans)
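Note that both snippets can leave stray commas behind: the plain str.replace drops only the digits (turning 24,24,1000 into ,,1000), and the regex can still leave a trailing comma when 24 is the last value on a line. If the file really is comma-separated as in the sample, a split-and-filter sketch avoids that (this is an alternative sketch, not from the original answers):
cleandata = []
with open('txt1.txt', 'r') as f:
    for line in f:
        # keep only the fields that are not exactly "24"
        values = [v for v in line.strip().split(',') if v != '24']
        cleandata.append(','.join(values) + '\n')

with open('txt2.txt', 'w') as f:
    f.writelines(cleandata)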

How to replace a line that has a specific text

I want to search for particular text and replace the line if the text is present in that line.
In this code I replace line 125, but want to replace dynamically according to the text:
file = open("config.ini", "r")
lines = file.readlines()
file.close()

lines[125] = "minimum_value_gain = 0.01" + '\n'

f = open("config.ini", "w")
f.writelines(lines)
f.close()
How do I make it so that if a line has:
minimum_value_gain =
then replace that line with:
minimum_value_gain = 0.01
There is no reason for you to manually parse a config.ini file as text. You should use configparser to make things much simpler. This library reads the file for you and essentially converts it to a dict, so processing the data is much easier. For your task you can do something like:
import configparser

config = configparser.ConfigParser()
config.read("config.ini")
for section in config:
    if config.has_option(section, "minimum_value_gain"):
        config.set(section, "minimum_value_gain", "0.01")
with open("config.ini", 'w') as f:
    config.write(f)
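One trade-off to be aware of: config.write() rewrites the whole file, so comments (and the exact original formatting) in config.ini are not preserved. If you want to confirm the change took effect, you can read the value straight back, roughly like this (a small illustrative check, not part of the original answer):
check = configparser.ConfigParser()
check.read("config.ini")
for section in check.sections():
    if check.has_option(section, "minimum_value_gain"):
        print(section, check.get(section, "minimum_value_gain"))  # expect "0.01"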
Since you are replacing the complete line, an if statement will do the trick for you; there is no need to replace text within the line.
# updated: make sure one line doesn't have both values
file = open("config.ini", "r")
lines = file.readlines()
file.close()

newlines = []
for line in lines:
    if "minimum_value_gain" in line:
        line = "minimum_value_gain = 0.01" + '\n'
    if "score_threshold" in line:
        line = "Values you want to add" + '\n'
    newlines.append(line)

f = open("config.ini", "w")
f.writelines(newlines)
f.close()
A little bit messy and not optimized, but it gets the job done: it reads all the lines first, then rewrites the file, appending string_to_add to every line that starts with search_text.
def replace_in_file(filename: str, search_text: str, string_to_add: str) -> None:
    with open(filename, "r+") as file_to_write:
        lines = file_to_write.readlines()
        file_to_write.seek(0)
        file_to_write.truncate()
        for line in lines:
            if line.startswith(search_text):
                line = line.rstrip("\n") + string_to_add + "\n"
            file_to_write.write(line)

replace_in_file("sdf.txt", "minimum_value_gain", " = 0.01")
You can also use Python's regex library (re).
Here is an example.
It is better not to read and write the same file at the same time; that is not good practice. Write to a different file, then rename it afterwards.
import re

pattern = 'minimum_value_gain'
string_to_replace = 'minimum_value_gain = 0.01\n'

file = open("config.ini", "r")
fileout = open("new_config.ini", "a")
lines = file.readlines()
newlines = [string_to_replace if re.match(pattern, line) else line for line in lines]
file.close()
fileout.writelines(newlines)
fileout.close()
You can rename the file afterwards:
import os
os.remove("config.ini")
os.rename("new_config.ini", "config.ini")
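As a side note, os.replace does the remove-and-rename in one step and overwrites the destination even if config.ini still exists (as long as both files are on the same filesystem):
import os
os.replace("new_config.ini", "config.ini")  # swaps the new file in place of the old one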
Set the string you would like to look for (match_string = 'example').
Have an empty list, output_list.
Use with open(x, y) as z: (this will automatically close the file after completion).
Loop over each line of the file with for line in file.readlines().
The if statement appends your replacement line if match_string is in the line; otherwise it just appends the original line.
NOTE: All variables can be any name that is not reserved (don't call something just 'list').
match_string = 'example'
output_list = []
with open("config.ini", "r") as file:
    for line in file.readlines():
        if match_string in line:
            output_list.append('minimum_value_gain = 0.01\n')
        else:
            output_list.append(line)
Maybe not ideal as a first introduction to Python (or the most readable), but I would have solved the problem as follows:
with open('config.ini', 'r') as in_file:
    out_file = ['minimum_value_gain = 0.01\n' if 'example' in line else line for line in in_file.readlines()]
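Note that the comprehension only builds the new lines in memory; if the goal is to update config.ini itself, a short write-back step is still needed (a sketch, assuming overwriting the original file is acceptable):
# write the updated lines back to the file
with open('config.ini', 'w') as fh:
    fh.writelines(out_file)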
To replace specific text in a string:
a = 'My name is Zano'
b = a.replace('Zano', 'Zimmer')

replace line if found or append - python

I have text consisting of key-value pairs separated by '='. I would like to replace the line if the key matches; if not, I would like to append it at the bottom. I've tried several ways, including:
def split_command_key_and_value(command):
    if '=' in command:
        command2 = command.split('=')
        return command2

def test(command, path):
    command2 = split_command_key_and_value(command)
    pattern = command2[0]
    myfile = open(path, 'r')  # open file handle for read
    # use r'', you don't need to replace '\' with '/'
    result = open(path, 'w')  # open file handle for write
    for line in myfile:
        line = line.strip()  # it's always good practice to strip what you read from files
        if pattern in line:
            line = command  # if match, replace line
        result.write(line)  # write every line
    myfile.close()  # don't forget to close file handle
    result.close()
I know the above is just to replace text, but it deletes the text in the file, and I can't see why. Could someone point me in the right direction?
Thanks
Update:
I'm almost there, but some of my lines have similar keys, so multiple lines match when only one should. I've tried to incorporate a regex word boundary in my loop with no luck. My code is below. Does anyone have a suggestion?
There is some text in the file that isn't key-value, so I would like to skip that.
def modify(self, name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    with open('/file/', 'w') as tmpstream:
        with open('/file/', 'r') as stream:
            for line in stream:
                if setting_name in line:
                    tmpstream.write(comb)
                else:
                    tmpstream.write(line)
I think I got it. See code below.
def modify(self, name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    mylist = []
    with open('/file/', 'w') as tmpstream:
        with open('/file/', 'r') as stream:
            for line in stream:
                a = line.split()
                b = re.compile('\\b' + name + '\\b')
                if len(a) > 0:
                    if b.search(a[0]):
                        tmpstream.write(comb)
                else:
                    tmpstream.write(line)
I spoke too soon. It stops at the key-value I provide. So, it only writes one line, and doesn't write the lines that don't match.
def modify(name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    mylist = []
    with open('/file1', 'w') as tmpstream:
        with open('/file2', 'r') as stream:
            for line in stream:
                a = line.split()
                b = re.compile('\\b' + name + '\\b')
                if len(a) > 0:
                    if b.search(a[0]):
                        tmpstream.write(comb)
                else:
                    tmpstream.write(line)
Can anyone see the issue?
Because when you open a file for writing
result = open(path, 'w')  # open file handle for write
you just erase its content. Try writing to a different file and, after all the work is done, replace the old file with the new one. Or read all the data into memory, then process it and write it back to the file.
with open(path) as f:
    data = f.readlines()
with open(path, 'w') as f:
    for l in data:
        # make job here: modify `l` as needed before writing it back
        f.write(l)
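The other suggestion (write to a different file, then swap it in) could look roughly like this; new_path and the processing step are placeholders, not code from the question:
import os

new_path = path + '.new'  # temporary output file (placeholder name)
with open(path) as src, open(new_path, 'w') as dst:
    for line in src:
        # process/modify `line` here as needed
        dst.write(line)
os.replace(new_path, path)  # swap the new file in for the old one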
First of all, you are reading and writing the same file...
You could first read it all and then write it out line by line:
with open(path, 'r') as f:
    myfile = f.read()  # read everything into the variable "myfile"

result = open(path, 'w')  # open file handle for write
for line in myfile.splitlines():  # process the original file content 1 line at a time
    # as before
I strongly recommend reading python's documentation on how to read and write files.
If you open an existing file in write-mode open(path, 'w'), its content will be erased:
mode can be (...) 'w' for only writing (an existing file with the same name will be erased)
To replace a line in python you can have a look at this: Search and replace a line in a file in Python
Here is one of the solutions provided there, adapted to your context (tested on Python 3):
from tempfile import mkstemp
from shutil import move
from os import close

def test(filepath, command):
    # Split command into key/value
    key, _ = command.split('=')
    matched_key = False
    # Create a temporary file
    fh, tmp_absolute_path = mkstemp()
    with open(tmp_absolute_path, 'w') as tmp_stream:
        with open(filepath, 'r') as stream:
            for line in stream:
                if key in line:
                    matched_key = True
                    tmp_stream.write(command + '\n')
                else:
                    tmp_stream.write(line)
        if not matched_key:
            tmp_stream.write(command + '\n')
    close(fh)
    move(tmp_absolute_path, filepath)
Note that with the code above, every line that contains key anywhere in it (key=blob or blob=key) will be replaced.
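For completeness, a hypothetical call might look like this (the file name and command below are made up for illustration):
# Replaces every line containing "timeout" with "timeout=30",
# or appends "timeout=30" at the end if no line matches.
test('settings.txt', 'timeout=30')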

Cleaning up a messy data file to a more readable format in Python?

I have a text file (heavily modified for this example) which has some data that I want to extract and do some calculations with it. However the text file is extremely messy, so I'm trying to clean it up and write it out to new files first.
Here is the .txt file I'm working with: http://textuploader.com/5elql
I am trying to extract the data which is under the titles (called “Important title”). The only possible way to do that is to first locate a string which always occurs in the file, called “DATASET”, because the mess above and below the important data covers an arbitrary number of lines and is difficult to remove manually. Once that’s done, I want to store the data in separate files so that it is easier to analyse, like this:
http://textuploader.com/5elqw
The file names will be concatenated with the title + the date.
Here is what I have tried so far
with open("example.txt") as file:
    for line in file:
        if line.startswith('DATASET:'):
            fileTitle = line[9:]
        if line.startswith("DATE:"):
            fileDate = line[:]
            print(fileTitle + fileDate)
OUTPUT
IMPORTANT TITLE 1
DATE: 12/30/2015
IMPORTANT TITLE 2
DATE: 01/03/2016
So it appears my loop manages to locate the lines where the titles are inside the file and print them out. But this is where I run out of steam. I have no idea how to extract the data under those titles from there onwards. I have tried using file.readlines(), but it outputs all the mess that is in between Important Title 1 and Important Title 2.
Any advice on how I can read all the data under the titles and output them into separate files? Thanks for your time.
You could use regex.
import re

pattern = r"(\s+X\s+Y\s*)|(\s*\d+\s+\d+\s*)"
prog = re.compile(pattern)

with open("example.txt") as file:
    cur_filename = ''
    content = ""
    for line in file:
        if line.startswith('DATASET:'):
            fileTitle = line[9:]
        elif line.startswith("DATE:"):
            fileDate = line[6:]
            cur_filename = (fileTitle.strip() + fileDate.strip()).replace('/', '-')
            print(cur_filename)
            content_title = fileTitle + line
        elif prog.match(line):
            content += line
        elif cur_filename and content:
            with open(cur_filename, 'w') as fp:
                fp.write(content_title)
                fp.write(content)
            cur_filename = ''
            content = ''
I don't know exactly how you want to store your data, but assuming you want a dictionary, you could use a regex to check whether the incoming line matches the pattern; then, because fileTitle is still in scope, you can use it as the key and add the values. I also added rstrip('\r\n') to remove the newline characters after fileTitle.
import re

# if you don't want to store the X and Y, just use re.compile('\d\s+\d+')
p = re.compile('(\d\s+\d+)|(X\s+Y)')
data = {}
with open("input.txt") as file:
    for line in file:
        if line.startswith('DATASET:'):
            fileTitle = line[9:].rstrip('\r\n')
        if line.startswith("DATE:"):
            fileDate = line[:]
            print(fileTitle + fileDate)
        if p.match(line):
            if fileTitle not in data:
                data[fileTitle] = []
            line = line.rstrip('\r\n')
            data[fileTitle].append(line.split('\t'))
            if len(data[fileTitle][len(data[fileTitle]) - 1]) == 3:
                data[fileTitle][len(data[fileTitle]) - 1].pop()
print(data)
Yet another regex solution:
import re

sep = '*************************\n'

pattern = r'DATASET[^%]*'
good_stuff = re.compile(pattern)
pattern = r'^DATASET: (.*?)$'
title = re.compile(pattern, flags=re.MULTILINE)
pattern = r'^DATE: (.*?)$'
date = re.compile(pattern, flags=re.MULTILINE)

with open(r'foo.txt') as f:
    data = f.read()

for match in good_stuff.finditer(data):
    data = match.group()
    important_title = title.search(data).group(1)
    important_date = date.search(data).group(1)
    important_date = important_date.replace(r'/', '-')
    fname = important_title + important_date + '.txt'
    print(sep, fname)
    print(data)
    ##with open(fname, 'w') as f:
    ##    f.write(data)

How to split text file by id in python

I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have.
import os

readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
    with open(readpath + filename, 'r') as fh:
        lines = fh.readlines()
    lastid = 0
    f = open(writepath + 'checkme.txt', 'w')
    f.write(filename)
    for line in lines:
        thisid = line.split("\t")[1]
        if int(thisid) <> lastid:
            f.close()
            f = open(writepath + thisid + '-' + filename, 'w')
            lastid = int(thisid)
        f.write(line)
    f.close()
What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if
thisid = line.split("\t")[1]
is only done once in the loop. Any clue to what is going on?
EDIT
The problem was that my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and replacing <> with !=):
import os

readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
    with open(readpath + filename, 'rU') as fh:
        lines = fh.readlines()
    lastid = 0
    f = open(writepath + 'checkme.txt', 'w')
    f.write(filename)
    for line in lines:
        thisid = line.split("\t")[1]
        if int(thisid) != lastid:
            f.close()
            f = open(writepath + thisid + '-' + filename, 'w')
            lastid = int(thisid)
        f.write(line)
    f.close()
If you're dealing with tab-delimited files, you can use the csv module and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also use os.path.join to make sure your paths are joined correctly.
Untested:
import os
import csv
from itertools import groupby

readpath = 'path-to-read-file'
writepath = 'path-to-write-file'

for filename in os.listdir(readpath):
    with open(os.path.join(readpath, filename)) as fin:
        tabin = csv.reader(fin, delimiter='\t')
        for file_id, rows in groupby(tabin, lambda L: L[1]):
            with open(os.path.join(writepath, file_id + '-' + filename), 'w') as fout:
                tabout = csv.writer(fout, delimiter='\t')
                tabout.writerows(rows)
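One small tweak worth considering if this runs on Python 3 (an assumption, since the question's original code is Python 2): open the output file with newline='' so csv.writer does not insert extra blank lines on Windows:
# same inner block as above, with newline='' added to the output file
with open(os.path.join(writepath, file_id + '-' + filename), 'w', newline='') as fout:
    tabout = csv.writer(fout, delimiter='\t')
    tabout.writerows(rows)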
