How to increment last number in file python - python

I have below text in the file
firefox-x 46.0:
google 5.1.0.1:
- request
- branch
I need to extract the last letter of first line and increase by one version and append to same file. My append part will be
firefox-x 46.1:
google 5.1.0.1:
- request
- branch
able to extract last integer but how to update and write to same file.
import re
with open('branch.txt','r') as fh:
first_line = fh.readline()
#print (first_line)
last_number = re.findall(".*(?:\D|^)(\d+)", first_line)
for i in last_number:
to_int = int(i)
#print (to_int)
next_num = (to_int +1)
print (next_num)

You may use
import re
rx = r'\d+(?=:$)'
s="""firefox-x 46.0:
google 5.1.0.1:
- request
- branch"""
print(re.sub(rx, lambda x: str(int(x.group(0)) + 1), s, 1, re.M))
Output:
firefox-x 46.1:
google 5.1.0.1:
- request
- branch
See the Python demo
The \d+(?=:$) regex with re.M flag will match 1+ digits that are followed with : at the end of a line and 1 passed as the count argument to re.sub will only perform a single replacement.
The lambda x: str(int(x.group(0)) + 1) part will take the first match, cast the 1+ digits matched to an int, add 1 to the value and cast it back to a string.
To read and write to another file:
import re
rx = r'\d+(?=:$)'
with open('branch.txt', 'r') as fr:
data = fr.read()
with open('branch.out.txt', 'w') as fw:
fw.write(re.sub(rx , lambda x: str(int(x.group(0)) + 1), data, 1, re.M))

Related

Python Parsing an A2L file used in automotive industries

I have a file from which i am trying to parse some data.
I am having issues in following case .
In case one if I find "NO_COMPU_METHOD" in line then I will save the word before it , which is SWORD
SWORD NO_COMPU_METHOD 0 0 -32768 32767
In case two if I find "0 NO_COMPU_METHOD" in line then I will save the word before it , which is SSV__UWORD_Z
MEAS_SMAP SSV__UWORD_Z 0 NO_COMPU_METHOD 9 0 65535zz
Here is the code i am trying to use
import sys
import numpy as np
import regex
with open('CANAPE_main.a2l', 'r') as myfile:
print('Opening file ',myfile)
contents = myfile.read()
test_string = "0 NO_COMPU_METHOD "
for x in lines:
if 'NO_COMPU_METHOD' in x:
match = regex.search(test_string, x)
print(match)
if match:
temp0 = x.strip()
dType = temp0.split()
cal_var_dataType.append(dType[2])
else:
temp1 = x.strip()
dType1 = temp1.split()
meas_var_dataType.append(dType1[0])
The issue is that ,since i am using split() function, python parses "0 NO_COMPU_METHOD" as "0" and "NO_COMPU_METHOD" and hence i am facing problem . Is there any way I can parse the string with white spaces. Is there anything wrong in my regular expression code

Search and Replace a string in text file

I would like to search for a line in a text file which contains the string "SECTION=C-BEAM" and replace the first 13 characters in the "next line" by reading a pattern from first line (pattern highlighted in bold (see example below - read 1.558 from first line and replace it with 1.558/2 =0.779 in the second line). The number to read from first line is always in between the strings "H_" and "H_0".
Example Input:
SECTION, ELSET=DIORH_1_558H_0_76W_241_1, SECTION=C-BEAM, MAT=XYZ;
0., 1, 2, 3, 4, 5
Output as follows:
SECTION, ELSET=DIORH_1_558H_0_76W_241_1, SECTION=C-BEAM, MAT=XYZ;
0.779, 1, 2, 3, 4, 5
This is what I have tried so far.
file_in = open(test_input, 'rb')
file_out = open(test_output, 'wb')
lines = file_in.readlines()
print ("Total no. of lines to process: ", len(lines))
for i in range(len(lines)):
if lines.startswith("SECTION") and "SECTION=C-BEAM" in lines:
start_index = lines.find("H_")+1
end_index = lines.find("H_0")
x = lines[start_index:end_index]/2.0
print (x)
lines[i+1]= lines[i+1].replace(" 0.",x)+lines[i+1][13:]
file_out.write(lines[i])
file_in.close()
file_out.close()
As you have mentioned that the content resides in a file, I tried to store some other random lines in a string other than the pattern you are looking for.
Tested below piece of code and it works. I assume there is only one such occurrence in the file.If there are multiple occurrences in the file that can be done through a loop though.
import re
st = '''These are some different lines - you need not worry about.
SECTION, ELSET=DIORH_1_558H_0_76W_241_1, SECTION=C-BEAM, MAT=XYZ;
0., 1, 2, 3, 4, 5
These are more different lines - you need not worry about.
0.,2 numbers'''
num = str(float(re.findall('.*H_(.+)H_0.*SECTION=C-BEAM.*\n.*',st)[0].replace("_","."))/2)
print (re.sub(r'(.*SECTION=C-BEAM.*\n)(0\.)(,.*)',r'\g<1>'+num+r'\g<3>',st))
# re.findall('.*H_(.*)H_0.*SECTION=C-BEAM.*\n.*',st) --> Returns ['1_558']. Extract 1_558 by indexing it -[0]
# Then replace "_" with "." Convert to a float, divide by 2 and then convert the result to string again
# .* means 0 or more non-newline characters,.+ means 1 or more non-newline characters "\n" stands for new line.
# (.+) means characters inside the bracket from the overall pattern will be extracted
# Second line of the code: I replaced the desired number("0.") for the matching patternin the second line.
# Divided the pattern in to 3 groups: 1) Before the pattern "0." 2) The pattern "0." itself 3) After the pattern "0.".
# Replaced the pattern "0." with "group 1 + num + group 2"
Output as shown below:
Basic python regex should do it :
my_text = """SECTION, ELSET=DIORH_1_558H_0_76W_241_1, SECTION=C-BEAM, MAT=XYZ;\n0., 1, 2, 3, 4, 5"""
# This find the index of the first occurence of your regex in my_text
index = my_text.find('SECTION=C-BEAM')
# You select everything before the first occurence of your regex
# and count the number of lines (\n is the escape line character)
nb_line = my_text[:index].count('\n')
# Now you wand to find the index of the beginning of the n + 1 line.
# You can do this thanks to finditer function
# This creates the list of index of a specified regex,
# you select the n + 1 (here it is nb_line because python indexing starts at 0)
index = [m.start() for m in re.finditer(r"\n",my_text)][nb_line]
# the you re build the wanted string with :
# the beginning of your string until the n + 1 line,
# the text you want (0.779)
# the text after the substring you removed (you need to know the length of the string you want to remove here 2
string_to_remove = "0."
my_text = my_text[:index+1] + '0.779' + my_text[index + 1 + len(string_to_remove):]
print(my_text)

searching word following a giving pattern

I want get the word 'MASTER_INACTIVE' in the string:
'p_esco_link->state = MASTER_INACTIVE; /*M-t10*/'
by searching reg-expression 'p_esco_link->state =' to find the following word.
I have to replace date accessing to API functions. I try some reg-expression in python 3.6, but it does not work.
pattern = '(?<=\bp_esco_link->state =\W)\w+'
if __name__ == "__main__":
syslogger.info(sys.argv)
if version_info.major != 3:
raise Exception('Olny work on Python 3.x')
with open(cFile, encoding='utf-8') as file_obj:
lineNum = 0
for line in file_obj:
print(len(line))
re_obj = re.compile(pattern)
result = re.search(pattern, line)
lineNum += 1
#print(result)
if result:
print(str(lineNum) + ' ' +str(result.span()) + ' ' + result.group())
excepted Python re module can find the position of 'MASTER_INACTIVE' and put it into result.group().
error message is that Python re module find nothing.
Your pattern is working fine,
Just change the bellow line in your code,
pattern = r'(?<=\bp_esco_link->state =\W)\w+' # add r prefix
Check this sample work, I added line as your string.
import re
pattern = r'(?<=\bp_esco_link->state =\W)\w+'
line = 'p_esco_link->state = MASTER_INACTIVE; /*M-t10*/'
re_obj = re.compile(pattern)
result = re.search(pattern, line)
print(result.span()) # (21, 36)
print(result.group()) # 'MASTER_INACTIVE'
Check below question to get more understand about 'r' prefix,
Python regex - r prefix
What exactly do “u” and “r” string flags do, and what are raw string literals?
What does preceding a string literal with “r” mean? [duplicate]

Replace single quotes with double quotes in python, for use with insert into database

Was wondering whether anyone has a clever solution for fixing bad
insert statements in Python, exported by a not so clever program. It didn't add
two single quotes for strings with a single quote in the string. To
make it a bit easier all the values being inserted are strings.
So it has:
INSERT INTO addresses VALUES ('1','1','CUCKOO'S NEST','CUCKOO'S NEST STREET');
instead of:
INSERT INTO addresses VALUES ('1','1','CUCKOO''S NEST','CUCKOO''S NEST STREET');
Obviously there are multiple lines of this and I don't want to replace
the enclosing single quotes as well.
Was thinking of using split and join, but I'm not sure how to easily update the split values while looping in a loop. Sorry I'm a noob. Something like the below, where I'm not sure how to do #update bit
import sys
fileIN = open('a.sql', "r")
line = fileIN.readline()
while line:
bits = line.split("','")
for bit in bits:
if bit.find("'") > -1:
#update bit
line_out = "','".join(bits)
sys.stdout.write(line_out)
line = fileIN.readline()
Thanks
Based on katrielalex's suggestion, how about this:
>>> import re
>>> s = "INSERT INTO addresses VALUES ('1','1','CUCKOO'S NEST','CUCKOO'S NEST STREET');"
>>> def repl(m):
if m.group(1) in ('(', ',') or m.group(2) in (',', ')'):
return m.group(0)
return m.group(1) + "''" + m.group(2)
>>> re.sub("(.)'(.)", repl, s)
"INSERT INTO addresses VALUES ('1','1','CUCKOO''S NEST','CUCKOO''S NEST STREET');"
and if you're into negative lookbehind assertions, this is the headache inducing pure regex version:
re.sub("((?<![(,])'(?![,)]))", "''", s)
while line:
# Restrain line2 to inside parentheses
line1, rest = line.split('(')
line2, line3 = rest.split(')')
# A bit more cleaner
new_bits = []
for bit in line2.split(','):
# Remove border ' characters
bit = bit[1:-1]
# Duplicate the ones inside
if "'" in bit:
bit = bit.replace("'", "''")
# Re-add border '
new_bits.append("'" + bit + "'")
sys.stdout.write(line1 + '(' + ','.join(new_bits + ')' + line3)
line = fileIN.readline()
Warning: This depends way too much on the formatting of the SQL statement. However, if your input is only ever going to have the format "statements (params) end" then this will work every time.
import sys
fileIN = open('a.sql', "r")
line = fileIN.readline()
while line:
#split out the parameters (between the ()'s)
start, temp = line.split("(")
params, end = temp.split(")")
#replace the "'"s in the parameters (without the start and end quote)
newParams = "','".join([x.replace("'", "''") for x in params[1:-1].split("','")])
#join the statement back together
line_out = start + "('" + newParams + "')" + end
#next line
sys.stdout.write(line_out)
line = fileIN.readline()
Explanation:
Split the string into 3 parts: The query start, the parameters, and the end.
The generator takes the parameters (without the starting/ending 's), splits it on ',', and, for every element in the list the split generates (the individual data entries), replaces the 's with ''s.
The last line then joins the query start, the new params (with the parenthesis and quotes that were removed previously), and the end of the statement.
Another answer:
a = "INSERT INTO addresses VALUES ('1','1','CUCKOO'S NEST','CUCKOO'S NEST STREET');"
open_par = a.find("(")
close_par = a.find(")")
b = a[open_par+1:close_par]
c = b.split(",")
d = map(lambda x: '"' + x.strip().strip("'") + '"',c)
result = a[:open_par+1] + ",".join(d) + a[close_par:]
Went with:
import sys
import re
def repl(m):
if m.group(1) in ('(', ',') or m.group(2) in (',', ')'):
return m.group(0)
return m.group(1) + "''" + m.group(2)
fileIN = open('a.sql', "r")
line = fileIN.readline()
while line:
line_out = re.sub("(.)'(.)", repl, line)
sys.stdout.write(line_out)
# Next line.
line = fileIN.readline()

Python RegEx nested search and replace

I need to to a RegEx search and replace of all commas found inside of quote blocks.
i.e.
"thing1,blah","thing2,blah","thing3,blah",thing4
needs to become
"thing1\,blah","thing2\,blah","thing3\,blah",thing4
my code:
inFile = open(inFileName,'r')
inFileRl = inFile.readlines()
inFile.close()
p = re.compile(r'["]([^"]*)["]')
for line in inFileRl:
pg = p.search(line)
# found comment block
if pg:
q = re.compile(r'[^\\],')
# found comma within comment block
qg = q.search(pg.group(0))
if qg:
# Here I want to reconstitute the line and print it with the replaced text
#print re.sub(r'([^\\])\,',r'\1\,',pg.group(0))
I need to filter only the columns I want based on a RegEx, filter further,
then do the RegEx replace, then reconstitute the line back.
How can I do this in Python?
The csv module is perfect for parsing data like this as csv.reader in the default dialect ignores quoted commas. csv.writer reinserts the quotes due to the presence of commas. I used StringIO to give a file like interface to a string.
import csv
import StringIO
s = '''"thing1,blah","thing2,blah","thing3,blah"
"thing4,blah","thing5,blah","thing6,blah"'''
source = StringIO.StringIO(s)
dest = StringIO.StringIO()
rdr = csv.reader(source)
wtr = csv.writer(dest)
for row in rdr:
wtr.writerow([item.replace('\\,',',').replace(',','\\,') for item in row])
print dest.getvalue()
result:
"thing1\,blah","thing2\,blah","thing3\,blah"
"thing4\,blah","thing5\,blah","thing6\,blah"
General Edit
There was
"thing1\\,blah","thing2\\,blah","thing3\\,blah",thing4
in the question, and now it is not there anymore.
Moreover, I hadn't remarked r'[^\\],'.
So, I completely rewrite my answer.
"thing1,blah","thing2,blah","thing3,blah",thing4
and
"thing1\,blah","thing2\,blah","thing3\,blah",thing4
being displays of strings (I suppose)
import re
ss = '"thing1,blah","thing2,blah","thing3\,blah",thing4 '
regx = re.compile('"[^"]*"')
def repl(mat, ri = re.compile('(?<!\\\\),') ):
return ri.sub('\\\\',mat.group())
print ss
print repr(ss)
print
print regx.sub(repl, ss)
print repr(regx.sub(repl, ss))
result
"thing1,blah","thing2,blah","thing3\,blah",thing4
'"thing1,blah","thing2,blah","thing3\\,blah",thing4 '
"thing1\blah","thing2\blah","thing3\,blah",thing4
'"thing1\\blah","thing2\\blah","thing3\\,blah",thing4 '
You can try this regex.
>>> re.sub('(?<!"),(?!")', r"\\,",
'"thing1,blah","thing2,blah","thing3,blah",thing4')
#Gives "thing1\,blah","thing2\,blah","thing3\,blah",thing4
The logic behind this is to substitute a , with \, if it is not immediately both preceded and followed by a "
I came up with an iterative solution using several regex functions:
finditer(), findall(), group(), start() and end()
There's a way to turn all this into a recursive function that calls itself.
Any takers?
outfile = open(outfileName,'w')
p = re.compile(r'["]([^"]*)["]')
q = re.compile(r'([^\\])(,)')
for line in outfileRl:
pg = p.finditer(line)
pglen = len(p.findall(line))
if pglen > 0:
mpgstart = 0;
mpgend = 0;
for i,mpg in enumerate(pg):
if i == 0:
outfile.write(line[:mpg.start()])
qg = q.finditer(mpg.group(0))
qglen = len(q.findall(mpg.group(0)))
if i > 0 and i < pglen:
outfile.write(line[mpgend:mpg.start()])
if qglen > 0:
for j,mqg in enumerate(qg):
if j == 0:
outfile.write( mpg.group(0)[:mqg.start()] )
outfile.write( re.sub(r'([^\\])(,)',r'\1\\\2',mqg.group(0)) )
if j == (qglen-1):
outfile.write( mpg.group(0)[mqg.end():] )
else:
outfile.write(mpg.group(0))
if i == (pglen-1):
outfile.write(line[mpg.end():])
mpgstart = mpg.start()
mpgend = mpg.end()
else:
outfile.write(line)
outfile.close()
have you looked into str.replace()?
str.replace(old, new[, count])
Return a copy of the string with all occurrences of substring old
replaced by new. If the optional argument count is given, only the
first count occurrences are replaced.
here is some documentation
hope this helps

Categories