import re
# Question listing (Python 2): for every sample line that matches the pattern,
# try to create a module-level dict named "<var>_<if_index with '/' -> '_'>".
# NOTE(review): the leading indentation of this listing appears to have been lost.
def test ( var ):
op="""
1/1/1/1 up up :99005 53476 99005 g993-2-17a
1/1/1/2 up up :99005 53148 99005 g993-2-17a
1/1/1/3 up up :99005 53793 99005 g993-2-17a
"""
op=op.splitlines()
for line in op:
# Seven capture groups; group 4 (the number right after ':') is never stored below.
pattern = "([0-9]+/[0-9]+/[0-9]+/[0-9]+) *?([a-z]+) *?([a-z]+) :([0-9]+) +?([0-9]+) +?([0-9]+) +?([a-z0-9-]+)"
# The same search is run twice: once as the test, once to get the match object.
if re.search(pattern, line):
match=re.search(pattern, line)
# Build the variable name, e.g. "info" + "_" + "1_1_1_1".
var1=re.sub(r'/', '_', match.group(1))
x = var+"_"+ var1
print x
# if_index and adm_state are assigned here but not used afterwards.
if_index = match.group(1)
adm_state = match.group(2)
# NOTE(review): a `global` declaration made inside one exec() call does not
# carry over to the separate exec() calls that follow it.
exec("global %s" % (x))
exec("%s = {}" % (x))
exec("%s['whole']=match.group(0)" % (x))
exec("%s['if_index']=match.group(1)" % (x))
exec("%s['adm_state']=match.group(2)" % (x))
exec("%s['opr_state']=match.group(3)" % (x))
exec("%s['tx_rate_us']=match.group(5)" % (x))
exec("%s['tx_rate_ds']=match.group(6)" % (x))
exec("%s['op_mode']=match.group(7)" % (x))
# The name below is hard-coded, so it only fits a call with var == "info".
print info_1_1_1_1['if_index']
test("info")
# Module-level lookup of the dynamically created name.
print info_1_1_1_1
Hi everyone, I am new to Python and scripting. The script above is mine; my aim is to create multiple dictionaries and assign key/value pairs to each of them. I want a separate dictionary for each line, and I want to access each dictionary by the same name from the global namespace. If anything is unclear, let me know and I will correct it.
In the global namespace I want to access a dictionary like info_1_1_1_1['whole']
global doesn't persist between two exec invocations. This would work:
exec("global bar\nbar=3\n")
But dynamic setting of variables is a strong code smell. Every time you find yourself doing something similar to this, you should immediately stop and reevaluate if there is another way to do this. In this case, I suggest using a dictionary instead:
import re
# Parsed interface records, keyed by "<prefix>_<if_index with '/' -> '_'>".
data = {}

# Built-in sample of the device output parsed when no text is supplied.
_SAMPLE_OUTPUT = """
1/1/1/1 up up :99005 53476 99005 g993-2-17a
1/1/1/2 up up :99005 53148 99005 g993-2-17a
1/1/1/3 up up :99005 53793 99005 g993-2-17a
"""

# One status line.  Compiled once at import instead of being looked up for
# every line.  Group 4 (the number right after ':') is matched but not stored.
_STATUS_LINE = re.compile(
    "([0-9]+/[0-9]+/[0-9]+/[0-9]+) *?([a-z]+) *?([a-z]+) :([0-9]+) +?([0-9]+) +?([0-9]+) +?([a-z0-9-]+)"
)


def test(var="info", text=None):
    """Parse interface status lines into the module-level ``data`` dict.

    Each matching line is stored under the key ``var + "_" + if_index`` with
    the slashes of the interface index replaced by underscores, for example
    ``data["info_1_1_1_1"]``.

    Args:
        var: Prefix used for the keys added to ``data``.
        text: Text to parse, one interface per line.  Defaults to the
            built-in three-line sample.

    Returns:
        None.  Results are written to ``data``; each new key is printed as it
        is created, followed by the ``if_index`` of the first record parsed.
    """
    if text is None:
        text = _SAMPLE_OUTPUT

    first_key = None
    for line in text.splitlines():
        # Search once and reuse the match object.
        match = _STATUS_LINE.search(line)
        if match is None:
            continue
        x = var + "_" + match.group(1).replace("/", "_")
        print(x)
        data[x] = {
            "whole": match.group(0),
            "if_index": match.group(1),
            "adm_state": match.group(2),
            "opr_state": match.group(3),
            "tx_rate_us": match.group(5),
            "tx_rate_ds": match.group(6),
            "op_mode": match.group(7),
        }
        if first_key is None:
            first_key = x

    # Report the first record parsed for *this* prefix.  A hard-coded
    # "info_1_1_1_1" lookup would raise KeyError for any other prefix.
    if first_key is not None:
        print(data[first_key]["if_index"])


test("info")
print(data["info_1_1_1_1"])
Related
I'm trying to parse a blocks of text in python 2.7 using itertools.groupby
The data has the following structure:
BEGIN IONS
TITLE=cmpd01_scan=23
RTINSECONDS=14.605
PEPMASS=694.299987792969 505975.375
CHARGE=2+
615.839727 1760.3752441406
628.788226 2857.6264648438
922.4323436 2458.0959472656
940.4432533 9105.5
END IONS
BEGIN IONS
TITLE=cmpd01_scan=24
RTINSECONDS=25.737
PEPMASS=694.299987792969 505975.375
CHARGE=2+
575.7636234 1891.1656494141
590.3553938 2133.4477539063
615.8339562 2433.4252929688
615.9032114 1784.0628662109
END IONS
I need to extract information from the lines beginning with "TITLE=", "PEPMASS=" and "CHARGE=".
The code I'm using as follows:
import itertools
import re
# Question listing (Python 2): split the file into records at each "END IONS"
# line and print the TITLE / PEPMASS / CHARGE lines of every record.
# NOTE(review): the leading indentation of this listing appears to have been lost.
data_file='Test.mgf'
# groupby key function: True only for a line that is exactly 'END IONS\n'.
def isa_group_separator(line):
return line=='END IONS\n'
regex_scan = re.compile(r'TITLE=')
regex_precmass=re.compile(r'PEPMASS=')
regex_charge=re.compile(r'CHARGE=')
with open(data_file) as f:
# Consecutive lines with the same key value form one group.
for (key,group) in itertools.groupby(f,isa_group_separator):
#print(key,list(group))
# key is False for the lines of a record and True for the separator line.
if not key:
# NOTE(review): `group` is an iterator that can be consumed only once, so the
# first filter() call exhausts it and the two later calls get no lines.
precmass_match = filter(regex_precmass.search,group)
print precmass_match
scan_match= filter(regex_scan.search,group)
print scan_match
charge_match = filter(regex_charge.search,group)
print charge_match
However, the output only picks up the "PEPMASS=" line, and if the 'scan_match' assignment is done before 'precmass_match', only the "TITLE=" line is printed:
> ['PEPMASS=694.299987792969 505975.375\n'] [] []
> ['PEPMASS=694.299987792969 505975.375\n'] [] []
can someone point out what I'm doing wrong here?
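The reason for this is that group is an iterator, so it can be consumed only once: the first filter() call exhausts it and the later calls find nothing left to scan. Converting the group to a list first lets every filter() call see the same lines.
Please find below the modified script that does the job.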
import itertools
import re
data_file='Test.mgf'
def isa_group_separator(line):
    """Return True when *line* is the ``END IONS`` record terminator.

    The line ending is stripped before comparing, so the terminator is also
    recognised on the last line of a file that has no trailing newline and on
    lines that end in CRLF.

    Args:
        line: One line of the MGF file, with or without its line ending.

    Returns:
        bool: True for the terminator line, False for any other line.
    """
    return line.rstrip('\r\n') == 'END IONS'
# Field markers searched for inside each record.
regex_scan = re.compile(r'TITLE=')
regex_precmass = re.compile(r'PEPMASS=')
regex_charge = re.compile(r'CHARGE=')

with open(data_file) as f:
    # groupby yields alternating runs: record lines (key False) and the
    # separator line (key True).
    for key, group in itertools.groupby(f, isa_group_separator):
        if key:
            continue
        # A groupby group can be iterated only once; keep the lines in a list
        # so that each of the three scans below sees all of them.
        g = list(group)
        # List comprehensions print as lists on both Python 2 and Python 3,
        # whereas filter() returns a lazy object on Python 3.
        precmass_match = [line for line in g if regex_precmass.search(line)]
        print(precmass_match)
        scan_match = [line for line in g if regex_scan.search(line)]
        print(scan_match)
        charge_match = [line for line in g if regex_charge.search(line)]
        print(charge_match)
I might try to parse it this way (without using groupby):
import re
# Sample MGF content, already split into lines (no trailing newlines).
file = """\
BEGIN IONS
TITLE=cmpd01_scan=23
RTINSECONDS=14.605
PEPMASS=694.299987792969 505975.375
CHARGE=2+
615.839727 1760.3752441406
628.788226 2857.6264648438
922.4323436 2458.0959472656
940.4432533 9105.5
END IONS
BEGIN IONS
TITLE=cmpd01_scan=24
RTINSECONDS=25.737
PEPMASS=694.299987792969 505975.375
CHARGE=2+
575.7636234 1891.1656494141
590.3553938 2133.4477539063
615.8339562 2433.4252929688
615.9032114 1784.0628662109
END IONS""".splitlines()

# Matches only the three fields of interest; group 1 is the field name and
# group 2 is everything after the '='.
pat = re.compile(r'(TITLE|PEPMASS|CHARGE)=(.+)')

# One inner list per record, holding the field values in file order.
data = []
for line in file:
    m = pat.match(line)
    if m is None:
        continue
    # A TITLE line opens a new record.  A record is also opened when a field
    # shows up before any TITLE, so data[-1] below can never hit an empty list.
    if m.group(1) == 'TITLE' or not data:
        data.append([])
    data[-1].append(m.group(2))
print(data)
Prints:
[['cmpd01_scan=23', '694.299987792969 505975.375', '2+'], ['cmpd01_scan=24', '694.299987792969 505975.375', '2+']]
Although I've been using Perl for many years, I've always had trouble with anything more than fairly basic use of Regular Expressions in the language. This is
only a worse situation now, as I'm trying to learn Python... and the use of re() is even more unclear to me.
I'm trying to check for a match if a substring is in a string, using re()
and also am using capture groups to extract some info from the matching process. However, I can't get things to work in a couple of
contexts; when using a re() call and assigning the returned values all
within an "if" statement.. and how to handle the situation when .groups items are not defined
in the match objects (when a match is not made).
So, what follows are examples of what I'm trying to do coded in Perl and Python, with their respective outputs.
I'd appreciate any pointers on how I might better approach the problem using Python.
Perl Code:
# Reads the records that follow __DATA__ and classifies each one as an XA
# record, a PZ record, or an unknown format.
use strict;
use warnings;
my ($idx, $dvalue);
while (my $rec = <DATA>) {
chomp($rec);
# In list context the match returns its two captures, so the assignment is
# true only when the pattern matched; $idx and $dvalue are then filled in.
if ( ($idx, $dvalue) = ($rec =~ /^XA([0-9]+)=(.*?)!/) ) {
printf(" Matched:\n");
printf(" rec: >%s<\n", $rec);
printf(" index = >%s< value = >%s<\n", $idx, $dvalue);
# Second format: the captured value must not end in '#' before the '!'.
} elsif ( ($idx, $dvalue) = ($rec =~ /^PZ([0-9]+)=(.*?[^#])!/) ) {
printf(" Matched:\n");
printf(" rec: >%s<\n", $rec);
printf(" index = >%s< value = >%s<\n", $idx, $dvalue);
} else {
# Neither pattern matched; the record is echoed between backslashes.
printf("\n Unknown Record format, \\%s\\\n\n", $rec);
}
}
close(DATA);
exit(0)
__DATA__
DUD=ABC!QUEUE=D23!
XA32=7!P^=32!
PZ112=123^!PQ=ABC!
Perl Output:
Unknown Record format, \DUD=ABC!QUEUE=D23!\
Matched:
rec: >XA32=7!P^=32!<
index = >32< value = >7<
Matched:
rec: >PZ112=123^!PQ=ABC!<
index = >112< value = >123^<
Python Code:
import re
# Question listing: prints every record of data.dat, but always runs the regex
# against the fixed `string` below instead of the record that was just read.
# NOTE(review): the leading indentation of this listing appears to have been lost.
string = 'XA32=7!P^=32!'
with open('data.dat', 'r') as fh:
for rec in fh:
orec = ' rec: >' + rec.rstrip('\n') + '<'
print(orec)
# always using 'string' at least lets this program run
(index, dvalue) = re.search(r'^XA([0-9]+)=(.*?[^#])!', string).groups()
# The following works when there is a match... but fails with an error when
# a match is NOT found, viz:-
# ...
# (index, dvalue) = re.search(r'^XA([0-9]+)=(.*?[^#])!', rec).groups()
#
# Traceback (most recent call last):
# File "T:\tmp\a.py", line 13, in <module>
# (index, dvalue) = re.search(r'^XA([0-9]+)=(.*?[^#])!', rec).groups()
# AttributeError: 'NoneType' object has no attribute 'groups'
#
# The traceback above shows the cause: for a non-matching record the search
# result is None, and None has no groups() method. The result must be tested
# before groups() is called on it.
buf = ' index = >' + index + '<' + ' value = >' + dvalue + '<'
print(buf)
exit(0)
data.dat contents:
DUD=ABC!QUEUE=D23!
XA32=7!P^=32!
PZ112=123^!PQ=ABC!
Python Output:
rec: >DUD=ABC!QUEUE=D23!<
index = >32< value = >7<
rec: >XA32=7!P^=32!<
index = >32< value = >7<
rec: >PZ112=123^!PQ=ABC!<
index = >32< value = >7<
Another development: Some more code to help me understand this better... but I'm unsure about when/how to use the match.group() or match.groups() ...
Python Code:
import re
# Question listing: four experiments that test the match object before
# unpacking its groups, with 3, 2, 1 and 1 capture groups respectively.
# NOTE(review): the leading indentation of this listing appears to have been lost.
rec = 'XA22=11^!S^=64!ABC=0,0!PX=0!SP=12B!'
print("rec = >{}<".format(rec))
# ----
# Defaults are reset before each experiment so a failed match prints zeros.
index = 0 ; dvalue = 0 ; x = 0
match = re.match(r'XA([0-9]+)=(.*?[^#])!(.*?)!', rec)
if match:
(index, dvalue, x) = match.groups()
print("3 (): index = >{}< value = >{}< x = >{}<".format(index, dvalue, x))
# ----
index = 0 ; dvalue = 0 ; x = 0
match = re.match(r'XA([0-9]+)=(.*?[^#])!', rec)
if match:
(index, dvalue) = match.groups()
print("2 (): index = >{}< value = >{}< x = >{}<".format(index, dvalue, x))
# ----
index = 0 ; dvalue = 0 ; x = 0
match = re.match(r'XA([0-9]+)=', rec)
if match:
# groups() always returns a tuple, even when the pattern has one group.
# "(index)" is just a parenthesised name, not a one-element tuple, so that
# form would bind the whole tuple to index; "(index, )" unpacks the single
# element. match.group(1) returns the first group directly as a string, and
# match.group(0) is the entire matched text.
#(index) = match.groups() # Why doesn't this work like above examples!?
(index, ) = match.groups() # ...and yet this works!?
# Does match.groups ALWAYS returns a tuple!?
#(index) = match.group(1) # This also works; 0 = entire matched string?
print("1 (): index = >{}< value = >{}< x = >{}<".format(index, dvalue, x))
# ----
index = 0 ; dvalue = 0 ; x = 0
# re.search is used here because 'S^=' is not at the start of rec, where
# re.match would require it to be.
match = re.search(r'S\^=([0-9]+)!', rec)
if match:
(index, ) = match.groups() # Returns tuple(?!)
print("1 (): index = >{}< value = >{}< x = >{}<".format(index, dvalue, x))
Again, I'd appreciate any thoughts on which is the 'preferred' way.. or if there's another way to deal with the groups.
You need to check for a match first, then use the groups. I.e.
compile the regexes (optional for most cases nowadays, according to the documentation)
apply each regex to the string to generate a match object
match() only matches at the beginning of a string, i.e. with an implicit ^ anchor
search() matches anywhere in the string
check if the match object is valid
extract the groups
skip to next loop iteration
# works with Python 2 and Python 3
import re
# Record formats, tried in order; the first one that matches wins.  They are
# compiled once here rather than being looked up again for every record.
_RECORD_PATTERNS = (
    re.compile(r'XA([0-9]+)=(.*?[^#])!'),
    re.compile(r'PZ([0-9]+)=(.*?[^#])!'),
)

with open('dummy.txt', 'r') as fh:
    for rec in fh:
        orec = ' rec: >' + rec.rstrip('\n') + '<'
        print(orec)
        for pattern in _RECORD_PATTERNS:
            # match() anchors at the start of the record, like a leading '^'.
            match = pattern.match(rec)
            if match:
                # Test the match object first: it is None when nothing matched.
                (index, dvalue) = match.groups()
                print(" index = >{}< value = >{}<".format(index, dvalue))
                break
        else:
            # The loop finished without a break, so no format matched.
            print(" Unknown Record format")
Output:
$ python dummy.py
rec: >DUD=ABC!QUEUE=D23!<
Unknown Record format
rec: >XA32=7!P^=32!<
index = >32< value = >7<
rec: >PZ112=123^!PQ=ABC!<
index = >112< value = >123^<
But I'm wondering why you don't simplify your Perl & Python code to just use a single regex instead? E.g.:
# Loop-body fragment: `rec` is the current record.  One pattern covers both
# prefixes; (?:...) groups the alternatives without adding a capture group, so
# groups() still returns exactly two values.
match = re.match(r'(?:XA|PZ)([0-9]+)=(.*?[^#])!', rec)
if match:
    (index, dvalue) = match.groups()
    print(" index = >{}< value = >{}<".format(index, dvalue))
else:
    print(" Unknown Record format")
I am trying to access variable labels in for loop in SPSS using Python. The for loop iterates over a range of variables, deleting 1-3 and renaming 4 and 5 in a sequence of 5 variables. This works fine, but now when trying to access the variable labels via SPSS I am running into the 'unicode object has no attribute keyes' error.
I recognize that I need to somehow refer to the key instead of the string in my array, but as a novice programmer I am struggling to figure out how to update my existing code:
begin program.
# Question listing: walks the variables between M10 and ENDOK_D in cycles of
# five (tracked by mycounter): deletes the first three, then relabels and
# renames the fourth and fifth.
# NOTE(review): the leading indentation of this listing appears to have been lost.
import spss, spssaux
vdict=spssaux.VariableDict()
mylist=vdict.range(start="M10", end="ENDOK_D")
nvars = len(mylist)
mycounter = 1
# Space-separated names of the renamed fifth variables, used at the very end.
durations = ""
for i in range(nvars):
myvar = mylist[i]
if (mycounter < 4):
spss.Submit("delete variables %s." % myvar)
mycounter +=1
elif (mycounter == 4):
# NOTE(review): myvar is used as a string below (endswith, %s); a plain string
# would have no VariableLabel attribute - confirm what vdict.range() returns.
varlabel = mylist[i].VariableLabel
spss.Submit('variable labels %s "%s" [TimeStamp]' % (myvar,varlabel) + ".")
# NOTE(review): mynewvar is assigned only when the suffix test passes;
# otherwise the next lines use a value from an earlier iteration, or none.
if (myvar.endswith("_C")): mynewvar = myvar[:-2] + "_TS"
spss.Submit("rename variables (%s = %s)" % (myvar,mynewvar) + ".")
spss.Submit("formats %s (DATETIME28.4)" % (mynewvar) + ".")
mycounter +=1
elif (mycounter == 5):
varlabel = mylist[i].VariableLabel
spss.Submit('variable labels %s "%s" [TimeStamp]' % (myvar,varlabel) + ".")
if (myvar.endswith("_D")): mynewvar = myvar[:-2] + "_TSD"
spss.Submit("rename variables (%s = %s)" % (myvar,mynewvar) + ".")
durations += mynewvar + " "
# Start the next cycle of five.
mycounter = 1
spss.Submit("alter type %s (F4.0)" % durations + ".")
end program.
Any help would be greatly appreciated.
The line
spss.Submit('variable labels %s "%s" [TimeStamp]' % (myvar,varlabel) + ".") is what's causing you trouble.
The [TimeStamp] bit is basically telling python to look for a key named "TimeStamp" in a dictionary but before the [TimeStamp] it doesn't find the right data structure - namely a dictionary - but a string which does not have keys. Maybe this might make it clearer:
myDict = {
"varname": "myVariable",
"label": "This is myVariable's label!",
"TimeStamp": "20190204-0814"
}
print (myDict["varname"])
print (myDict["TimeStamp"])
>>> myVariable
>>> 20190204-0814
Python is looking for a data structure like this and wants to look up the key "TimeStamp". Now, I assume you just want your labels to read "Whatever Variable Label was assigned [TimeStamp]"?
Simply change the two lines to
spss.Submit('variable labels %s "%s" + " [TimeStamp]"' % (myvar,varlabel) + ".")
I have a file testfile with the set of server names as below.
app-server-l11[2-5].test.com
server-l34[5-8].test.com
dd-server-l[2-4].test.com
Can you please help in getting output to be as follow.
app-server-l112.test.com
app-server-l113.test.com
app-server-l114.test.com
app-server-l115.test.com
server-l345.test.com
server-l346.test.com
server-l347.test.com
server-l348.test.com
dd-server-l2.test.com
dd-server-l3.test.com
dd-server-l4.test.com
With GNU awk for the 3rd arg to match():
$ awk 'match($0,/(.*)\[([0-9]+)-([0-9]+)\](.*)/,a){for (i=a[2]; i<=a[3]; i++) print a[1] i a[4]}' file
app-server-l112.test.com
app-server-l113.test.com
app-server-l114.test.com
app-server-l115.test.com
server-l345.test.com
server-l346.test.com
server-l347.test.com
server-l348.test.com
dd-server-l2.test.com
dd-server-l3.test.com
dd-server-l4.test.com
In GNU awk:
$ awk -F"[][]" '{split($2,a,"-"); for(i=a[1];i<=a[2];i++) print $1 i $3}' file
app-server-l112.test.com
app-server-l113.test.com
app-server-l114.test.com
app-server-l115.test.com
server-l345.test.com
server-l346.test.com
server-l347.test.com
server-l348.test.com
dd-server-l2.test.com
dd-server-l3.test.com
dd-server-l4.test.com
split to fields by [ and ] using FS
use split() to get the range start (a[1]) and end (a[2])
iterate the range with for and output
There is no checking whether there was a range or not. It could be implemented with something like: print (NF==3 ? $1 i $3 : $1 ).
Worst and ugliest example:
var = 'app-server-l11[2-5].test.com'

# Split "prefix[first-last]suffix" into its parts.  The bounds are taken as
# whole numbers rather than single characters, and the prefix and suffix come
# from `var` itself instead of being repeated as literals.
prefix, _, rest = var.partition('[')
bounds, _, suffix = rest.partition(']')
first, _, last = bounds.partition('-')

# The range is inclusive at both ends.
expanded = [prefix + str(i) + suffix for i in range(int(first), int(last) + 1)]
for name in expanded:
    print(name)
Use your imagination!
# Server name templates of the form "prefix[first-last]suffix".
ser_nm = ['app-server-l11[2-5].test.com', 'server-l34[5-8].test.com', 'dd-server-l[2-4].test.com']

# Every expanded name, in output order.
expanded_names = []
for nm in ser_nm:
    # Locate the three delimiters once instead of repeating the searches.
    lb = nm.find('[')
    dash = nm.find('-', lb + 1)  # search after '[' so hyphens in the prefix are skipped
    rb = nm.find(']')
    # Slicing between the delimiters keeps multi-digit bounds intact.
    for i in range(int(nm[lb + 1:dash]), int(nm[dash + 1:rb]) + 1):
        expanded_names.append(nm[:lb] + str(i) + nm[rb + 1:])
        print(expanded_names[-1])
This will also take care of cases where server names are like this:
'server-134[52-823].test.com'
not the best solution, but it works...
# Read the whole input up front.  The context manager closes the handle, and
# read-only mode is enough because the file is never written to.
with open('input.txt', 'r') as src:
    inp = src.read()
print(inp)

# Expanded names in output order, joined into `result` once at the end.
expanded = []
for i in inp.split('\n'):
    if len(i) > 1:
        print(repr(i))
        f1 = i.find('[')
        f2 = i.find(']', f1 + 1)
        if f1 == -1 or f2 == -1:
            # No bracketed range on this line: pass the name through
            # unchanged instead of failing on int('').
            expanded.append(i)
            continue
        b1 = i[:f1]       # text before '['
        b2 = i[f2 + 1:]   # text after ']'
        first, _, last = i[f1 + 1:f2].partition('-')
        # The range is inclusive at both ends.
        for j in range(int(first), int(last) + 1):
            expanded.append(b1 + str(j) + b2)

result = ''.join(name + '\n' for name in expanded)
with open('output.txt', 'w') as outp:
    outp.write(result)
You can use the below command for the required output without any complex statement.
awk -f test.awk file.txt
test.awk must contain the lines below:
# Expand "prefix[first-last]suffix" into one line per number in the inclusive
# range; lines without such a range are printed unchanged.
{
    # match() sets RSTART and RLENGTH to the position and length of "[first-last]".
    if (match($0, /\[[0-9]+-[0-9]+\]/)) {
        # Strip the brackets and split on '-', so bounds of any number of
        # digits are read in full rather than one character each.
        split(substr($0, RSTART + 1, RLENGTH - 2), bounds, "-")
        prefix = substr($0, 1, RSTART - 1)
        suffix = substr($0, RSTART + RLENGTH)
        # Adding 0 forces a numeric comparison without the gawk-only strtonum().
        for (i = bounds[1] + 0; i <= bounds[2] + 0; i++) {
            print prefix i suffix
        }
    } else {
        print $0
    }
}
file.txt contains your input file like below lines:
app-server-l11[2-5].test.com
server-l34[5-8].test.com
dd-server-l[2-4].test.com
output:
app-server-l112.test.com
app-server-l113.test.com
app-server-l114.test.com
app-server-l115.test.com
server-l345.test.com
server-l346.test.com
server-l347.test.com
server-l348.test.com
dd-server-l2.test.com
dd-server-l3.test.com
dd-server-l4.test.com
As this sounds like a school assignment I'm going to be fairly vague.
I would use a regular expression to extract the numeric range and the rest of the address components, then use a loop to iterate over the extracted numeric range to build each address (using the other captured address components).
Since it's been over a week:
import re
inputs = ["app-server-l11[2-5].test.com", "server-l34[5-8].test.com", "dd-server-l[2-4].test.com"]

# <subdomain>[<range_start>-<range_end>]<domain>, with optional leading whitespace.
pattern = r"\s*(?P<subdomain>[a-zA-Z0-9-_.]+)\[(?P<range_start>\d+)-(?P<range_end>\d+)\](?P<domain>\S+)"
expr = re.compile(pattern)


def expand_domain(domain):
    """Expand a host name containing a ``[start-end]`` range.

    Args:
        domain: Host name template such as ``"server-l34[5-8].test.com"``.

    Returns:
        list: One host name per number in the inclusive range, in ascending
        order.  The list is empty when start is greater than end.

    Raises:
        ValueError: If *domain* does not contain a bracketed numeric range in
            the expected position.
    """
    mo = expr.match(domain)
    if mo is None:
        raise ValueError("'{}' does not match the expected input.".format(domain))

    # Separate local names keep the `domain` parameter from being rebound.
    prefix = mo.group("subdomain")
    suffix = mo.group("domain")
    range_start = int(mo.group("range_start"))
    range_end = int(mo.group("range_end"))
    return [
        "{}{:d}{}".format(prefix, index, suffix)
        for index in range(range_start, range_end + 1)
    ]


for domain in inputs:
    print("'{}':".format(domain))
    for exp_dom in expand_domain(domain):
        print("--> {}".format(exp_dom))
This is my first python script. I am trying to get data from an Arduino, read it on a Raspberry Pi and save it to the database. The code works separately (I can assign the variable correctly and send the data to the database but can't seem to get them both to work. I'm not sure my logic works (setting variables to null and then saving once they all have values). Thanks for the input.
import re
import serial
import MySQLdb
import time
# Question listing (Python 2): reads sensor lines from the serial port, pulls
# out one reading per pattern, and inserts a row once all nine are set.
# NOTE(review): the leading indentation of this listing appears to have been lost.
db =MySQLdb.connect(host = "localhost",user = "root",passwd = "example", db = "arduino")
ser =serial.Serial('/dev/ttyACM0',9600)
# One pattern per reading: a fixed label, a colon, then the captured number.
# The first five expect exactly two digits on each side of the decimal point.
humidityPattern = "Humidity\:(\d\d\.\d\d)"
tempDhPattern = "TemperatureDH\:(\d\d\.\d\d)"
barometerPattern = "PressureBMP\:(\d\d\.\d\d)"
tempBmpPattern = "TemperatureBMP\:(\d\d\.\d\d)"
tempTmpPattern = "TemperatureTMP\:(\d\d\.\d\d)"
blLightPattern = "BLLight\:(\d+)"
brLightPattern = "BRLight\:(\d+)"
frLightPattern = "FRLight\:(\d+)"
flLightPattern = "FLLight\:(\d+)"
while 1:
line = ser.readline()
# NOTE(review): all nine readings are cleared on every pass, and re.match
# anchors at the start of the line, so a single line can set at most one of
# them - the all-set condition further down cannot hold as written.
humidity = None
tempDh = None
pressure = None
tempBmp = None
tempTmp = None
blLight = None
brLight = None
frLight = None
flLight = None
#Humidity Sensor
m = re.match(humidityPattern, line)
if m is not None:
humidity = m.group(1)
print "Humidity is "+humidity
m = re.match(tempDhPattern, line)
if m is not None:
tempDh= m.group(1)
print "Humidity Temp is "+tempDh
#Pressure Sensor
m = re.match(barometerPattern, line)
if m is not None:
pressure = m.group(1)
# NOTE(review): this prints tempDh, not the pressure value captured above.
print "Pressure is "+tempDh
m = re.match(tempBmpPattern, line)
if m is not None:
tempBmp= m.group(1)
print "Pressure Temp is "+tempBmp
#Temp Sensor
m = re.match(tempTmpPattern, line)
if m is not None:
tempTmp= m.group(1)
print "Temp is "+tempTmp
#Light Sensors
m = re.match(blLightPattern, line)
if m is not None:
blLight= m.group(1)
print "BL Light is "+ blLight
m = re.match(brLightPattern, line)
if m is not None:
brLight= m.group(1)
print "BR Light is "+ brLight
m = re.match(frLightPattern, line)
if m is not None:
frLight = m.group(1)
print "FR Light is "+ frLight
m = re.match(flLightPattern, line)
if m is not None:
flLight = m.group(1)
print "FL Light is "+ flLight
# Only flLight is compared with None here; the other eight names are tested
# for truthiness.
if humidity and tempDh and pressure and tempBmp and tempTmp and blLight and brLight and frLight and flLight is not None:
with db:
cur = db.cursor()
# NOTE(review): the statement is assembled with % string formatting; consider
# passing the values as query parameters to cur.execute() instead.
cur.execute('insert into weather(humidity, temp_dh, pressure,temp_bmp, temp_tmp, bl_light, br_light, fr_light, fl_light) values("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")'%(humidity, tempDh, pressure, tempBmp, tempTmp, blLight, brLight, frLight, flLight ))
time.sleep(5)
print 'upload'
One problem is in the test:
if humidity and tempDh and pressure and tempBmp and tempTmp and blLight and brLight and frLight and flLight is not None:
which only tests for None the very last variable, flLight. However if, as it seems, all the others are either None or a non-empty string, this should accidentally work because None is falsy and every non-empty string is truthy.
So a bigger problem is that every time through the loop you're throwing away every value you've previously read, whether you've saved them or not.
To fix that, add a boolean flag must_init and change the logic to something like, at the start of your loop:
# Start-of-loop fragment: the readings are cleared only when must_init is set,
# so values collected from earlier lines survive until they have been saved.
must_init = True
while True:
    line = ser.readline()
    if must_init:
        humidity = None
        tempDh = None
        pressure = None
        tempBmp = None
        tempTmp = None
        blLight = None
        brLight = None
        frLight = None
        flLight = None
        # Stays False until the code that saves a complete row sets it again.
        must_init = False
and set must_init = True again only at the very end within the with statement right after the now-final print 'upload'.
This way, you will null out all variables only (A) the first time or (B) right after saving their previous values to the DB, which seems to be more correct logic.
Other simplifying improvements are possible (e.g keep the variables as items in a dict, so you don't have to enumerate them in the any check; have the RE expression also in a dict keyed by the same name you use in the variables dict to enormously compact and simplify your code) but the key point is that the code with the addition of the must_init boolean flag as I suggest before should work -- you can improve it after!-)