Appologies for the really long drawn out question.
I am trying to read in a config file and get a list of rules out.
I have tried to use ConfigParser to do this but it is not a standard config file.
The file contains no section header and no token.
i.e.
config section a
set something to something else
config subsection a
set this to that
next
end
config firewall policy
edit 76
set srcintf "There"
set dstintf "Here"
set srcaddr "all"
set dstaddr "all"
set action accept
set schedule "always"
set service "TCP_5600"
next
edit 77
set srcintf "here"
set dstintf "there"
set srcaddr "all"
set dstaddr "all"
set action accept
set schedule "always"
set service "PING"
next
end
As I couldn't work out how to get ConfigParser to work I thought I would try to iterate through the file, unfortunately I don't have much programming skill so I have got stuck.
I really think I am making this more complicated than it should be.
Here's the code I have written;
class Parser(object):
def __init__(self):
self.config_section = ""
self.config_header = ""
self.section_list = []
self.header_list = []
def parse_config(self, fields): # Create a new section
new_list = []
self.config_section = " ".join(fields)
new_list.append(self.config_section)
if self.section_list: # Create a sub section
self.section_list[-1].append(new_list)
else: self.section_list.append(new_list)
def parse_edit(self, line): # Create a new header
self.config_header = line[0]
self.header_list.append(self.config_header)
self.section_list[-1].append(self.header_list)
def parse_set(self, line): # Key and values
key_value = {}
key = line[0]
values = line[1:]
key_value[key] = values
if self.header_list:
self.header_list.append(key_value)
else: self.section_list[-1].append(key_value)
def parse_next(self, line): # Close the header
self.config_header = []
def parse_end(self, line): # Close the section
self.config_section = []
def parse_file(self, path):
with open(path) as f:
for line in f:
# Clean up the fields and remove unused lines.
fields = line.replace('"', '').strip().split(" ")
if fields[0] == "set":
pass
elif fields[0] == "end":
pass
elif fields[0] == "edit":
pass
elif fields[0] == "config":
pass
elif fields[0] == "next":
pass
else: continue
# fetch and call method.
method = fields[0]
parse_method = "parse_" + method
getattr(Parser, parse_method)(self, fields[1:])
return self.section_list
config = Parser().parse_file('test_config.txt')
print config
The output I am looking for is something like the following;
[['section a', {'something': 'to something else'}, ['subsection a', {'this': 'to that'}]],['firewall policy',['76',{'srcintf':'There'}, {'dstintf':'Here'}{etc.}{etc.}]]]
and this is what I get
[['section a']]
EDIT
I have changed the above to reflect where I am currently at.
I am still having issues getting the output I expect. I just can't seem to get the list right.
class Parser(object):
def __init__(self):
self.my_section = 0
self.flag_section = False
# ...
def parse_config(self, fields):
self.my_section += 1
# go on with fields
# ...
self.flag_section = True
def parse_edit(self, line):
...
def parse_set(self, line):
...
def parse_end(self, line):
...
def parse_file(self, path):
with open(path) as f:
for line in f:
fields = f.strip().split(" ")
method = fields[0]
# fetch and call method
getattr(Parser, "parse_" + method)(self, fields[1:])
I post my answer for people who first come here from Google when trying to parse Fortigate configuration file !
I rewrote what I found here based on my own needs and it works great.
from collections import defaultdict
from pprint import pprint
import sys
f = lambda: defaultdict(f)
def getFromDict(dataDict, mapList):
return reduce(lambda d, k: d[k], mapList, dataDict)
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
class Parser(object):
def __init__(self):
self.config_header = []
self.section_dict = defaultdict(f)
def parse_config(self, fields): # Create a new section
self.config_header.append(" ".join(fields))
def parse_edit(self, line): # Create a new header
self.config_header.append(line[0])
def parse_set(self, line): # Key and values
key = line[0]
values = " ".join(line[1:])
headers= self.config_header+[key]
setInDict(self.section_dict,headers,values)
def parse_next(self, line): # Close the header
self.config_header.pop()
def parse_end(self, line): # Close the section
self.config_header.pop()
def parse_file(self, path):
with open(path) as f:
gen_lines = (line.rstrip() for line in f if line.strip())
for line in gen_lines:
# pprint(dict(self.section_dict))
# Clean up the fields and remove unused lines.
fields = line.replace('"', '').strip().split(" ")
valid_fields= ["set","end","edit","config","next"]
if fields[0] in valid_fields:
method = fields[0]
# fetch and call method
getattr(Parser, "parse_" + method)(self, fields[1:])
return self.section_dict
config = Parser().parse_file('FGT02_20130308.conf')
print config["system admin"]["admin"]["dashboard-tabs"]["1"]["name"]
print config["firewall address"]["ftp.fr.debian.org"]["type"]
I do not know if this can help you too, but it did for me : http://wiki.python.org/moin/ConfigParserExamples
Have fun !
I would do it in a simpler way:
flagSection = False
flagSub = False
mySection = 0
mySubsection = 0
myItem = 0
with open('d:/config.txt', 'r') as f:
gen_lines = (line.rstrip() for line in f if line.strip())
for line in gen_lines:
if line[0:7]=='config ':
mySection = mySection + 1
newLine = line[7:]
# Create a new section
# Mark section as open
flagSection == True
elif line[0:5]=='edit '):
mySubsection = mySubsection + 1
newLine = line[5:]
# Create a new sub-section
# Mark subsection as open
flagSub == true
elif line[0:4]=='set '):
myItem = myItem + 1
name, value = x.split(' ',2)[1:]
# Add to whatever is open
elif line=='end':
# If subsection = open then close and goto end
if flagSub:
# Or if section = open then close and goto end
elif flagSection:
# :End
continue
The instruction gen_lines = (line.rstrip() for line in f if line.strip())
creates a generator of not empty lines (thanks to the test if line.strip()) without newline and without blanks at the right (thanks to line.rstrip())
.
If I would know more about the operations you want to perform with name,value and in the section opened with if line=='end' , I could propose a code using regexes.
Edit
from time import clock
n = 1000000
print 'Measuring times with clock()'
te = clock()
for i in xrange(n):
x = ('abcdfafdf'[:3] == 'end')
print clock()-te,
print "\tx = ('abcdfafdf'[:3] == 'end')"
te = clock()
for i in xrange(n):
x = 'abcdfafdf'.startswith('end')
print clock()-te,
print "\tx = 'abcdfafdf'.startswith('end')"
print '\nMeasuring times with timeit module'
import timeit
ti = timeit.repeat("x = ('abcdfafdf'[:3] == 'end')",repeat=10,number = n)
print min(ti),
print "\tx = ('abcdfafdf'[:3] == 'end')"
to = timeit.repeat("x = 'abcdfafdf'.startswith('end')",repeat=10,number = n)
print min(to),
print "\tx = 'abcdfafdf'.startswith('end')"
result:
Measuring times with clock()
0.543445605517 x = ('abcdfafdf'[:3] == 'end')
1.08590449345 x = 'abcdfafdf'.startswith('end')
Measuring times with timeit module
0.294152748464 x = ('abcdfafdf'[:3] == 'end')
0.901923289133 x = 'abcdfafdf'.startswith('end')
Is the fact the times are smaller with timieit than with clock() due to the fact that the GC is unplugged when the program is run ? Anyway, with either clock() or timeit module , executing startswith() takes more time than slicing.
Related
In the following script:
def update_dict(key):
my_dict[key] = ...
mydict = dict()
k = ...
update_dict(k)
my_dict has not been declared in the class but it's been used in update_dict.
1- The interpreter allowed that to happen. Shouldn't python throw an error on that?
2- What problems could carry using a global variable without declaring it as global within the local scope where it's used? Does python uses a pointer to the global variable based on the variable name?
This snippet is a summary made by me and the full code follows this paragraph if you think (perhaps correctly?) that I did a mistake in my summary. The equivalent to mydict here is sites.
import sys
import csv
import argparse
import gzip
class SiteStats:
def __init__(self, g_size, g_seq):
self.num_reads = 0
self.called_sites = 0
self.called_sites_methylated = 0
self.group_size = g_size
self.sequence = g_seq
def update_call_stats(key, num_called_cpg_sites, is_methylated, sequence):
if key not in sites:
sites[key] = SiteStats(num_called_cpg_sites, sequence)
sites[key].num_reads += 1
sites[key].called_sites += num_called_cpg_sites
if is_methylated > 0:
sites[key].called_sites_methylated += num_called_cpg_sites
parser = argparse.ArgumentParser( description='Calculate methylation frequency at genomic CpG sites')
parser.add_argument('-c', '--call-threshold', type=float, required=False, default=2.0)
parser.add_argument('-s', '--split-groups', action='store_true')
args, input_files = parser.parse_known_args()
assert(args.call_threshold is not None)
sites = dict()
# iterate over input files and collect per-site stats
for f in input_files:
if f[-3:] == ".gz":
in_fh = gzip.open(f, 'rt')
else:
in_fh = open(f)
csv_reader = csv.DictReader(in_fh, delimiter='\t')
for record in csv_reader:
num_sites = int(record['num_motifs'])
llr = float(record['log_lik_ratio'])
# Skip ambiguous call
if abs(llr) < args.call_threshold * num_sites:
continue
sequence = record['sequence']
is_methylated = llr > 0
# if this is a multi-cpg group and split_groups is set, break up these sites
if args.split_groups and num_sites > 1:
c = str(record['chromosome'])
s = int(record['start'])
e = int(record['end'])
# find the position of the first CG dinucleotide
sequence = record['sequence']
cg_pos = sequence.find("CG")
first_cg_pos = cg_pos
while cg_pos != -1:
key = (c, s + cg_pos - first_cg_pos, s + cg_pos - first_cg_pos)
update_call_stats(key, 1, is_methylated, "split-group")
cg_pos = sequence.find("CG", cg_pos + 1)
else:
key = (str(record['chromosome']), int(record['start']), int(record['end']))
update_call_stats(key, num_sites, is_methylated, sequence)
# header
print("\t".join(["chromosome", "start", "end", "num_motifs_in_group", "called_sites", "called_sites_methylated", "methylated_frequency", "group_sequence"])
)
sorted_keys = sorted(list(sites.keys()), key = lambda x: x)
for key in sorted_keys:
if sites[key].called_sites > 0:
(c, s, e) = key
f = float(sites[key].called_sites_methylated) / sites[key].called_sites
print("%s\t%s\t%s\t%d\t%d\t%d\t%.3f\t%s" % (c, s, e, sites[key].group_size, sites[key].called_sites, sites[key].called_sites_methylated, f, sites[k
ey].sequence))
Thank you!
global varname is only needed when you are assigning to the global variable, because otherwise it is assumed to be a new local that is created. For all other cases, Python already knows that its supposed to be the global being used and there is no ambiguity.
In your sample code, you are setting a key/value pair on a global dict, not assigning to a global variable:
def update_dict(key):
my_dict[key] = ...
The following would be a case where you need global in order to modify the global b:
b = 0
def a():
global b
b = 2
print(b)
a()
print(b)
Output:
0
2
Create graph:-
def loadGraphFile(file):
graph = []
for line in file:
contents = line.split()
movieName = contents[0]
actorNames = [contents[i]+ " " + contents[i+1] for i in range(1, len(contents), 2)]
movieNode = findNode(graph, movieName)
if movieNode == None:
movieNode = mkNode(movieName)
graph.append(movieNode)
for actorName in actorNames:
actorNode = findNode(graph,actorName)
if actorNode == None:
actorNode = mkNode(actorName)
graph.append(actorNode)
actorNode.neighbor.append(movieNode)
movieNode.neighbor.append(actorNode)
return graph
def loadGraphFileName('file.text'):
return loadGraphFile(Open('file.text'))
You declared your function wrong:
def loadGraphFileName('file.text'): # change this
return loadGraphFile(Open('file.text'))
To this:
def loadGraphFileName(): # You don't use it anyway
return loadGraphFile(Open('file.text'))
Or:
def loadGraphFileName(filename='file.text'): # file.text will be the default. if you give an parameter with it, filename will change to that parameter
return loadGraphFile(Open(filename)) # And use it here
You cannot have literals as function params
You can instead do
def loadGraphFileName(f = 'file.txt'):
return loadGraphFile(Open(f))
Here's what I am trying to do:
I've built a mini-system that allows for user registering & so on, but the system is very dependent on db_parse() and user_exists(), because that are the main two conditionals for the whole script to run.
Basically I am testing if an user exists with user_exists('username') which should return a "True" (It's a dict which has a value of either True/False).
So, here's the whole code to it (Please excuse the indendation:
class __system():
def __init__(self):
self.usernames = []
self.passwords = []
self.dbname = 'database.txt'
self.privilege = [1,2,3]
self.backupdb = 'backup.txt'
def db_parse(self):
d = {'username':[],
'uid':[],
'password':[],
'pwdid':[]
}
with open(self.dbname,'r') as f:
lines = ([line.rstrip() for line in f])
f.flush()
for x in xrange(0,len(lines)):
if x%2==0:
d['username'].append(lines[x])
d['uid'].append(x) #-> number of line in the file
if x%2==1:
d['password'].append(lines[x])
d['pwdid'].append(x)
print lines
f.close()
return d
def user_exists(self, username=''):
d = {'exists': None,
'uid': None
}
db = self.db_parse()
ylen = len(db['username'])
for y in range(0,ylen):
if username == db['username'][y]:
d['exists'] = True
d['uid'] = db['uid'][y]
else:
d['exists'] = False
d['uid'] = None
return d
def main():
obj = __system()
print obj.user_exists('user1')
if __name__ == "__main__":
main()
The 'database.txt' is looking like this:
user1<br>
203ad5ffa1d7c650ad681fdff3965cd2<br>
user2<br>
6e809cbda0732ac4845916a59016f954<br>
How can I say this...this sometimes work, this doesn't and I've done debugging for 10 hours straight (Yea, that's right.)
I can't seem to catch why it returns "False" and "uid:0" when the user clearly exists and then, 5 minutes later, only re-pasting the code, it does work.
You're going to kick yourself for this, but the issue is here:
for y in range(0,ylen):
if username == db['username'][y]:
d['exists'] = True
d['uid'] = db['uid'][y]
else:
d['exists'] = False
d['uid'] = None
return d
If username matches the first user in the file, your for loop continues on to the second user in the file, which, of course, won't match. So it ends up returning False/None. You just need to add a break if a match is found:
for y in range(0,ylen):
if username == db['username'][y]:
d['exists'] = True
d['uid'] = db['uid'][y]
break # Add this
else:
d['exists'] = False
d['uid'] = None
return d
As an aside, you don't need to call f.close() if you're opening the file using with open(...) as f. The file will automatically close when you leave the with block. You should also use for x, line in enumerate(lines): instead of for x in xrange(0, len(lines)):
I have a text file with entries that look like this :
JohnDoe
Assignment 9
Reading: NO
header: NO
HW: NO
Solutions: 0
show: NO
Journals: NO
free: NO
Finished: NO
Quiz: 0
Done
Assignment 3
E-book: NO
HW: NO
Readings: NO
Show: 0
Journal: NO
Study: NO
Test: NO
Finished: NO
Quiz: 0
Done
This is a small sample. The file has several students in it. Each student has two assignments under their name and they only pass if the line that starts with "Finished" in each assignment reads "Finished: YES". All of the data under each assignment is disorganized, but somewhere under each assignment a line will say "Finished: YES (or NO)" I need a way to read the file and say whether or not any of the students have passed. So far, I have
def get_entries( file ):
with open( "dicrete.txt.rtf", 'rt') as file:
for line in file:
if "Finished" in line:
finished, answer = line.split(':')
yield finished, answer
# dict takes a sequence of `(key, value)` pairs and turns in into a dict
print dict(get_entries( file ))
I can only get this code to return a single entry (the first "Finished" it reads as key and "YES or NO" as value, which is what I want, but I want it to return Every line in the file that that starts with "Finished". So the sample data I provided I want to return a dict with 2 entries {Finished:"NO" , Finished:"NO"}
Dictionaries can only store one mapping per key. So, you can never have a dictionary that has two different entries for the same key.
Consider using a list of two-tuples instead, like [("Finished", "NO"), ("Finished", "NO")].
Sounds like you need a better data model! Let's look at that, shall we?
Let's define an Assignment class that we can call with all the lines of text between Assignment: # and Finished: YES/NO.
class Assignment(object):
def __init__(self, id, *args, **kwargs):
self.id = id
for key,val in kwargs.items():
setattr(self, key.lower(), val)
finished = getattr(self, 'finished', None)
if finished is None:
raise AttributeError("All assignments must have a 'finished' value")
else:
self.finished = True if finished.lower() == "yes" else False
#classmethod
def from_string(cls, s):
"""Builds an Assignment object from a string
a = Assignment.from_string('''Assignment: 1\nAttributes: Go Here\nFinished: yes''')
>>> a.id
1
>>> a.finished
True"""
d = dict()
id = None
for line in s.splitlines():
key,*val = map(str.strip, line.split(":"))
val = ' '.join(val) or None
if key.lower().startswith('assignment'):
id = int(key.split()[-1])
continue
d[key.lower()] = val
if id is not None:
return cls(id, **d)
else:
raise ValueError("No 'Assignment' field in string {}".format(s))
Once you have your model, you'll need to parse your input. Luckily this is actually pretty simple.
def splitlineson(s, sentinel):
"""splits an iterable of strings into a newline separated string beginning with each sentinel.
>>> s = ["Garbage", "lines", "SENT$", "first", "group", "SENT$", "second", "group"]
>>> splitlineson(s, "SENT$")
iter("SENT$\nfirst\ngroup",
"SENT$\nsecond\ngroup")"""
lines = []
for line in s:
if line.lower().strip().startswith(sentinel.lower()):
if any((sentinel.lower() in line.lower() for line in lines)):
yield "\n".join(lines)
lines = [line.strip()]
else:
if line:
lines.append(line.strip())
yield "\n".join(lines)
with open('path/to/textfile.txt') as inf:
assignments = splitlineson(inf, "assignment ")
assignment_list = [Assignment.from_string(a) for a in assignments]
IndentationError: unexpected unindent WHY???
#!/usr/bin/python
import sys
class Seq:
def __init__(self, id, adnseq, colen):
self.id = id
self.dna = adnseq
self.cdnlen = colen
self.prot = ""
def __str__(self):
return ">%s\n%s\n" % (self.id, self.prot)
def translate(self, transtable):
self.prot = ""
for i in range(0,len(self.dna),self.cdnlen):
codon = self.dna[i:i+self.cdnlen]
aa = transtable[codon]
self.prot += aa
def parseCommandOptions(cmdargs):
tfname = cmdargs[1]
sfname = cmdargs[2]
return (tfname, sfname)
def readTTable(fname):
try:
ttable = {}
cdnlen = -1
tfile = open(fname, "r")
for line in tfile:
linearr = line.split()
codon = linearr[0]
cdnlen = len(codon)
aa = linearr[1]
ttable[codon] = aa
tfile.close()
return (ttable, cdnlen)
def translateSData(sfname, cdnlen, ttable):
try:
sequences = []
seqf = open(seq_fname, "r")
line = seqf.readline()
while line:
if line[0] == ">":
id = line[1:len(line)].strip()
seq = ""
line = seqf.readline()
while line and line[0] != '>':
seq += line.strip()
line = seqf.readline()
sequence = Seq(id, seq, cdnlen)
sequence.translate(ttable)
sequences.append(sequence)
seqf.close()
return sequences
if __name__ == "__main__":
(trans_table_fname, seq_fname) = parseCommandOptions(sys.argv)
(transtable, colen) = readTTable(trans_table_fname)
seqs = translateSData(seq_fname, colen, transtable)
for s in seqs:
print s
It says:
def translateSeqData(sfname, cdnlen, ttable):
^
IndentationError: unexpected unindent
WHY? I have checked a thousands times and I can't find the problem. I have only used Tabs and no spaces. Plus, sometimes it asks to define the class. Is that Ok?
It's because you have:
def readTTable(fname):
try:
without a matching except block after the try: block. Every try must have at least one matching except.
See the Errors and Exceptions section of the Python tutorial.
you didn't complete your try statement. You need and except in there too.
This error could actually be in the code preceding where the error is reported. See the For example, if you have a syntax error as below, you'll get the indentation error. The syntax error is actually next to the "except" because it should contain a ":" right after it.
try:
#do something
except
print 'error/exception'
def printError(e):
print e
If you change "except" above to "except:", the error will go away.
Good luck.
#MaxPython The answer below is missing syntax ":"
try:
#do something
except:
# print 'error/exception'
def printError(e):
print e