I want to write an INI file with duplicate options, i.e.:
[test]
foo = value1
foo = value2
xxx = yyy
With ConfigParser.set only the last value is written.
# Question snippet: the option named by service['foo'] is set twice in the
# same section; the result shown below keeps only the second value.
# NOTE(review): `service` is not defined in this snippet — presumably a
# mapping that holds the option names and values; confirm with the full code.
config = ConfigParser.ConfigParser()
config.read('example.cfg')
config.add_section('test')
config.set('test', service['foo'], service['value1'])
config.set('test', service['foo'], service['value2'])
config.set('test', service['xxx'], service['yyy'])
The result is:
[test]
foo = value2
xxx = yyy
Is there any way?
It looks like it isn't possible in a simple way. By default ConfigParser stores options in dicts, i.e. one value per unique key.
In a similar question Python's ConfigParser unique keys per section the suggestions are to go with:
ConfigObj
Patched version of epydoc
I have a simple custom .ini parser in Python (built for another project). It uses a list to store values, but only if they are not in key=value format. For key=value lines only the last value of a given key is kept, since these pairs are stored in a dictionary.
The parser can also parse nested sections like:
[SECTION1][SECTION2]
key1=value1
; etc..
The code is below. It is easy to modify it to store key/value pairs in a list instead of a dictionary, or even to detect duplicate keys and rename them to avoid collisions (e.g. key, then key$1 for the second occurrence of the same key, and so on). Use/modify as needed.
##
#
# Simple .ini Parser for Python 2.x, 3.x
#
##
import re
class Ini_Parser():
    """Simple .ini parser and writer for Python.

    Parsed data is a plain ``dict`` mapping each section name to a ``dict``
    of its contents: ``key=value`` lines become string entries, nested
    ``[A][B]`` headers become nested dicts, and bare lines without ``=`` are
    collected under the ``'__list__'`` key (or stored as ``key: True`` when
    ``keysList`` is false).  Lines that appear before the first header go
    into the root section (``'_'`` by default).  When a key is repeated in
    the input only its last value is kept.

    All methods are static, so they can be called on the class or on an
    instance, under both Python 2 and Python 3.
    """

    # Compiled line-splitting pattern; built lazily on the first parse.
    NL = None
    # Escape sequences recognised inside quoted strings -> real character.
    ACTUAL = {
        '\\n' : "\n",
        '\\t' : "\t",
        '\\v' : "\v",
        '\\f' : "\f"
    }

    @staticmethod
    def parseStr(s, q):
        """Split a string that starts with quote character *q*.

        Returns ``(quoted, rem)``: the unescaped text between the opening
        quote and the next occurrence of *q*, and the stripped remainder
        after the closing quote.  If the closing quote is missing, everything
        after the opening quote is treated as the quoted text.
        """
        endq = s.find(q, 1)
        if endq < 0:
            # Unterminated quote: do not chop the last character off.
            quoted, rem = s[1:], ''
        else:
            quoted = s[1:endq]
            rem = s[endq+1:].strip()
        for c, actual in Ini_Parser.ACTUAL.items():
            quoted = quoted.replace(c, actual)
        quoted = quoted.replace('\\\\', '\\')
        return quoted, rem

    @staticmethod
    def _newSection(keysList):
        """Return an empty section container for the chosen storage mode."""
        if keysList:
            return { '__list__' : [] }
        return { }

    @staticmethod
    def _parseValue(raw):
        """Strip a raw value and unquote it when it starts with a quote."""
        value = raw.strip()
        # Guard against an empty value ("key="), which has no first character.
        if value and value[0] in ('"', "'"):
            value, _rem = Ini_Parser.parseStr(value, value[0])
        return value

    @staticmethod
    def fromString(s, keysList=True, rootSection='_'):
        """Parse .ini text *s* and return the nested section dict.

        keysList    -- when true, bare lines (no ``=``) are appended to the
                       section's ``'__list__'``; otherwise they are stored
                       as ``key: True``.
        rootSection -- name of the section that receives lines found before
                       the first header; falsy values fall back to ``'_'``.
        """
        comments = (';', '#')
        rootSection = str(rootSection) if rootSection else '_'
        if not Ini_Parser.NL:
            Ini_Parser.NL = re.compile(r'\n\r|\r\n|\r|\n')

        sections = { rootSection : Ini_Parser._newSection(keysList) }
        currentSection = rootSection
        currentRoot = sections

        for line in Ini_Parser.NL.split(str(s)):
            line = line.strip()
            # comment or empty line, skip it
            if not line or line[0] in comments:
                continue

            # section line, possibly with chained sub-sections: [A][B]...
            if line[0] == '[':
                isTop = True
                while line and line[0] == '[':
                    # The first header is looked up at the top level, every
                    # following one inside the section parsed just before it.
                    currentRoot = sections if isTop else currentRoot[currentSection]
                    isTop = False
                    endsection = line.find(']', 1)
                    if endsection < 0:
                        # Missing "]": use the rest of the line as the name
                        # instead of re-reading the same text forever.
                        currentSection, line = line[1:], ''
                    else:
                        currentSection = line[1:endsection]
                        line = line[endsection+1:].strip()
                    if currentSection not in currentRoot:
                        currentRoot[currentSection] = Ini_Parser._newSection(keysList)
                continue

            # key-value pair or bare key
            if line[0] in ('"', "'"):
                # quoted key: the value, if any, follows the first "=" after it
                key, rest = Ini_Parser.parseStr(line, line[0])
                hasValue = '=' in rest
                rawValue = rest.split('=', 1)[1] if hasValue else ''
            else:
                key, sep, rawValue = line.partition('=')
                key = key.strip()
                hasValue = bool(sep)

            target = currentRoot[currentSection]
            if hasValue:
                target[key] = Ini_Parser._parseValue(rawValue)
            elif keysList:
                target['__list__'].append(key)
            else:
                target[key] = True
        return sections

    @staticmethod
    def fromFile(filename, keysList=True, rootSection='_'):
        """Read *filename* and parse its contents with :meth:`fromString`."""
        with open(filename, 'r') as f:
            s = f.read()
        return Ini_Parser.fromString(s, keysList, rootSection)

    @staticmethod
    def _emitPairs(keyvals, q, EOL):
        """Serialise one section's bare keys and key=value pairs.

        Sub-sections (dict values) are not written here.  ``True`` values
        are written as bare keys, list/tuple values as one ``key=value``
        line per item, and empty values are skipped.
        """
        rows = []
        bare = keyvals.get('__list__')
        if isinstance(bare, list):
            rows.extend(q + str(item) + q for item in bare)
        for k, v in keyvals.items():
            if isinstance(v, dict) or (k == '__list__' and isinstance(v, list)):
                continue
            if v is True:
                rows.append(q + str(k) + q)
            elif isinstance(v, (list, tuple)):
                rows.extend(q + str(k) + q + '=' + q + str(item) + q for item in v)
            elif v is not None and v is not False and str(v) != '':
                rows.append(q + str(k) + q + '=' + q + str(v) + q)
        return ''.join(row + EOL for row in rows)

    @staticmethod
    def walk(o, key=None, top='', q='', EOL="\n"):
        """Serialise the sections of *o* (or only section *key*) recursively.

        top -- header prefix of the enclosing sections, e.g. ``"[A]"``.
        q   -- quote string wrapped around every key and value.
        EOL -- line terminator.

        The input is not modified.  A section's own pairs are written before
        any of its sub-sections so that they are read back into the same
        section.
        """
        s = ''
        if not o:
            return s
        keys = [key] if key else list(o.keys())
        for section in keys:
            keyvals = o[section]
            if not isinstance(keyvals, dict) or not keyvals:
                continue
            header = str(top) + "[" + str(section) + "]"
            s += header + EOL
            s += Ini_Parser._emitPairs(keyvals, q, EOL)
            s += EOL
            for k, v in keyvals.items():
                if isinstance(v, dict) and v:
                    # sub-section
                    s += Ini_Parser.walk(keyvals, k, header, q, EOL)
        return s

    @staticmethod
    def toString(o, rootSection='_', quote=False, EOL="\n"):
        """Return the .ini text for the section dict *o*.

        The root section is written first, without a header; every other
        section follows with its header.  With ``quote=True`` keys and
        values are wrapped in double quotes.  *o* is left unchanged.
        """
        root = str(rootSection) if rootSection else '_'
        q = '"' if quote else ''
        s = ''
        # dump the root section first, if exists
        rootBody = o.get(root)
        if isinstance(rootBody, dict):
            s += Ini_Parser._emitPairs(rootBody, q, EOL)
            s += EOL
            for k, v in rootBody.items():
                if isinstance(v, dict) and v:
                    s += Ini_Parser.walk(rootBody, k, "[" + root + "]", q, EOL)
        # walk the remaining sections on a filtered copy, not on the caller's dict
        rest = dict((name, body) for name, body in o.items() if name != root)
        s += Ini_Parser.walk(rest, None, '', q, EOL)
        return s

    @staticmethod
    def toFile(filename, o, rootSection='_', quote=False, EOL="\n"):
        """Write the .ini text for *o* to *filename* (see :meth:`toString`)."""
        with open(filename, 'w') as f:
            f.write( Ini_Parser.toString(o, rootSection, quote, EOL) )
# for use with 'import *'
__all__ = [ 'Ini_Parser' ]
Related
I have two lists of multiline strings and I am trying to get the diff lines for these strings. First I tried to split each string into lines, treat all of the lines as one big "file" and get the diff for it, but I had a lot of bugs. I cannot just diff by index, since I do not know which multiline string was added, which was deleted and which one was modified.
Lets say I had the following example:
import difflib
# Two versions of a list of multi-line text blocks.
oldList = ["one\ntwo\nthree","four\nfive\nsix","seven\neight\nnine"]
newList = ["four\nfifty\nsix","seven\neight\nnine","ten\neleven\ntwelve"]

# Flatten every block into one long list of lines.  The block boundaries
# are lost at this point, which is why the resulting diff is line-based only.
oldAllTogether = []
for block in oldList:
    oldAllTogether.extend(block.splitlines())
newAllTogether = []
for block in newList:
    newAllTogether.extend(block.splitlines())

# unified_diff returns a lazy generator; iterate it (or join it) to get lines.
diff = difflib.unified_diff(oldAllTogether,newAllTogether)
So I somehow have to find out, which strings belong to each other.
I had to implement my own code in order to get the desired output. It is basically the same as Differ.compare(), with the difference that we look at multiline blocks instead of single lines. So the code would be:
def _prefixLines(block, sign):
    """Return every line of *block* prefixed with *sign*, newline-terminated."""
    return "".join(sign + line + "\n" for line in block.splitlines())


def _hunkBody(oldBlock, newBlock):
    """Return the unified diff of two blocks without the file/hunk headers."""
    out = []
    started = False
    for entry in difflib.unified_diff(oldBlock.splitlines(), newBlock.splitlines(), lineterm=""):
        if started:
            out.append(entry + "\n")
        elif entry.startswith("@@"):
            # Everything up to and including the first "@@ ... @@" line is header.
            started = True
    return "".join(out)


def diffBlocks(oldBlocks, newBlocks, cutoff=0.75):
    """Diff two lists of multi-line strings block by block.

    Blocks are first matched as whole items.  Inside a 'replace' range each
    new block is paired with the most similar remaining old block when their
    similarity ratio is at least *cutoff*; such a pair is reported as a line
    diff.  New blocks without a partner are reported as added ("+" lines),
    and old blocks left over are reported as deleted ("-" lines).

    Returns the diff as one string.
    """
    parts = []
    matcher = difflib.SequenceMatcher(None, oldBlocks, newBlocks)
    for tag, alo, ahi, blo, bhi in matcher.get_opcodes():
        if tag == 'equal':
            continue
        elif tag == 'delete':
            parts.extend(_prefixLines(block, "-") for block in oldBlocks[alo:ahi])
        elif tag == 'insert':
            parts.extend(_prefixLines(block, "+") for block in newBlocks[blo:bhi])
        elif tag == 'replace':
            remaining = list(oldBlocks[alo:ahi])
            for newBlock in newBlocks[blo:bhi]:
                scorer = difflib.SequenceMatcher(None)
                scorer.set_seq2(newBlock)
                # The best match is searched afresh for every new block, so a
                # previous block's score or index can never be reused.
                bestRatio, bestIndex = 0.0, None
                for index, oldBlock in enumerate(remaining):
                    scorer.set_seq1(oldBlock)
                    # The cheap upper bounds are checked before the real ratio.
                    if scorer.real_quick_ratio() > bestRatio and \
                       scorer.quick_ratio() > bestRatio:
                        ratio = scorer.ratio()
                        if ratio > bestRatio:
                            bestRatio, bestIndex = ratio, index
                if bestIndex is None or bestRatio < cutoff:
                    # added block
                    parts.append(_prefixLines(newBlock, "+"))
                else:
                    # replaced block
                    parts.append(_hunkBody(remaining[bestIndex], newBlock))
                    del remaining[bestIndex]
            # deleted blocks
            parts.extend(_prefixLines(block, "-") for block in remaining)
        else:
            raise ValueError('unknown tag %r' % (tag,))
    return "".join(parts)


oldList = ["one\ntwo\nthree","four\nfive\nsix","seven\neight\nnine"]
newList = ["four\nfifty\nsix","seven\neight\nnine","ten\neleven\ntwelve"]
a = oldList
b = newList
diffString = diffBlocks(a, b)
which results in the following:
print(diffString)
four
-five
+fifty
six
-one
-two
-three
+ten
+eleven
+twelve
I extracted some specific names of text. The text and function are described below :
import re
text = '''
def cal_revenue(revenues_store, profit_margin, average_profit):
average_profit = revenues_store * profit_margin
return average_profit
'''
# Extract names
lines = text.split('\n')
for line in lines:
    # A signature line starts with "def" and ends with ":".
    x = re.search(r"^def.*:$", line)
    if x is not None:
        # "def name(args):" -> ["name", "args):"]
        values = x[0].split('def ')[1].split('(')
        function_name = values[0]
        # Drop the trailing "):" before splitting the argument list.
        arguments = values[1][:-2].split(', ')
        print(f"Function Name: {function_name}")
        print(f"Arguments: {arguments}")
This function works well and gives the expected results. Now I want to store all of these results in separate dictionaries
# Create dictionaries
splited_table1= dict()
splited_table2= dict()
# Extract names
# Scans `text` line by line for a "def ...:" signature and splits it into
# the function name and the list of argument names.
def extraction_variables(text):
lines = text.split('\n')
for line in lines:
x = re.search(r"^def.*:$", line)
if x != None:
values = x[0].split('def ')[1].split('(')
# These assignments bind names local to the function; the module-level
# dictionaries created above are not modified by them.
splited_table1 = values[0]
splited_table2 = values[1][:-2].split(', ')
return splited_table1, splited_table2
# The returned tuple is not stored anywhere by this call.
extraction_variables(text)
# These refer to the module-level dictionaries, which are still empty.
splited_table1
splited_table2
But after execution of this command dictionaries are empty. So can anybody help me how to store values in dictionaries from the function above?
Try this:
import re
text = '''
def cal_revenue(revenues_store, profit_margin, average_profit):
average_profit = revenues_store * profit_margin
return average_profit
'''
splited_table1 = {}
splited_table2 = {}
# Extract names
def extraction_variables(text):
    """Return the name and argument list of the first function defined in *text*.

    *text* is scanned line by line for a ``def name(args):`` signature
    (leading indentation and a ``-> annotation`` are accepted).

    Returns a ``(name, arguments)`` tuple, where *arguments* is a list of
    the comma-separated parameters as written (empty for ``def f():``).
    When no signature is found, ``(None, [])`` is returned.
    """
    signature = re.compile(r"^\s*def\s+(\w+)\s*\((.*)\)\s*(?:->[^:]*)?:\s*$")
    for line in text.split('\n'):
        found = signature.match(line)
        if found is None:
            continue
        name, raw_arguments = found.groups()
        # Filtering out blanks keeps "def f():" from yielding [''].
        arguments = [part.strip() for part in raw_arguments.split(',') if part.strip()]
        return name, arguments
    return None, []
e = extraction_variables(text)
print(e)
Not much modified but working for me.
if not working, you need to show the output of your code
I am trying to save this dict into a .csv file, with one column for the key and one column for the counted frequency. But when I try to run the code below, I get an error at the end for line 6. What am I doing wrong?
# Whatever abcfreq() returns is stored here; the last line calls .items()
# on it, so it has to be a dict-like object.
encryptedA_edited = abcfreq(encryptedA)
import csv
# newline="" is passed to open() for the file handed to csv.writer.
with open("EncryptA_Edited.csv", "w", newline="") as ECA:
writer = csv.writer(ECA)
# One row per (key, value) pair of the mapping.
writer.writerows(encryptedA_edited.items())
'NoneType' object has no attribute 'items'
This is my function:
import string
# Read both input files completely; the context managers close the handles,
# which the bare open() calls never did.
with open("encryptedA.txt") as source:
    encryptedA = source.read()
with open("encryptedB.txt") as source:
    encryptedB = source.read()
def abcfreq(lettervalues):
    """Count how often each letter a-z occurs in *lettervalues*.

    The text is lower-cased first; characters other than a-z are ignored.
    The per-letter counts and the most frequent letter are printed, and the
    counts are returned as a dict with one entry for each of the 26 letters
    (0 for letters that do not occur) so that callers can use them, e.g. to
    write them to a CSV file.
    """
    lettervalues = lettervalues.lower().strip()
    freq = dict.fromkeys(string.ascii_lowercase, 0)
    for char in lettervalues:
        if char in freq:
            freq[char] += 1
    for key, value in freq.items():
        print("%s:%d" % (key, value))
    max_key = max(freq, key=freq.get)
    max_value = freq[max_key]
    print("The letter with the highest frequency is " + max_key + ":" + str(max_value))
    return freq
This should be what you're looking for.
Added a few functions for your convenience and shows how to view a file once it's been created
from random import choice, randint
import csv, os, string
def freq(element, iterable) -> int:
    """Return how many items of *iterable* compare equal to *element*."""
    return sum(1 for item in iterable if item == element)
def sample(sequence, length):
    '''
    Just for "generating" test strings.

    Yields *length* items, each picked independently from *sequence* with
    random.choice, so items may repeat.  *sequence* must support len() and
    indexing; an empty sequence makes random.choice raise IndexError.
    '''
    for _ in range(length):
        yield choice(sequence)
def ascii(omissions: str = '', include: bool = False) -> str:
    """
    A convenient ascii character set.

    Return the concatenation of the character groups selected by the
    one-letter codes in *omissions*:
        "p" -> ' ' + punctuation
        "u" -> uppercase
        "l" -> lowercase
        "d" -> digits
    By default the listed groups are left out:
    >>> ascii('lup')
    '0123456789'
    With include=True only the listed groups are returned:
    >>> ascii('d', True)
    '0123456789'

    NOTE: inside this module the name shadows the built-in ascii().
    """
    groups = {
        "p": " " + string.punctuation,
        "u": string.ascii_uppercase,
        "l": string.ascii_lowercase,
        "d": string.digits,
    }
    if include:
        return "".join(chars for code, chars in groups.items() if code in omissions)
    return "".join(chars for code, chars in groups.items() if code not in omissions)
def weights(string, omissions='', include=False):
    """Return {character: occurrences in *string*} for a whole character set.

    The set of characters is ascii(omissions, include), so characters of the
    set that do not occur in *string* are reported with a count of 0 and
    characters of *string* outside the set are not reported at all.
    (To measure only the characters actually present, build the dict from
    set(string) instead.)
    """
    from collections import Counter

    # One counting pass instead of re-scanning the input for every character.
    counts = Counter(string)
    return {symbol: counts[symbol] for symbol in ascii(omissions, include)}
def heaviest(string):
    """Return the most frequent character of *string*.

    On a tie the character that appears first in *string* wins.  Any
    character is accepted, not only printable ASCII.  An empty *string*
    raises ValueError (from max()).
    """
    from collections import Counter

    counts = Counter(string)
    return max(string, key=counts.__getitem__)
if __name__ == '__main__':
    # Demo: most frequent character of a fixed string.
    s = 'abcda'
    print(s,heaviest(s),sep='\n\t') # 'a'
    # Four random test strings of 3-5 characters drawn from 'abcd'.
    strings = [''.join(sample(ascii('l',True)[:4],randint(3,5))) for i in range(4)]
    for s in strings:
        path = s+'.csv'
        with open(path, "w", newline="") as ECA:
            writer = csv.writer(ECA)
            writer.writerows(weights(s,'l',True).items())
        # Open the finished file in the associated application.  os.startfile
        # exists only on Windows, and the file is closed before it is opened.
        if hasattr(os, "startfile"):
            os.startfile(path)
I am working on RNA sequence matching
seq = 'UCAGCUGUCAGUCAUGAUC'
sub_seq =['UGUCAG', 'CAGUCA', 'UCAGCU','GAUC']
I am matching the sub_seq to the seq, matched sub_seq is under the seq, if there is no matched, use dash line. Output looks like this:
UCAGCUGUCAGUCAUGAUC
UCAGCU--CAGUCA-GAUC
-----UGUCAG--------
I try to use the dictionary to do this
# Maps the start offset of each sub-sequence inside `seq` (the result of
# seq.find) to a record with the sub-sequence text and its end offset.
index_dict = {}
for i in xrange(len(sub_seq)):
index_dict[seq.find(sub_seq[i])] = {}
index_dict[seq.find(sub_seq[i])]['sequence'] = sub_seq[i]
# end_index is inclusive here: start + length - 1.
index_dict[seq.find(sub_seq[i])]['end_index'] = seq.find(sub_seq[i]) + len(sub_seq[i]) - 1
I cannot figure out the algorithm to do alignment, any help will be appreciated!
# Print one row per sub-sequence: the fragment at the offset where it first
# occurs in `seq`, padded with dashes to the full length of `seq`.
seq_l = len(seq)
for ele in sub_seq:
    start = seq.find(ele)
    ln = len(ele)
    if start != -1:
        end = start + ln
        print("-" * start + ele + "-"*(seq_l- end))
    else:
        # Fragment not found: a row of dashes only.
        print("-" * seq_l)
-----UGUCAG--------
--------CAGUCA-----
UCAGCU-------------
---------------GAUC
Not sure where UCAGCU--CAGUCA-GAUC comes from as you are only using a single sub sequence at a time in your code
Assuming you'll let me change your index_dict slightly, consider:
seq = 'UCAGCUGUCAGUCAUGAUC'
sub_seq =['UGUCAG', 'CAGUCA', 'UCAGCU','GAUC']

# Start offset in seq -> fragment text and exclusive end offset.
# Fragments that do not occur in seq are skipped: a key of -1 could never be
# placed on a row, so the packing loop below would never finish.
# NOTE: two fragments starting at the same offset share one key; the later
# one replaces the earlier one.
index_dict = {}
for fragment in sub_seq:
    start = seq.find(fragment)
    if start == -1:
        continue
    index_dict[start] = {
        'sequence': fragment,
        'end_index': start + len(fragment) # Note this changed
    }

# Greedily pack the fragments into rows: walk the offsets in ascending order
# and take every fragment that starts at or after the end of the previous one.
sorted_keys = sorted(index_dict)
lines = []
while sorted_keys:
    line = []
    next_index = 0
    for k in sorted_keys:
        if k >= next_index:
            line.append(k)
            next_index = index_dict[k]['end_index']
    # Remove keys we used, append line to lines
    sorted_keys = [k for k in sorted_keys if k not in line]
    lines.append(line)

# Build output lines
olines = []
for line in lines:
    oline = ''
    for k in line:
        oline += '-' * (k - len(oline)) # Add dashes before subseq
        oline += index_dict[k]['sequence'] # Add subsequence
    oline += '-' * (len(seq) - len(oline)) # Add trailing dashes
    olines.append(oline)

print(seq)
print('\n'.join(olines))
Output:
UCAGCUGUCAGUCAUGAUC
UCAGCU--CAGUCA-GAUC
-----UGUCAG--------
Note this is pretty verbose, and could be condensed a bit. The while True and for line in lines loops could probably be merged into one, but it should help explain one possible approach.
Edit: This is one way you might join the last two loops:
def align_fragments(seq, sub_seq):
    """Lay the fragments of *sub_seq* out underneath *seq*.

    Every fragment is placed at the offset of its first occurrence in *seq*.
    Fragments are packed greedily, in order of their start offset, into as
    few rows as needed so that no two fragments on a row overlap; all other
    positions are filled with '-'.  Fragments that do not occur in *seq* are
    left out.  Returns the rows as a list of strings, each as long as *seq*.
    """
    pending = []
    for fragment in sub_seq:
        start = seq.find(fragment)
        if start != -1:
            pending.append((start, fragment))
    pending.sort()

    rows = []
    while pending:
        row = ''
        leftover = []
        for start, fragment in pending:
            # len(row) is the end offset of the fragment placed last.
            if start >= len(row):
                row += '-' * (start - len(row)) # Add dashes before subseq
                row += fragment # Add subsequence
            else:
                leftover.append((start, fragment)) # Try again on a later row
        row += '-' * (len(seq) - len(row)) # Add trailing dashes
        rows.append(row)
        pending = leftover
    return rows


seq = 'UCAGCUGUCAGUCAUGAUC'
sub_seq =['UGUCAG', 'CAGUCA', 'UCAGCU','GAUC']
lines = align_fragments(seq, sub_seq)
print(seq)
print('\n'.join(lines))
Output:
UCAGCUGUCAGUCAUGAUC
UCAGCU--CAGUCA-GAUC
-----UGUCAG--------
I'm trying to remove a key/value pair if the key contains 'empty' values.
I have tried the following dictionary comprehension and tried doing it in long form, but it doesn't seem to actually do anything and I get no errors.
def get_Otherfiles(search_root=None):
    """Collect credential-looking lines from script and text files.

    Walks *search_root* recursively and, in every file whose name ends in
    .bat, .vbs, .ps or .txt, keeps each line that matches one of the
    user/password patterns below (matched from the start of the line,
    trailing whitespace removed).

    search_root -- directory to scan.  When omitted, the module-level name
                   ``dir`` is used, as before.
                   NOTE(review): unless the module assigns ``dir`` itself,
                   that name is the built-in function and os.walk will
                   reject it — pass the directory explicitly.

    Prints and returns a dict mapping file name (without directory) to the
    list of matching lines.  Files without any match are not included.
    """
    regs = [
        r"(.*)((U|u)ser(.*))(\s=\s\W\w+\W)",
        r"(.*)((U|u)ser(.*))(\s=\s\w+)",
        r"(.*)((P|p)ass(.*))\s=\s(\W(.*)\W)",
        r"(.*)((P|p)ass(.*))(\s=\s\W\w+\W)",
    ]
    combined = re.compile("(" + ")|(".join(regs) + ")")
    if search_root is None:
        search_root = dir

    result = {}
    for root, dirs, files in os.walk(search_root):
        for filename in files:
            if not filename.endswith(('.bat', '.vbs', '.ps', '.txt')):
                continue
            # The context manager closes the file; undecodable bytes are
            # replaced so that one odd file does not abort the whole scan.
            with open(os.path.join(root, filename), "r", errors="replace") as readfile:
                hits = [m.group(0).rstrip() for m in map(combined.match, readfile) if m]
            # The key is created only when there is something to store, so
            # no empty lists have to be filtered out afterwards.
            if hits:
                result.setdefault(filename, []).extend(hits)
    print(result)
    return result
Current output:
{'debug.txt': [], 'logonscript1.vbs': ['strUser = "guytom"', 'strPassword = "P#ssw0rd1"'], 'logonscript2.bat': ['strUsername = "guytom2"', 'strPass = "SECRETPASSWORD"']}
As you can see I have entries with empty values. I'd like to remove these before printing the data.
In this part of your code:
d.setdefault(filename, [])
for line in readfile:
m = re.match(combined, line)
if m:
d[filename].append(m.group(0).rstrip())
else:
pass
You always add filename as a key to the dictionary, even if you don't subsequently add anything to the resulting list. Try
for line in readfile:
    m = re.match(combined, line)
    if m:
        # The key is created only at the moment a match has to be stored.
        d.setdefault(filename, []).append(m.group(0).rstrip())
which will only initialize d[filename] to an empty list if it is actually necessary to have something on which to call append.
# Keep only the entries whose list is non-empty.  The values are lists, never
# None, so a test against None would keep every entry.
result = {k: v for k, v in d.items() if v}
update won't remove entries ... it will only add or change them
a = {"1":2}
a.update({"2":7})
print a # contains both "1" and "2" keys
Looking at the first matching group in your regex, (.*), if the regex matches but there are no characters to match, group(0) is "", not None. So, you can filter there.
result.update((k, v) for k, v in d.iteritems() if not v)
But you can also have your regex do that part for you. Change that first group to (.+) and you won't have empty values to filter out.
EDIT
Instead of removing empty values at the end, you can avoid adding them to the dict altogether.
def get_Otherfiles():
# fixes: make it a raw string so that \s works right and
# tighten up filtering, ... (U|u) should probably be [Uu] ...
regs = ["(.+)\s*((U|u)ser(.*))(\s=\s\W\w+\W)", "(.*)((U|u)ser(.*))(\s=\s\w+)", "(.*)((P|p)ass(.*))\s=\s(\W(.*)\W)", "(.*)((P|p)ass(.*))(\s=\s\W\w+\W)"]
combined = "(" + ")|(".join(regs) + ")"
cred_results = []
creds = []
un_matched = []
filesfound = []
d = {}
for root, dirs, files in os.walk(dir):
for filename in files:
if filename.endswith(('.bat', '.vbs', '.ps', '.txt')):
readfile = open(os.path.join(root, filename), "r")
# assuming you want to aggregate matching file names...
content_list = d.get(filename, [])
content_orig_len = len(content_list)
for line in readfile:
m = re.match(combined, line)
if m:
content_list.append(m.group(0))
if len(content_list) > content_orig_len:
d[filename] = content_list