can python configObj process a line without '=' - python

I use python ConfigObj to load a config file, it works great if config file in pattern "cfgName=cfgvalue".
Now I need write config file in this way:
basket.ini
[favoFruit]
Apple
Orange
can (how) load this as a list favoFruit['Apple','Orange'] by ConfigObj?
Currently I only get the error message Invalid line at line "2" when using cfgObj=ConfigObj('basket.ini')
The YAML or JSON can do this, my question is can ConfigObj do it too?

configobj doesn't support lists the way you're trying to use them, but as comma separated values:
[fruit]
favourite = Apple, Orange
In your code you just have to access the attribute as usual:
>>> cfg = configobj.ConfigObj('basket.ini')
>>> cfg['fruit']['favourite']
['Apple', 'Orange']
For more information, please have a look at this article.
Edit: If you really need to support configuration file with exactly the same format as in your question, note that it would be easy to write a custom parser for it:
import re
from collections import defaultdict
# Matches a section header such as "[favoFruit]" and captures its name.
_SECTION_RE = re.compile(r'\[(?P<name>.*)\]')


def parse(f):
    """Parse an INI-like stream whose sections hold bare values.

    Each non-blank line is either a ``[section]`` header or a value that
    belongs to the most recent header.  Surrounding whitespace is stripped
    and blank lines are ignored.

    Args:
        f: Any iterable of text lines (an open file, a list of strings...).

    Returns:
        A ``defaultdict(list)`` mapping each section name to the list of
        values found under it.  Values that appear before the first header
        are collected under the key ``None``.
    """
    data = defaultdict(list)
    section = None
    for raw_line in f:
        line = raw_line.strip()
        if not line:
            continue
        match = _SECTION_RE.match(line)
        if match:
            section = match.group('name')
        else:
            data[section].append(line)
    return data
# Close the file deterministically instead of leaking the handle.
with open('basket.ini') as f:
    cfg = parse(f)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(cfg['favoFruit'])
Example output:
['Apple', 'Orange']

Related

Read config file in python

I would like to read following config file having IPs defined for workers and managers. I tried configparser module but it requires key-value pairs. Anyone has any idea to read the below file using python, I would be thankful.
[managers]
1.2.3.4
[workers]
2.3.45.5
3.5.6.7
5.7.8.9
File may have random number of IPs.
If you reformat your data file as follows, you can then use configparser module to parse it. You can install it by doing pip install configparser
Data file
[managers]
ip = 1.2.3.4
[workers]
ip = 2.3.45.5
3.5.6.7
5.7.8.9
Sample Usage
from configparser import ConfigParser
# from ConfigParser import ConfigParser # Python 2 module name (python3 uses the lowercase configparser above)
data_file = 'tmp.txt'
config = ConfigParser()
config.read(data_file)
config.sections()
# ['managers', 'workers']
config['managers']['ip']
# '1.2.3.4'
config['workers']['ip']
#'2.3.45.5\n3.5.6.7\n5.7.8.9'
config['workers']['ip'].splitlines()
#['2.3.45.5', '3.5.6.7', '5.7.8.9']
Using a simple iteration.
Demo:
# Build {section name: [values]} from a file made of "[section]" headers
# followed by one bare value per line.
res = {}
current = None  # Name of the section currently being filled
with open(filename, "r") as infile:
    for line in infile:  # Iterate over each line
        line = line.strip()
        if not line:  # Skip blank lines instead of storing '' as a value
            continue
        if line.startswith("["):  # Check if line is header
            current = line.strip("[]")
            res[current] = []  # Create Key
        elif current is None:
            # A value with no preceding header has nowhere to go.
            raise ValueError("value found before any [section] header: %r" % line)
        else:
            res[current].append(line)  # Append Values.
print(res)
Output:
{'workers': ['2.3.45.5', '3.5.6.7', '5.7.8.9'], 'managers': ['1.2.3.4']}

How to remove comment lines from a JSON file in python

I am getting a JSON file with following format :
// 20170407
// http://info.employeeportal.org
{
"EmployeeDataList": [
{
"EmployeeCode": "200005ABH9",
"Skill": CT70,
"Sales": 0.0,
"LostSales": 1010.4
}
]
}
Need to remove the extra comment lines present in the file.
I tried with the following code :
import json
import commentjson
with open('EmployeeDataList.json') as json_data:
employee_data = json.load(json_data)
'''employee_data = json.dump(json.load(json_data))'''
'''employee_data = commentjson.load(json_data)'''
print(employee_data)`
Still not able to remove the comments from the file and bring
the JSON file in correct format.
Not getting where things are going wrong? Any direction in this regard is highly appreciated.Thanks in advance
You're not using commentjson correctly. It has the same interface as the json module:
import commentjson
with open('EmployeeDataList.json', 'r') as handle:
employee_data = commentjson.load(handle)
print(employee_data)
Although in this case, your comments are simple enough that you probably don't need to install an extra module to remove them:
import json
with open('EmployeeDataList.json', 'r') as handle:
fixed_json = ''.join(line for line in handle if not line.startswith('//'))
employee_data = json.loads(fixed_json)
print(employee_data)
Note the difference here between the two code snippets is that json.loads is used instead of json.load, since you're parsing a string instead of a file object.
Try JSON-minify:
JSON-minify minifies blocks of JSON-like content into valid JSON by removing all whitespace and JS-style comments (single-line // and multiline /* .. */).
I usually read the JSON as a normal file, delete the comments and then parse it as a JSON string. It can be done in one line with the following snippet:
with open(path,'r') as f: jsonDict = json.loads('\n'.join(row for row in f if not row.lstrip().startswith("//")))
IMHO it is very convenient because it does not need CommentJSON or any other non standard library.
Well, that's not valid JSON, so just open it like you would a text document and then delete everything from // to \n.
# Copy EmployeeDataList.json to output.json, dropping every line that
# starts with "//" so the result can be parsed as JSON.
with open("EmployeeDataList.json", "r") as rf:
    with open("output.json", "w") as wf:
        for line in rf:  # Stream line by line; no need to load the whole file
            if line.startswith("//"):
                continue
            wf.write(line)
Your file is parsable using HOCON.
pip install pyhocon
>>> from pyhocon import ConfigFactory
>>> conf = ConfigFactory.parse_file('data.txt')
>>> conf
ConfigTree([('EmployeeDataList',
[ConfigTree([('EmployeeCode', '200005ABH9'),
('Skill', 'CT70'),
('Sales', 0.0),
('LostSales', 1010.4)])])])
If it is the same number of lines every time you can just do:
# Read the whole file, then drop its first 3 lines (adjust the count to the
# number of leading comment lines in your file).
with open('EmployeeDataList.NOTjson', "r") as fh:
    rawText = fh.read()
# Split off exactly three leading lines; what remains after the third
# newline is the JSON payload.  (index("\n", 3) would only look for the
# first newline at or after character offset 3, not the third newline.)
_parts = rawText.split("\n", 3)
json_data = _parts[3] if len(_parts) > 3 else ""
This way json_data is now the string of text without the first 3 lines.

Using ConfigParser to read non-standard config files

I am having a config file of the form
# foo.conf
[section1]
foo=bar
buzz=123
[section2]
line1
line2
line3
that I want to parse using the Python ConfigParser library. Note that section2 does not contain key/value pairs but some raw text instead. I would like to have a possibility to read all (raw) content of section2 to a variable.
Does ConfigParser allow me to read this file or can one of its classes be subclassed in an easy manner to do so?
Using the standard
import ConfigParser
config = ConfigParser.ConfigParser()
config.read('foo.conf')
yields ConfigParser.ParsingError: File contains parsing errors: foo.conf
You could try to use an io adapter to transform the input file into a format suitable for ConfigParser. One way to do that is to transform every plain line that is neither an empty line, nor a comment line, nor a section line, nor a key=value line into linei=original_line, where i starts at 1 in each section and is increased at each such line.
A possible code could be:
class ConfParsAdapter(io.RawIOBase):
    """File-like adapter that makes raw-text sections parseable.

    Wraps an iterable of lines and rewrites every line that is not a section
    header, a ``key=value`` pair, a ``#`` comment or an empty line into
    ``line<N>=<original line>``, where N restarts at 1 in each section.
    All other lines are passed through unchanged.
    """

    @staticmethod
    def _confParsAdapter(fd):
        """Yield the lines of *fd*, rewriting bare lines as ``lineN=...``."""
        num = 1
        # "[name]" optionally followed by a trailing "# comment".
        rxsec = re.compile(r'\[.*\]( *#.*)?$')
        # Anything containing "=" after at least one character.
        rxkv = re.compile(r'.+?=.*')
        # Empty line or a line that is only a "#" comment.
        rxvoid = re.compile(r'(#.*)?$')
        for line in fd:
            stripped = line.strip()
            if rxsec.match(stripped):
                num = 1  # numbering restarts in every section
            elif rxkv.match(line) or rxvoid.match(stripped):
                pass  # already acceptable as-is
            else:
                # The original line (including its newline) becomes the value.
                line = 'line{}={}'.format(num, line)
                num += 1
            yield line

    def __init__(self, fd):
        """Wrap *fd*, an iterable of text lines such as an open file."""
        self.fd = self._confParsAdapter(fd)

    def readline(self, hint=-1):
        """Return the next (possibly rewritten) line, or "" when exhausted.

        *hint* is accepted for interface compatibility and ignored.
        """
        try:
            return next(self.fd)
        except StopIteration:
            return ""
That way, you could use with your current file without changing anything in it:
>>> parser = ConfigParser.RawConfigParser()
>>> parser.readfp(ConfParsAdapter(open('foo.conf')))
>>> parser.sections()
['section1', 'section2']
>>> parser.items('section2')
[('line1', 'line1'), ('line2', 'line2'), ('line3', 'line3')]
>>>
As far as I know,ConfigParser can not do this:
The ConfigParser class implements a basic configuration file parser
language which provides a structure similar to what you would find on
Microsoft Windows INI files.
It seems that your conf file is not a basic configuration file,so maybe two ways you can parse this conf file.
Read the conf file and modify it.
Generate a valid configuration file.

MRJOB open JSON file - Python

I am trying to load a json file as part of the mapper function but it returns "No such file in directory" although the file is existent.
I am already opening a file and parsing through its lines. But want to compare some of its values to a second JSON file.
from mrjob.job import MRJob
import json
import nltk
import re
WORD_RE = re.compile(r"\b[\w']+\b")
sentimentfile = open('sentiment_word_list_stemmed.json')
def mapper(self, _, line):
stemmer = nltk.PorterStemmer()
stems = json.loads(sentimentfile)
line = line.strip()
# each line is a json line
data = json.loads(line)
form = data.get('type', None)
if form == 'review':
bs_id = data.get('business_id', None)
text = data['text']
stars = data['stars']
words = WORD_RE.findall(text)
for word in words:
w = stemmer.stem(word)
senti = stems.get[w]
if senti:
yield (bs_id, (senti, 1))
You should not be opening a file in the mapper function at all. You only need to pass the file in as STDIN or as the first argument for the mapper to pick it up. Do it like this:
python mrjob_program.py sentiment_word_list_stemmed.json > output
OR
python mrjob_program.py < sentiment_word_list_stemmed.json > output
Either one will work. It says that there is no such file or directory because these mappers are not able to see the file that you are specifying. The mappers are designed to run on remote machines. Even if you wanted to read from a file in the mapper you would need to copy the file that you are passing to all machines in the cluster which doesn't really make sense for this example. You can actually specify a DEFAULT_INPUT_PROTOCOL so that the mapper know which type of input you are using as well.
Here is a talk on the subject that will help:
http://blip.tv/pycon-us-videos-2009-2010-2011/pycon-2011-mrjob-distributed-computing-for-everyone-4898987/
You are using the json.loads() function, while passing in an open file. Use json.load() instead (note, no s).
stems = json.load(sentimentfile)
You do need to re-open the file every time you call your mapper() function, better just store the filename globally:
sentimentfile = 'sentiment_word_list_stemmed.json'
def mapper(self, _, line):
stemmer = nltk.PorterStemmer()
stems = json.load(open(sentimentfile))
Last but not least, you should use a absolute path to the filename, and not rely on the current working directory being correct.

Using ConfigParser to read a file without section name

I am using ConfigParser to read the runtime configuration of a script.
I would like to have the flexibility of not providing a section name (there are scripts which are simple enough; they don't need a 'section'). ConfigParser will throw a NoSectionError exception, and will not accept the file.
How can I make ConfigParser simply retrieve the (key, value) tuples of a config file without section names?
For instance:
key1=val1
key2:val2
I would rather not write to the config file.
You can do this in a single line of code.
In python 3, prepend a fake section header to your config file data, and pass it to read_string().
from configparser import ConfigParser
parser = ConfigParser()
with open("foo.conf") as stream:
parser.read_string("[top]\n" + stream.read()) # This line does the trick.
You could also use itertools.chain() to simulate a section header for read_file(). This might be more memory-efficient than the above approach, which might be helpful if you have large config files in a constrained runtime environment.
from configparser import ConfigParser
from itertools import chain
parser = ConfigParser()
with open("foo.conf") as lines:
lines = chain(("[top]",), lines) # This line does the trick.
parser.read_file(lines)
In python 2, prepend a fake section header to your config file data, wrap the result in a StringIO object, and pass it to readfp().
from ConfigParser import ConfigParser
from StringIO import StringIO
parser = ConfigParser()
with open("foo.conf") as stream:
stream = StringIO("[top]\n" + stream.read()) # This line does the trick.
parser.readfp(stream)
With any of these approaches, your config settings will be available in parser.items('top').
You could use StringIO in python 3 as well, perhaps for compatibility with both old and new python interpreters, but note that it now lives in the io package and readfp() is now deprecated.
Alternatively, you might consider using a TOML parser instead of ConfigParser.
Alex Martelli provided a solution for using ConfigParser to parse .properties files (which are apparently section-less config files).
His solution is a file-like wrapper that will automagically insert a dummy section heading to satisfy ConfigParser's requirements.
Enlightened by this answer by jterrace, I come up with this solution:
Read entire file into a string
Prefix with a default section name
Use StringIO to mimic a file-like object
ini_str = '[root]\n' + open(ini_path, 'r').read()
ini_fp = StringIO.StringIO(ini_str)
config = ConfigParser.RawConfigParser()
config.readfp(ini_fp)
EDIT for future googlers: As of Python 3.4+ readfp is deprecated, and StringIO is not needed anymore. Instead we can use read_string directly:
import configparser  # Python 3 module name; read_string() only exists there

# Prepend a dummy section header so the section-less file becomes parseable.
with open('config_file') as f:
    file_content = '[dummy_section]\n' + f.read()
config_parser = configparser.RawConfigParser()
config_parser.read_string(file_content)
You can use the ConfigObj library to do that simply : http://www.voidspace.org.uk/python/configobj.html
Updated: Find latest code here.
If you are under Debian/Ubuntu, you can install this module using your package manager :
apt-get install python-configobj
An example of use:
from configobj import ConfigObj
config = ConfigObj('myConfigFile.ini')
config.get('key1') # You will get val1
config.get('key2') # You will get val2
The easiest way to do this is to use python's CSV parser, in my opinion. Here's a read/write function demonstrating this approach as well as a test driver. This should work provided the values are not allowed to be multi-line. :)
import csv
import operator
def _open_csv(filename, mode):
    """Open *filename* the way the csv module expects on this Python version.

    Python 2's csv module needs binary files, while Python 3's needs text
    files opened with newline='' so that csv controls line endings itself.
    *mode* is "r" or "w".
    """
    import sys  # local import: the snippet's import block only has csv/operator

    if sys.version_info[0] < 3:
        return open(filename, mode + "b")
    return open(filename, mode, newline="")


def read_properties(filename):
    """Read a properties file with each line of the format key=value.

    Blank lines are skipped.

    Keyword arguments:
        filename -- the name of the file to be read

    Returns a dictionary containing the pairs.
    Raises csv.Error if a non-blank row does not split into exactly two
    fields (a key and a value).
    """
    result = {}
    with _open_csv(filename, "r") as csvfile:
        reader = csv.reader(csvfile, delimiter='=', escapechar='\\', quoting=csv.QUOTE_NONE)
        for row in reader:
            if not row:
                continue  # csv yields [] for a blank line
            if len(row) != 2:
                # Covers both too few and too many fields.
                raise csv.Error("Expected exactly one key=value pair on row with contents: " + str(row))
            result[row[0]] = row[1]
    return result


def write_properties(filename, dictionary):
    """Write the dictionary in key-sorted order as key=value lines.

    Keyword arguments:
        filename -- the name of the file to be written
        dictionary -- a dictionary containing the key/value pairs.
    """
    with _open_csv(filename, "w") as csvfile:
        writer = csv.writer(csvfile, delimiter='=', escapechar='\\', quoting=csv.QUOTE_NONE)
        for key, value in sorted(dictionary.items(), key=operator.itemgetter(0)):
            writer.writerow([key, value])


def main():
    """Round-trip a sample dictionary through a properties file and report."""
    data = {
        "Hello": "5+5=10",
        "World": "Snausage",
        "Awesome": "Possum"
    }

    filename = "test.properties"
    write_properties(filename, data)
    newdata = read_properties(filename)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Read in: ")
    print(newdata)
    print("")

    with open(filename, 'r') as propfile:
        contents = propfile.read()
    print("File contents:")
    print(contents)

    print("Success!" if data == newdata else "Failure!")


if __name__ == '__main__':
    main()
Having ran into this problem myself, I wrote a complete wrapper to ConfigParser (the version in Python 2) that can read and write files without sections transparently, based on Alex Martelli's approach linked on the accepted answer. It should be a drop-in replacement to any usage of ConfigParser. Posting it in case anyone in need of that finds this page.
import ConfigParser
import StringIO
class SectionlessConfigParser(ConfigParser.RawConfigParser):
    """
    Extends ConfigParser to allow files without sections.

    This is done by wrapping read files and prepending them with a placeholder
    section, which defaults to '__config__'
    """

    def __init__(self, *args, **kwargs):
        """Accept RawConfigParser's arguments plus an optional
        ``default_section`` keyword naming the placeholder section."""
        default_section = kwargs.pop('default_section', None)
        ConfigParser.RawConfigParser.__init__(self, *args, **kwargs)

        self._default_section = None
        self.set_default_section(default_section or '__config__')

    def get_default_section(self):
        """Return the name of the placeholder section."""
        return self._default_section

    def set_default_section(self, section):
        """Rename the placeholder section to *section*, moving its values."""
        if section == self._default_section:
            # Nothing to move; add_section() would raise DuplicateSectionError.
            return

        self.add_section(section)

        # move all values from the previous default section to the new one
        try:
            default_section_items = self.items(self._default_section)
            self.remove_section(self._default_section)
        except ConfigParser.NoSectionError:
            pass
        else:
            for (key, value) in default_section_items:
                self.set(section, key, value)

        self._default_section = section

    def read(self, filenames):
        """Read one filename or a list of them; return those read successfully.

        Files that cannot be opened are skipped.
        """
        if isinstance(filenames, basestring):
            filenames = [filenames]

        read_ok = []
        for filename in filenames:
            try:
                with open(filename) as fp:
                    self.readfp(fp)
            except IOError:
                continue
            else:
                read_ok.append(filename)

        return read_ok

    def readfp(self, fp, *args, **kwargs):
        """Parse *fp* after prepending the placeholder section header."""
        # StringIO is imported as a module, so the class must be qualified.
        stream = StringIO.StringIO()

        try:
            stream.name = fp.name
        except AttributeError:
            pass

        stream.write('[' + self._default_section + ']\n')
        stream.write(fp.read())
        stream.seek(0, 0)

        return ConfigParser.RawConfigParser.readfp(self, stream, *args,
                                                   **kwargs)

    def write(self, fp):
        """Write the configuration to *fp*, emitting the placeholder
        section's items first and without a section header."""
        # Write the items from the default section manually and then remove them
        # from the data. They'll be re-added later.
        try:
            default_section_items = self.items(self._default_section)
        except ConfigParser.NoSectionError:
            # Section is gone: nothing to write, but still re-create it below.
            default_section_items = []
        else:
            self.remove_section(self._default_section)

            for (key, value) in default_section_items:
                fp.write("{0} = {1}\n".format(key, value))

            fp.write("\n")

        try:
            ConfigParser.RawConfigParser.write(self, fp)
        finally:
            # Restore the placeholder section even if the parent write fails.
            self.add_section(self._default_section)
            for (key, value) in default_section_items:
                self.set(self._default_section, key, value)
Blueicefield's answer mentioned configobj, but the original lib only supports Python 2. It now has a Python 3+ compatible port:
https://github.com/DiffSK/configobj
APIs haven't changed, see its doc.

Categories