JSON dumps custom formatting - python

I'd like to dump a Python dictionary into a JSON file with a particular custom format. For example, the following dictionary my_dict,
'text_lines': [{"line1"}, {"line2"}]
dumped with
f.write(json.dumps(my_dict, sort_keys=True, indent=2))
looks like this
"text_lines": [
{
"line1"
},
{
"line2"
}
]
while I prefer that it looks like this
"text_lines":
[
{"line1"},
{"line2"}
]
Similarly, I want the following
"location": [
22,
-8
]
to look like this
"location": [22, -8]
(that is, more like a coordinate, which it is).
I know that this is a cosmetic issue, but it's important to me to preserve this formatting for easier hand editing of the file.
Any way of doing this kind of customisation? An explained example would be great (the docs did not get me very far).

I have used the example provided by Tim Ludwinski and adapted it to my preference:
class CompactJSONEncoder(json.JSONEncoder):
"""A JSON Encoder that puts small lists on single lines."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.indentation_level = 0
def encode(self, o):
"""Encode JSON object *o* with respect to single line lists."""
if isinstance(o, (list, tuple)):
if self._is_single_line_list(o):
return "[" + ", ".join(json.dumps(el) for el in o) + "]"
else:
self.indentation_level += 1
output = [self.indent_str + self.encode(el) for el in o]
self.indentation_level -= 1
return "[\n" + ",\n".join(output) + "\n" + self.indent_str + "]"
elif isinstance(o, dict):
self.indentation_level += 1
output = [self.indent_str + f"{json.dumps(k)}: {self.encode(v)}" for k, v in o.items()]
self.indentation_level -= 1
return "{\n" + ",\n".join(output) + "\n" + self.indent_str + "}"
else:
return json.dumps(o)
def _is_single_line_list(self, o):
if isinstance(o, (list, tuple)):
return not any(isinstance(el, (list, tuple, dict)) for el in o)\
and len(o) <= 2\
and len(str(o)) - 2 <= 60
#property
def indent_str(self) -> str:
return " " * self.indentation_level * self.indent
def iterencode(self, o, **kwargs):
"""Required to also work with `json.dump`."""
return self.encode(o)
Also see the version I have in use.

Here's something that I hacked together. Not very pretty but it seems to work. You could probably handle simple dictionaries in a similar way.
class MyJSONEncoder(json.JSONEncoder):
def __init__(self, *args, **kwargs):
super(MyJSONEncoder, self).__init__(*args, **kwargs)
self.current_indent = 0
self.current_indent_str = ""
def encode(self, o):
#Special Processing for lists
if isinstance(o, (list, tuple)):
primitives_only = True
for item in o:
if isinstance(item, (list, tuple, dict)):
primitives_only = False
break
output = []
if primitives_only:
for item in o:
output.append(json.dumps(item))
return "[ " + ", ".join(output) + " ]"
else:
self.current_indent += self.indent
self.current_indent_str = "".join( [ " " for x in range(self.current_indent) ])
for item in o:
output.append(self.current_indent_str + self.encode(item))
self.current_indent -= self.indent
self.current_indent_str = "".join( [ " " for x in range(self.current_indent) ])
return "[\n" + ",\n".join(output) + "\n" + self.current_indent_str + "]"
elif isinstance(o, dict):
output = []
self.current_indent += self.indent
self.current_indent_str = "".join( [ " " for x in range(self.current_indent) ])
for key, value in o.items():
output.append(self.current_indent_str + json.dumps(key) + ": " + self.encode(value))
self.current_indent -= self.indent
self.current_indent_str = "".join( [ " " for x in range(self.current_indent) ])
return "{\n" + ",\n".join(output) + "\n" + self.current_indent_str + "}"
else:
return json.dumps(o)
NOTE: It's pretty much unnecessary in this code to be inheriting from JSONEncoder.

You will need to create a subclass of the json.JSONEncoder class and override the methods
for each type of value so they write the format you need. You may end up re-implementing
most of them, depending on what your formatting needs are.
http://docs.python.org/2/library/json.html has an example for extending
the JSONEncoder.

Related

TypeError: unbound method getInterfaceName() must be called with Interface instance as first argument (got nothing instead)

class Unit:
def init(self, _chassisId, _unitNo, _interface):
self.chassisId = _chassisId
self.unitNo = _unitNo
self.interface = _interface
def getInterface(self):
return self.interface
#staticmethod
def parse(elem):
unitList = elem.find(UNIT+LIST)
chassisList = []
for unit in unitList.findall(UNIT):
try:
unitNumber = unit.find(UNIT_NUMBER).text
interface = unit.find(INTERFACE)
interface = ""
chassisIdElem = unit.find(CHASSIS_ID)
chassisId = ""
if chassisIdElem is not None:
chassisId = unit.find(CHASSIS_ID).text
elif unit.find(BURNED_IN_MAC) is not None:
chassisId = unit.find(BURNED_IN_MAC).text
chassisId = chassisId.replace(".", "").replace(":", "").upper()
chassis = Unit(chassisId, interface, unitNumber)
chassisList.append(chassis)
except Exception as e:
print "Unit details not found", e
return chassisList
def getChassisId(self):
return self.chassisId
def __str__(self):
str = "\n"
str += "\nUnit Details:- "
len = str.__len__();
str += "\n"
for i in range(1,len-1):
str += "-"
str += "\nUnit: " + self.unitNo
str += "\nChassis Id: " + self.chassisId
str += "\nInterfaces: " + self.interfaces
return str
def __add__(self, other):
return str(self) + other
def __radd__(self, other):
return other + str(self)
class Interface:
def init(self, _linkState, _interfaceName):
self.linkState = _linkState
self.interfaceName = _interfaceName
#staticmethod
def parse(elem):
prefix = Device.getPrefix(elem.tag)
interfaceList = elem.find(INTERFACE + LIST)
interfaceNameTag = eval(prefix + "_INTERFACE_NAME")
linkStateTag = eval(prefix + "_LINK_STATE")
interfaces = []
for interface in interfaceList.findall(INTERFACE):
try:
interfaceName = interface.find(interfaceNameTag).text
linkStateElem = interface.find(LINK_STATE)
linkState = ""
if linkStateElem is not None:
linkState = interface.find(LINK_STATE).text
elif interface.find(LINE_PROTOCOL) is not None:
linkState = interface.find(LINE_PROTOCOL).text
interface = Interface(linkState, Name)
interfaces.append(interface)
except Exception as e:
print "Interface details not found", e
return interfaces
def getLinkState(self):
return self.linkState
def getInterfaceName(self):
return self.interfaceName
def __str__(self):
str = "\n"
str += "\nInterface Details:- "
len = str.__len__();
str += "\n"
for i in range(1,len-1):
str += "-"
str += "\nLink State: " + self.linkState
str += "\nInterface Name: " + self.interfaceName
return str
def __add__(self, other):
return str(self) + other
def __radd__(self, other):
return other + str(self)
You haven't shown us the call to getInterfaceName() that causes the error, which makes it harder to help you.
However, I'll guess that the call looks something like this:
something = Interface.getInterfaceName()
You can't do it that way. You must create an instance of Interface, and then call its .getInterfaceName() method:
myInterface = Interface()
something = myInterface.getInterfaceName()

Extracting all path from a multi-level dictionary

I have a dictionary like this:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
Imagine this like a folder structure. And I would like to get similar as os.walk would do with a folder structure. Something like this:
["DIR1/DIR11/DIR111/Maki111",
"DIR1/DIR11/DIR112/Maki112",
"DIR1/DIR12/Maki12",
"DIR1/DIR13/DIR131/Maki131"]
So it is basically all the path for the dictionary values. I tried it many ways with recursive functions but I got lost.
Here is my latest trial:
def walk(input_dict, path_string = "", result = ""):
for key, value in input_dict.items():
if isinstance(value, dict):
path_string += "/" + key
print "==== DICT ====", "\nkey: ", key, "\nvalue: ", value, "\n\t\tpath_string: ", path_string
result = walk(value, path_string)
print "\t\t\t\tresulting: ", result
elif isinstance(value, str):
print "==== NOT DICT ===="
path_string += "/" + value
print "\t\tpath_string: ", path_string, "\nvalue: ", value
return path_string
else:
path_string = "/" + key
result += "\n" + result
return result
Using Python 3:
dirDict = {"DIR1" : {
"DIR11" : {
"DIR111" : "Maki111",
"DIR112" : "Maki112"
},
"DIR12" : "Maki12",
"DIR13" : {
"DIR131" : "Maki131"
}
}
}
def recurse(d, prefix=None, sep='/'):
if prefix is None:
prefix = []
for key, value in d.items():
if isinstance(value, dict):
yield from recurse(value, prefix + [key])
else:
yield sep.join(prefix + [key, value])
print(list(recurse(dirDict)))
Output:
['DIR1/DIR13/DIR131/Maki131', 'DIR1/DIR11/DIR111/Maki111', 'DIR1/DIR11/DIR112/Maki112', 'DIR1/DIR12/Maki12']
def walk(d, path):
paths = []
if len(d) == 0:
return path
for k, v in d.iteritems():
child_path = path + k + '/'
if isinstance(v, basestring):
paths.append(child_path + v)
else:
paths.extend(walk(v, child_path))
return paths
THe walk function I posted at https://gist.github.com/nvie/f304caf3b4f1ca4c3884#gistcomment-1597937 can be used as a helper for your problem:
def walk(obj, parent_first=True):
# Top down?
if parent_first:
yield (), obj
# For nested objects, the key is the path component.
if isinstance(obj, dict):
children = obj.items()
# For nested lists, the position is the path component.
elif isinstance(obj, (list, tuple)):
children = enumerate(obj)
# Scalar values have no children.
else:
children = []
# Recurse into children
for key, value in children:
for child_path, child in walk(value, parent_first):
yield (key,) + child_path, child
# Bottom up?
if not parent_first:
yield (), obj
Your problem can be approached using something like this:
for path, value in walk(obj):
if isinstance(value, str): # leaf node
path_with_value = path + (value,)
print("/".join(path_with_value))
A compact solution with a list comprehension:
def f(v):
if isinstance(v, dict):
return dict_to_list(v)
elif isinstance(v, list):
return v
else:
return [v]
def dict_to_list(d):
return ['{}/{}'.format(k, i) for k, v in d.items() for i in f(v)]
lst = dict_to_list(dirDict)
lst.sort()
print('\n'.join(lst))

Removing quotes from keys of dicts when rendering to template

is it possible to remove the quotes on keys during rendering a dict with render_to_string function so that I get key:value and not 'key':value in template?
For example if this is my dict:
d = {'a':1, 'b':2}
and I render it like this,
return render_to_string('somefile.json', {'d':d})
Then in somefile.json I will get {{d}} as {'a':1, 'b':2}, but I want {{d}} to be {a:1, b:2}. (without quotes on a and b)
How do I achieve this?
TIA
One approach you could use is overriding the __repr__ method of dict class or subclassing it and changing the method there. I have a the latter solution below.
class MyDict(dict):
def __repr__(self):
s = "{"
for key in self:
s += "{0}:{1}, ".format(key, self[key])
if len(s) > 1:
s = s[0: -2]
s += "}"
return s
MyDict({'a': 1, 'b': 2})
{a:1, b:2}
I found Stepan's answer to be accurate and useful. It was useful in my case to also apply the rendering recursively, and to keep quotes on string elements. This extended version may also be useful to others:
class DictWithoutQuotedKeys(dict):
def __repr__(self):
s = "{"
for key in self:
s += "{0}:".format(key)
if isinstance(self[key], basestring):
# String values still get quoted
s += "\"{0}\", ".format(self[key])
elif isinstance(self[key], dict):
# Apply formatting recursively
s += "{0}, ".format(DictWithoutQuotedKeys(self[key]))
else:
s += "{0}, ".format(self[key])
if len(s) > 1:
s = s[0: -2]
s += "}"
return s
Tweaked the code to deal with nested list and dictinory.
Code :
class DictWithoutQuotedKeys(dict):
def __repr__(self):
# print(self)
s = "{"
for key in self:
s += "{0}:".format(key)
if isinstance(self[key], dict):
# Apply formatting recursively
s += "{0}, ".format(DictWithoutQuotedKeys(self[key]))
elif isinstance(self[key], list):
s +="["
for l in self[key]:
if isinstance(l, dict):
s += "{0}, ".format(DictWithoutQuotedKeys(l))
else:
#print(l)
if isinstance(l, int):
s += "{0}, ".format(l)
else:
s += "'{0}', ".format(l)
if len(s) > 1:
s = s[0: -2]
s += "], "
else:
if isinstance(self[key], int):
s += "{0}, ".format(self[key])
else:
s += "\'{0}\', ".format(self[key])
# Quote all the values
#s += "\'{0}\', ".format(self[key])
if len(s) > 1:
s = s[0: -2]
s += "}"
return s
Input :
data = {'a':["1", "3", 4], 'b':[{'check1':9, 'check2':"kkk"}], 'c': {'d':2 , 'e': 3}, 'f':'dd', 't':2}
Output :
{a:['1', '3', 4], b:[{check1:9, check2:'kkk'}], c:{d:2, e:3}, f:'dd', t:2}
I found John's solution useful, Tweaked his code a bit to suit my case. To Quote all the Values in dict and keys without Quotes in Dict
{
"variant": {
"id": 808950810,
"option1": "Not Pink",
"price": "99.00"
}
}
To
{variant:{id:'32036302848074', compare_at_price:'39.83'}}
class DictWithoutQuotedKeys(dict):
def __repr__(self):
s = "{"
for key in self:
s += "{0}:".format(key)
# if isinstance(self[key], str):
# # String values still get quoted
# s += "\"{0}\", ".format(self[key])
# if isinstance(self[key], int):
# # String values still get quoted
# s += "\'{0}\', ".format(self[key])
if isinstance(self[key], dict):
# Apply formatting recursively
s += "{0}, ".format(DictWithoutQuotedKeys(self[key]))
else:
# Quote all the values
s += "\'{0}\', ".format(self[key])
if len(s) > 1:
s = s[0: -2]
s += "}"
return s

Python compiler for simple language to java vm code algorithm

I have a simple language that I am trying to write a compiler for (yes it is homework) to compile a simple language I shall describe if necessary to java vm code.
It currently works pretty well I've just hit a bump with logical AND's and OR's.
Each work fine in a single if/while condition, but if I try and chain them things go wrong, correct me if I am wrong but I believe that AND has precedence, but I was wondering if there are logical ways of arranging them? I think is what I'm trying to ask, the java vm code output just has the compare and jump statements one after the other (which seems wrong). I realise it's quite abstract so maybe what I'm after is a pseudo code/algorithm for how to structure chained AND's and OR's.
EDIT: Currently just treats any combination of AND and OR as AND's. Comparing the factor/term/expression connection (compared to booleanfactor etc) I believe that AND has precedence? Just a thought.
Apologies if this is poorly understood :/
So i figure ill include relevant info just incase.
compiler
import re
import sys
# Restrictions:
# Integer constants must be short.
# Stack size must not exceed 1024.
# Integer is the only type.
# Logical operators cannot be nested.
class Scanner:
'''The interface comprises the methods lookahead and consume.
Other methods should not be called from outside of this class.'''
def __init__(self, input_file):
'''Reads the whole input_file to input_string.'''
# source code of the program to be compiled
self.input_string = input_file.read()
# index where the unprocessed part of input_string starts
self.current_char_index = 0
# a pair (most recently read token, matched substring of input_string)
self.current_token = self.get_token()
def skip_white_space(self):
'''Consumes all characters in input_string up to the next
non-white-space character.'''
if (self.current_char_index >= len(self.input_string) - 1):
# bad fix for it over-running the end of the file
return
while self.input_string[self.current_char_index].isspace():
self.current_char_index += 1
return
def get_token(self):
'''Returns the next token and the part of input_string it matched.
Returns None if there is no next token.
The characters up to the end of the token are consumed.'''
self.skip_white_space()
# find the longest prefix of input_string that matches a token
token, longest = None, ''
for (t, r) in Token.token_regexp:
match = re.match(r, self.input_string[self.current_char_index:])
if match and match.end() > len(longest):
token, longest = t, match.group()
# consume the token by moving the index to the end of the matched part
self.current_char_index += len(longest)
return (token, longest)
def lookahead(self):
'''Returns the next token without consuming it.
Returns None if there is no next token.'''
return self.current_token[0]
def consume(self, *tokens):
'''Returns the next token and consumes it, if it is in tokens.
Raises an exception otherwise.
If the token is a number or an identifier, its value is returned.'''
if self.current_token[0] not in tokens:
print('Token ' + self.current_token[0] + ' isn\'t in the tokens: ')
for token in tokens:
print(token)
raise Exception('Token is not in tokens this shouldn\'t happen much')
if self.current_token[0] == 'ID':
symbol_table.location(self.current_token[1])
value = self.current_token[1]
elif (self.current_token[0] == 'NUM'):
value = self.current_token[1]
else:
value = self.current_token[0]
self.current_token = self.get_token()
return value
class Token:
DO = 'DO';
ELSE = 'ELSE';
END = 'END';
IF = 'IF';
THEN = 'THEN';
WHILE = 'WHILE';
SEM = 'SEM';
BEC = 'BEC';
LESS = 'LESS';
EQ = 'EQ';
GRTR = 'GRTR';
LEQ = 'LEQ';
NEQ = 'NEQ';
GEQ = 'GEQ';
ADD = 'ADD';
SUB = 'SUB';
MUL = 'MUL';
DIV = 'DIV';
LPAR = 'LPAR';
RPAR = 'RPAR';
NUM = 'NUM';
ID = 'ID';
READ = 'READ';
WRITE = 'WRITE';
OR = 'OR';
AND = 'AND';
NOT = 'NOT';
# The following list gives the regular expression to match a token.
# The order in the list matters for mimicking Flex behaviour.
# Longer matches are preferred over shorter ones.
# For same-length matches, the first in the list is preferred.
token_regexp = [
(DO, 'do'),
(ELSE, 'else'),
(END, 'end'),
(IF, 'if'),
(THEN, 'then'),
(WHILE, 'while'),
(READ, 'read'),
(WRITE, 'write'),
(OR, 'or'),
(AND, 'and'),
(NOT, 'not'),
(SEM, ';'),
(BEC, ':='),
(LESS, '<'),
(EQ, '='),
(NEQ, '!='),
(GRTR, '>'),
(LEQ, '<='),
(GEQ, '>='),
(ADD, '[+]'), # + is special in regular expressions
(SUB, '-'),
(MUL, '[*]'),
(DIV, '/'),
(LPAR, '[(]'), # ( is special in regular expressions
(RPAR, '[)]'), # ) is special in regular expressions
(ID, '[a-z]+'),
(NUM, '[0-9]+'),
]
class Symbol_Table:
'''A symbol table maps identifiers to locations.'''
def __init__(self):
self.symbol_table = {}
def size(self):
'''Returns the number of entries in the symbol table.'''
return len(self.symbol_table)
def location(self, identifier):
'''Returns the location of an identifier. If the identifier is not in
the symbol table, it is entered with a new location. Locations are
numbered sequentially starting with 0.'''
if identifier in self.symbol_table:
return self.symbol_table[identifier]
index = len(self.symbol_table)
self.symbol_table[identifier] = index
return index
class Label:
def __init__(self):
self.current_label = 0
def next(self):
'''Returns a new, unique label.'''
self.current_label += 1
return 'l' + str(self.current_label)
def indent(s, level):
return ' '*level + s + '\n'
# Each of the following classes is a kind of node in the abstract syntax tree.
# indented(level) returns a string that shows the tree levels by indentation.
# code() returns a string with JVM bytecode implementing the tree fragment.
# true_code/false_code(label) jumps to label if the condition is/is not true.
# Execution of the generated code leaves the value of expressions on the stack.
class Program_AST:
def __init__(self, program):
self.program = program
def __repr__(self):
return repr(self.program)
def indented(self, level):
return self.program.indented(level)
def code(self):
program = self.program.code()
local = symbol_table.size()
java_scanner = symbol_table.location('Java Scanner')
return '.class public Program\n' + \
'.super java/lang/Object\n' + \
'.method public <init>()V\n' + \
'aload_0\n' + \
'invokenonvirtual java/lang/Object/<init>()V\n' + \
'return\n' + \
'.end method\n' + \
'.method public static main([Ljava/lang/String;)V\n' + \
'.limit locals ' + str(local) + '\n' + \
'.limit stack 1024\n' + \
'new java/util/Scanner\n' + \
'dup\n' + \
'getstatic java/lang/System.in Ljava/io/InputStream;\n' + \
'invokespecial java/util/Scanner.<init>(Ljava/io/InputStream;)V\n' + \
'astore ' + str(java_scanner) + '\n' + \
program + \
'return\n' + \
'.end method\n'
class Statements_AST:
def __init__(self, statements):
self.statements = statements
def __repr__(self):
result = repr(self.statements[0])
for st in self.statements[1:]:
result += '; ' + repr(st)
return result
def indented(self, level):
result = indent('Statement(s)', level)
for st in self.statements:
result += st.indented(level+1)
return result
def code(self):
result = ''
for st in self.statements:
result += st.code()
return result
class If_AST:
def __init__(self, boolean_expression, then):
self.boolean_expression = boolean_expression
self.then = then
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' end'
def indented(self, level):
return indent('If-Then', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1)
def code(self):
l1 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
l1 + ':\n'
class If_Else_AST:
def __init__(self, boolean_expression, then, _else):
self.boolean_expression = boolean_expression;
self.then = then;
self._else = _else;
def __repr__(self):
return 'if ' + repr(self.boolean_expression) + ' then ' + \
repr(self.then) + ' else ' + \
repr(self._else) + ' end'
def indented(self, level):
return indent('If-Then-Else', level) + \
self.boolean_expression.indented(level+1) + \
self.then.indented(level+1) + \
indent('Else', level+1) + \
self._else.indented(level+1)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return self.boolean_expression.code(l1) + \
self.then.code() + \
'goto ' + l2 + '\n' + \
l1 + ':\n' + \
self._else.code() + \
l2 + ':\n'
class While_AST:
def __init__(self, boolean_term, body):
self.boolean_term = boolean_term
self.body = body
def __repr__(self):
return 'while ' + repr(self.boolean_term) + ' do ' + \
repr(self.body) + ' end'
def indented(self, level):
return indent('While-Do', level) + \
self.boolean_term.indented(level+1) + \
self.body.indented(level+2)
def code(self):
l1 = label_generator.next()
l2 = label_generator.next()
return l1 + ':\n' + \
self.boolean_term.code(l2) + \
self.body.code() + \
'goto ' + l1 + '\n' + \
l2 + ':\n'
class Assign_AST:
def __init__(self, identifier, expression):
self.identifier = identifier
self.expression = expression
def __repr__(self):
return repr(self.identifier) + ':=' + repr(self.expression)
def indented(self, level):
return indent('Assign', level) + \
self.identifier.indented(level+1) + \
self.expression.indented(level+1)
def code(self):
loc = symbol_table.location(self.identifier.identifier)
return self.expression.code() + \
'istore ' + str(loc) + '\n'
class Write_AST:
def __init__(self, expression):
self.expression = expression
def __repr__(self):
return 'write ' + repr(self.expression)
def indented(self, level):
return indent('Write', level) + self.expression.indented(level+1)
def code(self):
return 'getstatic java/lang/System/out Ljava/io/PrintStream;\n' + \
self.expression.code() + \
'invokestatic java/lang/String/valueOf(I)Ljava/lang/String;\n' + \
'invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V\n'
class Read_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return 'read ' + repr(self.identifier)
def indented(self, level):
return indent('Read', level) + self.identifier.indented(level+1)
def code(self):
java_scanner = symbol_table.location('Java Scanner')
loc = symbol_table.location(self.identifier.identifier)
return 'aload ' + str(java_scanner) + '\n' + \
'invokevirtual java/util/Scanner.nextInt()I\n' + \
'istore ' + str(loc) + '\n'
class Comparison_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.LESS:'<', Token.EQ:'=', Token.GRTR:'>',
Token.LEQ:'<=', Token.NEQ:'!=', Token.GEQ:'>=' }
return repr(self.left) + op[self.op] + repr(self.right)
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def true_code(self, label):
op = { Token.LESS:'if_icmplt', Token.EQ:'if_icmpeq',
Token.GRTR:'if_icmpgt', Token.LEQ:'if_icmple',
Token.NEQ:'if_icmpne', Token.GEQ:'if_icmpge' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
def false_code(self, label):
# Negate each comparison because of jump to "false" label.
op = { Token.LESS:'if_icmpge', Token.EQ:'if_icmpne',
Token.GRTR:'if_icmple', Token.LEQ:'if_icmpgt',
Token.NEQ:'if_icmpeq', Token.GEQ:'if_icmplt' }
return self.left.code() + \
self.right.code() + \
op[self.op] + ' ' + label + '\n'
class Expression_AST:
def __init__(self, left, op, right):
self.left = left
self.op = op
self.right = right
def __repr__(self):
op = { Token.ADD:'+', Token.SUB:'-', Token.MUL:'*', Token.DIV:'/' }
return '(' + repr(self.left) + op[self.op] + repr(self.right) + ')'
def indented(self, level):
return indent(self.op, level) + \
self.left.indented(level+1) + \
self.right.indented(level+1)
def code(self):
op = { Token.ADD:'iadd', Token.SUB:'isub',
Token.MUL:'imul', Token.DIV:'idiv' }
return self.left.code() + \
self.right.code() + \
op[self.op] + '\n'
class Number_AST:
def __init__(self, number):
self.number = number
def __repr__(self):
return self.number
def indented(self, level):
return indent(self.number, level)
def code(self): # works only for short numbers
return 'sipush ' + self.number + '\n'
class Identifier_AST:
def __init__(self, identifier):
self.identifier = identifier
def __repr__(self):
return self.identifier
def indented(self, level):
return indent(self.identifier, level)
def code(self):
loc = symbol_table.location(self.identifier)
return 'iload ' + str(loc) + '\n'
class BooleanFactor_AST:
def __init__(self, condition, logic):
self.condition = condition
self.logic = logic
def __repr__(self):
if self.logic == False:
return 'NOT ' + repr(self.condition)
else:
return repr(self.condition)
def indented(self, level):
if self.logic == False:
return indent('NOT ', level) + self.condition.indented(level + 1)
else:
return self.condition.indented(level)
def false_code(self, label):
if self.logic == True:
return self.condition.false_code(label)
else:
return self.condition.true_code(label)
return
def true_code(self, label):
if self.logic == True:
return self.condition.true_code(label)
else:
return self.condition.false_code(label)
class BooleanTerm_AST:
def __init__(self, terms):
self.terms = terms
def __repr__(self):
result = repr(self.terms[0])
for term in self.terms[1:]:
result = result + ' AND ' + repr(term)
return result
def indented(self, level):
result = self.terms[0].indented(level)
for term in self.terms[1:]:
result = result + indent('AND', level)
result = result + term.indented(level)
return result
def code(self, label):
result = ''
for term in self.terms:
result = result + term.false_code(label)
return result
class BooleanExpression_AST:
def __init__(self, expressions):
self.expressions = expressions
def __repr__(self):
result = repr(self.expressions[0])
for expression in self.expressions[1:]:
result = result + ' OR ' + repr(expression)
return result
def indented(self, level):
result = self.expressions[0].indented(level)
indentation = 0
for expression in self.expressions[1:]:
indentation += 1
result = result + indent('OR', level + indentation)
result = result + expression.indented(level + indentation)
return result
def code(self, label):
result = ''
for expression in self.expressions:
result = result + expression.code(label)
return result
# The following methods comprise the recursive-descent parser.
def program():
sts = statements()
return Program_AST(sts)
def statements():
result = [statement()]
while scanner.lookahead() == Token.SEM:
scanner.consume(Token.SEM)
st = statement()
result.append(st)
return Statements_AST(result)
def statement():
if scanner.lookahead() == Token.IF:
return if_statement()
elif scanner.lookahead() == Token.WHILE:
return while_statement()
elif scanner.lookahead() == Token.ID:
return assignment()
elif scanner.lookahead() == Token.READ:
return read();
elif scanner.lookahead() == Token.WRITE:
return write();
else: # error
return scanner.consume(Token.IF, Token.WHILE, Token.ID)
def if_statement():
scanner.consume(Token.IF)
condition = boolean_expression()
scanner.consume(Token.THEN)
then = statements()
if scanner.lookahead() == Token.END:
scanner.consume(Token.END)
return If_AST(condition, then)
else:
scanner.consume(Token.ELSE)
_else = statements()
scanner.consume(Token.END)
return If_Else_AST(condition, then, _else)
def while_statement():
scanner.consume(Token.WHILE)
condition = boolean_expression()
scanner.consume(Token.DO)
body = statements()
scanner.consume(Token.END)
return While_AST(condition, body)
def assignment():
ident = identifier()
scanner.consume(Token.BEC)
expr = expression()
return Assign_AST(ident, expr)
def read():
scanner.consume(Token.READ)
variable = identifier()
return Read_AST(variable)
def write():
scanner.consume(Token.WRITE)
expr = expression()
return Write_AST(expr)
def comparison():
left = expression()
op = scanner.consume(Token.LESS, Token.EQ, Token.GRTR,
Token.LEQ, Token.NEQ, Token.GEQ)
right = expression()
return Comparison_AST(left, op, right)
def expression():
result = term()
while scanner.lookahead() in [Token.ADD, Token.SUB]:
op = scanner.consume(Token.ADD, Token.SUB)
tree = term()
result = Expression_AST(result, op, tree)
return result
def term():
result = factor()
while scanner.lookahead() in [Token.MUL, Token.DIV]:
op = scanner.consume(Token.MUL, Token.DIV)
tree = factor()
result = Expression_AST(result, op, tree)
return result
def factor():
if scanner.lookahead() == Token.LPAR:
scanner.consume(Token.LPAR)
result = expression()
scanner.consume(Token.RPAR)
return result
elif scanner.lookahead() == Token.NUM:
value = scanner.consume(Token.NUM)
return Number_AST(value)
elif scanner.lookahead() == Token.ID:
return identifier()
else: # error
return scanner.consume(Token.LPAR, Token.NUM, Token.ID)
def identifier():
value = scanner.consume(Token.ID)
return Identifier_AST(value)
def boolean_factor():
if scanner.lookahead() == Token.NOT:
scanner.consume(Token.NOT)
logic = False
else:
logic = True
result = comparison()
return BooleanFactor_AST(result, logic)
def boolean_term():
result = [boolean_factor()]
while scanner.lookahead() in [Token.AND]:
scanner.consume(scanner.lookahead())
temp = boolean_factor()
result.append(temp)
return BooleanTerm_AST(result)
def boolean_expression():
result = [boolean_term()]
while scanner.lookahead() in [Token.OR]:
scanner.consume(scanner.lookahead())
temp = boolean_term()
result.append(temp)
return BooleanExpression_AST(result)
# Initialise scanner, symbol table and label generator.
#scanner = Scanner(open('test.txt'))
scanner = Scanner(sys.stdin)
symbol_table = Symbol_Table()
symbol_table.location('Java Scanner') # fix a location for the Java Scanner
label_generator = Label()
# Uncomment the following to test the scanner without the parser.
# This shows a list of all tokens in the input.
#
#token = scanner.lookahead()
#while token != None:
# print(token)
# scanner.consume(token)
# token = scanner.lookahead()
#exit()
# Call the parser.
ast = program()
assert scanner.lookahead() == None
# Uncomment the following to test the parser without the code generator.
# The first line gives back the program by calling __repr__ of the AST classes.
# The second line shows the syntax tree with levels indicated by indentation.
#
#print(ast)
#print(ast.indented(0))
#exit()
# Call the code generator.
# This translates the abstract syntax tree to JVM bytecode.
# It can be assembled to a class file by Jasmin: http://jasmin.sourceforge.net/
print(ast.code())
testing bat file
python compiler.py <test.txt> Program.j
java -Xmx100m -jar jasmin.jar Program.j
java -Xmx100m Program < testInput.txt > test_output.txt
and language (BNF)
Program = Statements
Statements = Statement (; Statement)
Statement = If | While | Assignment
If = if Comparison then Statements end
While = while Comparison do Statements end
Assignment = identifier := Expression
Comparison = Expression Relation Expression
Relation = = | != | < | <= | > | >=
Expression = Term ((+ | -) Term)
Term = Factor ((* | /) Factor)
Factor = (Expression) | number | identifier
BooleanExpression = BooleanTerm (or BooleanTerm)*
BooleanTerm = BooleanFactor (and BooleanFactor)*
BooleanFactor = not BooleanFactor | Comparison
I think thats all that is relevant, cheers if you take a go at helping me on this
if you want a method to chain OR's and AND'syou can use this property:
p v q === ¬p ^ ¬q
Is equivalent, you can process all in the AND form. for example.
p v q ^ r v s === ¬p ^ ¬q ^ ¬r ^ ¬s
So evaluate the expression in AND form is simple with an algorithm.
I guess the expression doesn't have any parenthesis, in other way you need prioritize the grouping symbols (), [], {}.

Printing a tree with branches recursively

I'm trying to print a tree recursively in Python. For some reason, the indentation isn't working (perhaps I'm too tired at this point to see an obvious flaw). Here's the structure / class definitions that I'm working with:
class Tree(object):
def __init__(self, data):
self.data = data
self.branches = []
class Branch(object):
def __init__(self, value):
self.label = value
self.node = None
As you see, each tree has Branches, which have a label and point to another Tree (that's the node value you see there). Here's how I'm trying to print out the tree:
def __str__(self):
return self.tree_string(0)
def tree_string(self, indent):
indentation = indent * " "
result = indentation + str(self.data) + "\n";
for branch in self.branches:
result += indentation + branch.label + ": \n" + branch.node.tree_string(indent + 2)
return result
This is giving me:
4
Somewhat:
Yes
Fuller:
3
Correct:
8
Caribbean:
2
Wrong:
Wrong
Correct:
Correct
Italian:
Wrong
Burger:
Correct
Wrong:
Wrong
Nothing:
Wrong
When it should be giving me something like
4
Somewhat:
Correct
Fuller:
3
Correct:
8
Caribbean:
2
Wrong:
Wrong
Correct:
Correct
Italian:
Wrong
Burger:
Correct
Wrong:
Wrong
Nothing:
Wrong
What's causing my code to have those extra newlines and not have the proper indents?
Update
Pretty sure the data is ok. Here's a modified version that shows it's ok:
def tree_string(self, indent):
indentation = indent * " "
result = str(self.data);
if len(self.branches) > 0:
result += "["
for branch in self.branches:
result += branch.label + ":" + branch.node.tree_string(indent + 2) + " "
result += "]"
return result
..which gives the output
4[Somewhat:Correct Fuller:3[Correct:8[Caribbean:2[No:No Correct:Correct ] Italian:Wrong Burger:Correct ] Wrong:Wrong ] Nothing:Wrong ]
However, the indent values are for some reason always 0 or 2.
Looks like it should work to me:
class Tree(object):
def __init__(self, data):
self.data = data
self.branches = []
def __str__(self):
return self.tree_string(0)
def tree_string(self, indent):
indentation = indent * " "
result = indentation + str(self.data) + "\n";
for branch in self.branches:
result += indentation + branch.label + ": \n" + branch.node.tree_string(indent + 2)
return result
class Branch(object):
def __init__(self, value):
self.label = value
self.node = None
tree = Tree(4)
b1 = Branch('Somewhat')
b1.node = Tree('Yes')
b2 = Branch('Fuller')
b2.node = Tree(3)
tree.branches = [b1, b2]
b3 = Branch('Correct')
b3.node = Tree(8)
b2.node.branches = [b3]
print(tree)
yields
4
Somewhat:
Yes
Fuller:
3
Correct:
8

Categories