I'm trying to implement a Graph/Node data structure to store some connections between synsets in WordNet.
import nltk
from nltk.corpus import wordnet as wn
# a list of [wn.synset, Node] pairs (used as a lookup table)
syns_node_dict = []
# gets a node from a wn.synset
def nodeFromSyn(syn):
    """Return the Node registered for ``syn``, or ``False`` if there is none.

    ``syns_node_dict`` is scanned in order; the Node stored in the first
    entry whose synset compares equal to ``syn`` is returned.
    """
    hits = (entry[1] for entry in syns_node_dict if entry[0] == syn)
    return next(hits, False)
def display_dict():
    """Display ``[value, parents, children]`` for every registered Node."""
    for entry in syns_node_dict:
        node = entry[1]
        summary = [node._value, node._parents, node._children]
        display(summary)
# Graph Node data struct
class Node:
# Graph node that registers itself in the module-level syns_node_dict and
# keeps parent/child links of neighbouring nodes in sync.
# value is a wn.synset
# parents, children are both lists of wn.synset
# NOTE(review): `parents=[]` and `children=[]` are mutable defaults. They are
# created once, when the function is defined, and every call that omits the
# argument receives the same list object. The answer further down identifies
# this as the cause of the unexpected output.
def __init__(self, value, parents=[], children=[]):
curr = nodeFromSyn(value)
# if a Node for value already exists, merge attributes
if curr:
for p in parents:
if p not in curr._parents : curr._parents.append(p)
for c in children:
if c not in curr._children : curr._children.append(c)
# NOTE(review): assigning to `self` only rebinds the local name for the rest
# of this call; the object returned by `Node(...)` is still the freshly
# created instance, not `curr`.
self = curr
# if a Node for value does not exist, create new Node for value
else:
# register the new Node so that later nodeFromSyn(value) calls find it
syns_node_dict.append([value, self])
self._value = value
# the list objects passed in (or the shared defaults) are stored as-is,
# not copied
self._parents = parents
self._children = children
# Create a Node for each of self's parents if it does not already exist
# and add self as a child
for parent in self._parents:
parent_node = nodeFromSyn(parent)
if parent_node:
if value not in parent_node._children:
parent_node._children.append(value)
else:
# constructing the Node registers it in syns_node_dict as a side effect;
# the local name is not used afterwards
parent_node = Node(parent, children=[value])
# Create a Node for each of self's children if it does not already exist
# and add self as a parent
for child in self._children:
child_node = nodeFromSyn(child)
if child_node:
if value not in child_node._parents:
child_node._parents.append(value)
else:
# as above: the constructor call is made for its registration side effect
child_node = Node(child, parents=[value])
However, I'm seeing some strange behavior when I attempt to run my code:
Node(wn.synset('condition.n.01'), [], children=[wn.synset('difficulty.n.03')])
display_dict()
>[Synset('condition.n.01'), [], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
Node(wn.synset('state.n.02'), children=[wn.synset('condition.n.01')])
display_dict()
>[Synset('condition.n.01'), [Synset('state.n.02')], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
>[Synset('state.n.02'), [], [Synset('condition.n.01')]]
When I run the following lines of code I expect the last line to be
[Synset('attribute.n.02'), [], [Synset('state.n.02')]].
Why does the Node for wn.synset('attribute.n.02') contain itself among its parents?
Node(wn.synset('attribute.n.02'), children=[wn.synset('state.n.02')])
display_dict()
>[Synset('condition.n.01'), [Synset('state.n.02')], [Synset('difficulty.n.03')]]
>[Synset('difficulty.n.03'), [Synset('condition.n.01')], []]
>[Synset('state.n.02'), [Synset('attribute.n.02')], [Synset('condition.n.01')]]
>[Synset('attribute.n.02'), [Synset('attribute.n.02')], [Synset('state.n.02')]]
I isolated the issue to be within the block below but I can't figure out what's causing this behavior.
# Create a Node for each of self's children if it does not already exist
# and add self as a parent
for child in self._children:
child_node = nodeFromSyn(child)
if child_node:
if value not in child_node._parents:
child_node._parents.append(value)
else:
child_node = Node(child, parents=[value])
This happens because default parameter values are evaluated once, when the function is defined, not each time it is called. As a result, all the nodes you create without passing the parents (or children) argument actually share the same list object for that attribute. For the first two nodes this isn't a problem, because you pass an explicit empty list for the first node and the first node creates a new list for the second, but all the nodes after that share the same list (which becomes more obvious if you keep creating more nodes).
A way to solve this problem, outlined here, is to make the default parameters None, then check whether each parameter is None and, if so, create a new empty list:
# modify the init function so that the default parameters are None
def __init__(self, value, parents=None, children=None):
# if they are left as None, set them to an empty list
if parents is None:
parents = []
if children is None:
children = []
# The rest of your code here...
Related
I have a dictionary which represents a graph. Each key is a Node object and each value is a set of Nodes, so it has this structure:
dict[Node] = {Node, Node, Node}
class Node:
    """Graph vertex identified by its grid position.

    ``id`` is the string ``"<row>-<column>"``; ``visited`` starts as False.
    """

    def __init__(self, row, column, region):
        row_part = str(row)
        column_part = str(column)
        self.id = '-'.join((row_part, column_part))
        self.region = region
        self.visited = False
In the code below I need to update the visited property of a Node.
while nodes_queue:
current_node = nodes_queue.pop()
for edge in self.map[current_node]:
if edge.region == root.region and not edge.visited:
edge.visited = True # Not updated!
nodes_queue.append(edge)
But it looks like I get a view of the Node objects instead of the actual objects. When I update the visited property in the for loop and read it in the next iteration, the property is still set to False.
I've figured it out. The Node objects I stored as keys in my dictionary were different objects from the ones stored in the value sets.
I created a context of all Nodes and now get each Node from there by its id.
def get_node_from_context(self, row, column, region):
    """Return the shared Node for ``(row, column, region)``.

    A candidate Node is built and looked up in ``self.__graph_context__``.
    If an equal key is already present, the stored Node is returned;
    otherwise the candidate is stored under itself and returned.
    NOTE(review): this presumably relies on Node defining equality/hashing
    by its id -- confirm against the Node class actually in use.
    """
    candidate = Node(row, column, region)
    registry = self.__graph_context__
    if candidate in registry:
        return registry[candidate]
    registry[candidate] = candidate
    return candidate
Suppose I have the following piece of Python code to create a forest containing a bunch of trees.
NEXT_INDEX = 0


class Node:
    """ A node of a tree """

    def __init__(self):
        # NEXT_INDEX is a module-level counter. Without this declaration the
        # ``+=`` below would make the name local to __init__ and reading it
        # would raise UnboundLocalError.
        global NEXT_INDEX
        # Each node gets a unique id
        self._index = NEXT_INDEX
        NEXT_INDEX += 1
        # any node may have an arbitrary number of children
        self._children = []
        self._parent = None

    def add_child(self, node):
        """Append ``node`` to this node's children and set its parent."""
        node._parent = self
        self._children.append(node)

    def __str__(self):
        return "node {}".format(self._index)
class Forest:
    """ A bunch of trees """

    def __init__(self):
        # contains the root nodes of a whole bunch of trees
        self._trees = []

    def add_node(self, node):
        """Make ``node`` the root of a new tree in ``self._trees``."""
        self._trees.append(node)

    def find_node(self, idx):
        """
        Search all trees in self._trees for a node with index = idx
        and return that node.
        """
        # Implementation not relevant here
        pass

    def on_add_child(self, child):
        """Hook to run whenever ``child`` is added to any node.

        Declared with ``self`` so that it can be invoked on a Forest
        instance (``forest.on_add_child(node)``); without it the instance
        call fails with a TypeError.
        """
        # should be executed each time add_child is called on a node with the
        # new child as a parameter
        print("on_add_child with child = {}".format(child))
I would like to execute a method, "on_add_child", each time a child is added to any node in any of the trees stored in Forest._trees.
Important: The print statement has to be in the Forest class. In the real code Forest maintains a search index of nodes and whenever a new child node is added, the new node has to be added to the search index. Adding a reference to Forest to Node (so that Node.add_child could call Forest.on_add_child) is unfortunately not an option either, because it would introduce a circular dependency between Node and Forest.
Example: Say i executed the following code
forest = Forest()
node_0 = Node()
node_1 = Node()
node_2 = Node()
node_3 = Node()
node_4 = Node()
# We add the first node to the forest: It will become the root of the first tree
forest.add_node(node_0)
# Add node_1 as a child to node_0; This should execute on_add_child(node_1) and
# print "on_add_child with child = node 1"
forest.find_node(0).add_child(node_1)
# Should print "on_add_child with child = node 2"
# => on_add_child is also triggered when we add a child to a non-root node
forest.find_node(1).add_child(node_2)
# Create a second tree
forest.add_node(node_3)
# Should print "on_add_child with child = node 4"
forest.find_node(3).add_child(node_4)
How can this be accomplished? I am aware of Python properties and I have found several related questions about how to use properties together with Python lists (e.g. Python property on a list, Python decorating property setter with list, python: how to have a property and with a setter function that detects all changes that happen to the value), but in my case it is not just a list but also a tree structure, and I couldn't get this combination to work.
If you need action in the forest class to be initiated you can call this in add_child as well. So if vertices need to be updated just update them every time add_child is called. You will need to also keep track of which forest the node is in by passing that into the default constructor.
def add_child(self, node):
node._parent = self
self._children.append(node)
self._forest.on_add_child(node)
I am trying to implement some basic recursive structure in Python, but without great success. I have a tree represented in form of nested lists like this:
ex = ['A',
['A1',
['A11', 'tag'],
['A12', 'tag'],
['A13',
['A131', 'tag'],
['A132',
['A1321', 'tag'],
['A1322', 'tag']]]],
['A2', 'tag'],
['A3',
['A31',
['A311', 'tag'],
['A312', 'tag']],
['A32', 'tag'],
['A33',
['A331',
['A3311', 'tag'],
['A3312', 'tag']]],
['A34', 'tag'],
['A35',
['A351', 'tag'],
['A352',
['A3521', 'tag'],
['A3522', 'tag']]]],
['A4', 'tag']]
and I have defined a Node class that allows specifying a tag 'A', 'A1', ... and adding children. Terminal nodes can be retrieved by noticing that children is not a list.
class Node(object):
def __init__(self, tag, parent=None, children=[]):
self.tag = tag
self.parent = parent
self.children = children
self.is_terminal = False if isinstance(children, list) else True
def add_child(self, node):
if not isinstance(node, Node):
raise ValueError("Cannot append node of type: [%s]" % type(node))
if self.is_terminal:
raise ValueError("Cannot append node to terminal")
else:
self.children.append(node)
Now I am having trouble implementing a function that would recursively transform the list-based tree into a Node-based one:
tree = Node(tag='A',
children=[Node(tag='A1',
children=[Node(tag='A11',
children='tag'),
Node(tag='A12',
children='tag'),
...]),
...])
This is my attempt so far based on the idea that at each position in the nested list, we might have either a terminal node, in which case we just add it to the root, or a non-terminal, in which case we extract the respective root tag and iterate over children recursively. When list is empty we return control to caller.
My feeling is that the coding style is perhaps not the best suitable for Python, but I would like to know what I am missing more concretely.
def is_terminal(e):
    """Return True when ``e`` is a terminal entry: exactly two strings.

    ``isinstance`` is used instead of comparing ``type(...)`` so that
    subclasses of ``str`` are also recognised as strings.
    """
    return len(e) == 2 and isinstance(e[0], str) and isinstance(e[1], str)
def from_list(lst, root):
lst = list(lst) # avoid mutating input list
if not lst:
return
for e in lst:
if is_terminal(e):
tag, children = e
print "terminal", tag, "with root", root.tag
root.add_child(Node(tag=tag, children=children, parent=root))
else:
e = list(e)
tag, children = e.pop(0), e
print "non terminal", tag, "with root", root.tag
root = Node(tag=tag, parent=root)
from_list(children, root=root)
It has a number of problems. For instance, it loses track of the highest root 'A', i.e. A2 gets A1 as root. It also flattens out the tree to a Node with 16 children, one per terminal node, and goes into infinite recursion.
I'd appreciate any type of hints.
I finally found out the problem, which turned out to be partially a missing point in the algorithm and partially a misunderstanding in the way Python lists work.
The algorithm was losing track of the highest root because I wasn't adding the sub-root to its parent in the else branch. This new version solves the issue.
else:
e = list(e)
tag, children = e.pop(0), e
print "non terminal", tag, "with root", root.tag
subroot = Node(tag=tag, parent=root)
root.add_child(subroot) #
from_list(children, root=subroot)
The problem with the flattening was actually the fact that I was using [] as a default argument in the Node class definition. As explained here, the default empty list is created only once, when the function is defined, and not every time the function is called (or, in this case, the class is instantiated).
Therefore, the children list of the root was getting added all sub children (and hence the flattening effect), and the children list of all sub children was getting modified every time the root children list was modified - hence the infinite recursion.
It turns out to be more of a Python-gotcha, rather than an algorithm definition problem.
For the record, the complete corrected version of the code:
class Node(object):
    """Tree node with a tag, an optional parent and its children.

    ``children`` is either a list of child Nodes (non-terminal node) or any
    non-list value such as a string (terminal node).  When ``children`` is
    omitted the node starts as a non-terminal with its own empty list.
    """

    def __init__(self, tag, parent=None, children=None):
        self.tag = tag
        self.parent = parent
        # Test for None rather than truthiness: a terminal whose value is an
        # empty string must stay terminal instead of silently becoming an
        # empty non-terminal.
        self.children = [] if children is None else children
        self.is_terminal = not isinstance(self.children, list)

    def add_child(self, node):
        """Append ``node`` to this node's children.

        Raises ValueError if ``node`` is not a Node or if this node is a
        terminal.
        """
        if not isinstance(node, Node):
            raise ValueError("Cannot append node of type: [%s]" % type(node))
        if self.is_terminal:
            raise ValueError("Cannot append node to terminal")
        self.children.append(node)
def is_terminal(e):
    """Return True when ``e`` is a terminal entry: exactly two strings.

    ``isinstance`` is used instead of comparing ``type(...)`` so that
    subclasses of ``str`` are also recognised as strings.
    """
    return len(e) == 2 and isinstance(e[0], str) and isinstance(e[1], str)
def from_list(lst, root):
    """Recursively attach the nested-list tree ``lst`` underneath ``root``.

    Each element of ``lst`` is either a terminal pair (see ``is_terminal``),
    which is added to ``root`` as a leaf Node, or a non-terminal list whose
    first item is the tag and whose remaining items are processed
    recursively under a new sub-root.  Neither ``lst`` nor its sub-lists are
    mutated.  Progress is printed for every element.
    """
    # Iterate over a shallow copy so the caller's list is never touched; an
    # empty list simply yields no iterations.
    for e in list(lst):
        if is_terminal(e):
            tag, children = e
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print("terminal %s with root %s" % (tag, root.tag))
            root.add_child(Node(tag=tag, children=children, parent=root))
        else:
            # Copy before popping the tag so the input sub-list stays intact.
            rest = list(e)
            tag = rest.pop(0)
            print("non terminal %s with root %s" % (tag, root.tag))
            newroot = Node(tag=tag, parent=root)
            root.add_child(newroot)
            from_list(rest, root=newroot)
And here is how you call it:
root = Node(tag=ex[0])
from_list(ex[1:], root=root)
I've been trying to create a simple linked list implementation in Python as a code exercise and, although I have most of the stuff working (inserting, removing, pretty print, swapping the content of two nodes), I've been stuck on swapping two nodes for a few days.
I've looked around on the internet and most people seem to recommend deleting/inserting the nodes or swapping the data. Both are very fine and functional options but I wanted to challenge myself and see if I could swap the nodes the "correct" way.
Ideally I would like to have a generic function that can handle all edge cases (moving to the begin, end and swapping random nodes). This has proven to be way more challenging than I expected.
I've experimented a bit with pen and paper and search around and I found the following discussion and example implementation:
Discussion about swapping in C
Example implementation in C
The issue I run into is that my node1.next and my node2.prev are swapped and that my node2.next refers to itself and not to the next node in the list.
The comment on the page of the example implementation specifically addresses this problem and mentions that it should not happen with his implementation.
I can't seem to figure out what I've done wrong. I guess I could "cheat" and force them to take the correct values at the end but that gives a lot of problems when the node is the first/last.
__author__ = 'laurens'
from django.core.exceptions import ObjectDoesNotExist
import logging
import copy
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class DoublyLinkedList(object):
    """This class implements a basic doubly linked list in Django, it depends
    on a Django model with the following field:

        id : PK
        prev: integer previous node
        data_field: Foreign key
        next: integer next node

    the prev and next fields don't need to be self-referencing
    When instantiating the class you have to link this class to a Django model
    and specify a data field. The data field can link to a foreign key
    or contain data
    """

    def __init__(self, doubly_linked_list_model, data_field):
        self.doubly_linked_list_model = doubly_linked_list_model
        self.data_field = data_field

    def get_node_from_node_id(self, node_id=None):
        """This function returns the node associated with a certain node_id

        Returns None when ``node_id`` is None or when the model lookup
        raises ObjectDoesNotExist.
        """
        if node_id is None:
            return None
        try:
            return self.doubly_linked_list_model.get(id=node_id)
        except ObjectDoesNotExist:
            return None

    # Must be a real staticmethod: callers invoke it as
    # ``self._update_node(node=...)``, which would otherwise bind ``self`` to
    # ``node`` and fail with "got multiple values for argument 'node'".
    @staticmethod
    def _update_node(node, prev=None, next=None):
        """Assign ``prev``/``next`` on ``node`` and save it (best effort).

        A failure in ``node.save()`` is logged at debug level, with the
        traceback, and is not re-raised.
        """
        node.prev = prev
        node.next = next
        logger.debug('updating node: %s', node.id)
        logger.debug('node.prev = %s', node.prev)
        logger.debug('node.next = %s', node.next)
        try:
            node.save()
        except Exception:  # Todo: specify this
            logger.debug('Error saving node: %s', node.id, exc_info=True)

    def move_node(self, node1=None, node2=None):
        """
        This function swaps the position of node1 with the position of node2

        The swap runs in four steps, each written through ``_update_node``:
        exchange the ``next`` ids of the two nodes, point ``prev`` of each
        node's new successor back at it, exchange the ``prev`` ids, then
        point ``next`` of each node's new predecessor at it.
        """
        # swapping two nodes!
        logger.debug('Swapping two random nodes!: %s, %s', node1.id, node2.id)

        # Swapping next nodes
        logger.debug('Swapping next node')
        tmp = copy.deepcopy(node1.next)
        self._update_node(node=node1,
                          prev=node1.prev,
                          next=node2.next)
        # Todo: Check if tmp changes or is stored as a copy
        self._update_node(node=node2,
                          prev=node2.prev,
                          next=tmp)

        if node1.next is not None:
            logger.debug('Connect the next node to node 1')
            node_next = self.get_node_from_node_id(node1.next)
            self._update_node(node=node_next,
                              prev=node1.id,
                              next=node_next.next)
        if node2.next is not None:
            logger.debug('Connect the next node to node 2')
            node_next = self.get_node_from_node_id(node2.next)
            self._update_node(node=node_next,
                              prev=node2.id,
                              next=node_next.next)

        logger.debug('Swap prev nodes')
        tmp = copy.deepcopy(node1.prev)
        self._update_node(node=node1,
                          prev=node2.prev,
                          next=node1.next)
        self._update_node(node=node2,
                          prev=tmp,
                          next=node2.next)

        # Connect the node before node1 to node 1
        if node1.prev is not None:
            logger.debug('Connect the prev to node 1')
            node_prev = self.get_node_from_node_id(node1.prev)
            self._update_node(node=node_prev,
                              prev=node_prev.prev,
                              next=node1.id)
        # Connect the node before node2 to node 2
        if node2.prev is not None:
            logger.debug('Connect the prev to node 2')
            node_prev = self.get_node_from_node_id(node2.prev)
            self._update_node(node=node_prev,
                              prev=node_prev.prev,
                              next=node2.id)
The _update_node function does nothing more than taking my input and committing it to the database; it can handle None values.
get_node_from_node_id takes an integer as input and returns the node object associated with it. I use it so that I don't have to work with self-referencing foreign keys (is that the correct term?) in the database, for now I would like to continue working this way. Once I have this working I'll move on to fixing it in the database in the correct way.
tip: I get answers much more quickly when I provide a minimal, complete, verifiable example (MCVE), also known as a short, self-contained, compilable example (SSCCE).
Your example code fails to demonstrate the problem, making it impossible for us to help you.
Please make it easy for us to help you.
I ran your example code, but I didn't see any output.
The issue I run into is that ... that my node2.next refers to itself and
not to the next node in the list.
Why is node2.next referring to node2 a problem?
As far as I can tell, the part of the code you gave us so far works fine.
Some of the most difficult debugging sessions I've ever had ended only when I realized that everything was actually working correctly, that the "bug" I thought I was hunting didn't actually exist.
Yes, there is an intermediate step halfway through the algorithm where a node refers to itself, which may seem obviously wrong.
But then the second half of the algorithm makes further changes.
By the time the algorithm finishes,
the "next" chain correctly runs all the way from the beginning to the end, and
the "prev" chain correctly runs in the reverse order of the "next" chain, from the end to the beginning, and
the order of those chains is similar to the original order, except that node1 and node2 have swapped logical position (but are still in the same physical position).
Every node points to 2 other nodes (or to the NULL node), never to itself.
Isn't that what you wanted?
When I ran the following test script, I get lots of output --
but I don't see any problems in the output.
(I used a simple array with integer indexes rather than true pointers or references to make the code shorter and easier to debug compared to a SQL database).
Would you mind pointing out which particular line of output is not what you expected, and spelling out what you expected that line of output to actually say?
#!/usr/bin/env python
# https://stackoverflow.com/questions/24610889/trying-to-write-a-swap-position-function-for-a-doubly-linked-list
# Is this the shortest possible program that exhibits the bug?
# Before running this program, you may need to install
# sudo apt-get install python-django
# 2015-03-12: David added some test scaffolding
# 2014-07-07: Laurens posted to StackOverflow
__author__ = 'laurens'
from django.core.exceptions import ObjectDoesNotExist
import logging
import copy
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class DoublyLinkedList(object):
    """This class implements a basic doubly linked list in Django, it depends
    on a Django model with the following field:

        id : PK
        prev: integer previous node
        data_field: Foreign key
        next: integer next node

    the prev and next fields don't need to be self-referencing
    When instantiating the class you have to link this class to a Django model
    and specify a data field. The data field can link to a foreign key
    or contain data
    """

    def __init__(self, doubly_linked_list_model, data_field):
        self.doubly_linked_list_model = doubly_linked_list_model
        self.data_field = data_field

    def get_node_from_node_id(self, node_id=None):
        """This function returns the node associated with a certain node_id

        Returns None when ``node_id`` is None or when the model lookup
        raises ObjectDoesNotExist.
        """
        if node_id is None:
            return None
        try:
            return self.doubly_linked_list_model.get(id=node_id)
        except ObjectDoesNotExist:
            return None

    # Must be a real staticmethod: callers invoke it as
    # ``self._update_node(node=...)``, which would otherwise bind ``self`` to
    # ``node`` and fail with "got multiple values for argument 'node'".
    @staticmethod
    def _update_node(node, prev=None, next=None):
        """Assign ``prev``/``next`` on ``node`` and save it (best effort).

        A failure in ``node.save()`` is logged at debug level, with the
        traceback, and is not re-raised.
        """
        node.prev = prev
        node.next = next
        logger.debug('updating node: %s', node.id)
        logger.debug('node.prev = %s', node.prev)
        logger.debug('node.next = %s', node.next)
        try:
            node.save()
        except Exception:  # Todo: specify this
            logger.debug('Error saving node: %s', node.id, exc_info=True)

    def move_node(self, node1=None, node2=None):
        """
        This function swaps the position of node1 with the position of node2

        The swap runs in four steps, each written through ``_update_node``:
        exchange the ``next`` ids of the two nodes, point ``prev`` of each
        node's new successor back at it, exchange the ``prev`` ids, then
        point ``next`` of each node's new predecessor at it.
        """
        # swapping two nodes!
        logger.debug('Swapping two random nodes!: %s, %s', node1.id, node2.id)

        # Swapping next nodes
        logger.debug('Swapping next node')
        tmp = copy.deepcopy(node1.next)
        self._update_node(node=node1,
                          prev=node1.prev,
                          next=node2.next)
        # Todo: Check if tmp changes or is stored as a copy
        self._update_node(node=node2,
                          prev=node2.prev,
                          next=tmp)

        if node1.next is not None:
            logger.debug('Connect the next node to node 1')
            node_next = self.get_node_from_node_id(node1.next)
            self._update_node(node=node_next,
                              prev=node1.id,
                              next=node_next.next)
        if node2.next is not None:
            logger.debug('Connect the next node to node 2')
            node_next = self.get_node_from_node_id(node2.next)
            self._update_node(node=node_next,
                              prev=node2.id,
                              next=node_next.next)

        logger.debug('Swap prev nodes')
        tmp = copy.deepcopy(node1.prev)
        self._update_node(node=node1,
                          prev=node2.prev,
                          next=node1.next)
        self._update_node(node=node2,
                          prev=tmp,
                          next=node2.next)

        # Connect the node before node1 to node 1
        if node1.prev is not None:
            logger.debug('Connect the prev to node 1')
            node_prev = self.get_node_from_node_id(node1.prev)
            self._update_node(node=node_prev,
                              prev=node_prev.prev,
                              next=node1.id)
        # Connect the node before node2 to node 2
        if node2.prev is not None:
            logger.debug('Connect the prev to node 2')
            node_prev = self.get_node_from_node_id(node2.prev)
            self._update_node(node=node_prev,
                              prev=node_prev.prev,
                              next=node2.id)
global_test_array = []
obfuscation = 0xaa


class trivial_test_node_class(object):
    """In-memory stand-in for a linked-list node record.

    Holds ``id``, ``prev``, ``next`` and ``data``; ``save()`` stores the
    node in the module-level ``global_test_array`` at index ``id``.
    """

    def __init__(self, prev, id, next, data):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("initializing test class.")
        self.id = id
        self.prev = prev
        self.next = next
        self.data = data

    def something(self):
        print("something")

    def save(self):
        """Write this node into ``global_test_array`` at index ``self.id``."""
        global_test_array[self.id] = self

    def __repr__(self):
        the_string = "%s(%r)\n" % (self.__class__, self.__dict__)
        return the_string
class trivial_test_list_model_class(object):
    """In-memory stand-in for the list model: nodes live in ``self.stuff``.

    Index 0 holds the integer 0 (used as the NULL id); indexes 1..9 hold
    test nodes chained by their ``prev``/``next`` ids.
    """

    def __init__(self):
        global global_test_array
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("initializing test class.")
        self.stuff = [0] * 10
        data = 'a'
        for i in range(1, 10):
            self.stuff[i] = trivial_test_node_class(i - 1, i, i + 1, data)
            data += 'r'  # talk like a pirate day
        self.stuff[-1].next = 0  # use 0 as NULL id.
        # Node.save() writes through this module-level name, so point it at
        # this model's storage.
        global_test_array = self.stuff

    def something(self):
        print("something")

    def get(self, id):
        """Return the entry stored at index ``id``."""
        return self.stuff[id]

    def __repr__(self):
        the_string = "%s(%r)" % (self.__class__, self.__dict__)
        return the_string
if __name__ == '__main__':
    # test code that only gets run when this file is run directly,
    # not when this file is imported from some other python script.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Hello, world.")
    trivial_test_model = trivial_test_list_model_class()
    print(trivial_test_model)
    testll = DoublyLinkedList(trivial_test_model, "data")
    # swap two adjacent nodes and show the model before and after
    left_node = trivial_test_model.get(3)
    right_node = trivial_test_model.get(4)
    testll.move_node(left_node, right_node)
    print(trivial_test_model)
# recommended by http://wiki.python.org/moin/vim
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 :
I have an algorithm to populate a tree like structure (class: Scan_instance_tree), but unfortunately, during each call, it is incorrectly adding to the root node's children, as well as to the new child nodes created further down in the tree.
As a clue, I saw another thread...
Persistent objects in recursive python functions
...where this problem was mentioned briefly, and it was suggested that the parameters passed had to be mutable. Is that the answer, and how would I do this, in this example???
Here is my current code:
class Field_node(object):
# NOTE(review): all four names below are class attributes, not instance
# attributes. Assigning e.g. `node.field_name = ...` creates a per-instance
# value, but `node.child_nodes.append(...)` mutates the one list object stored
# on the class, so every instance sees the same children (this is what the
# answer below explains).
field_phenotype_id = -1
field_name = ''
field_parent_id = -1
child_nodes = []
class Scan_instance_tree(object):
root_node = None
def __init__(self, a_db):
self.root_node = Field_node()
scan_field_values = self.create_scan_field_values(a_db) # This just creates a temporary user-friendly version of a database table
self.build_tree(scan_field_values)
def build_tree(self, a_scan_field_values):
self.root_node.field_name = 'ROOT'
self.add_child_nodes(a_scan_field_values, self.root_node)
def add_child_nodes(self, a_scan_field_values, a_parent_node):
i = 0
while i < len(a_scan_field_values):
if a_scan_field_values[i]['field_parent_dependancy'] == a_parent_node.field_phenotype_id:
#highest_level_children.append(a_scan_field_values.pop(a_scan_field_values.index(scan_field)))
child_node = Field_node()
child_node.field_phenotype_id = a_scan_field_values[i]['field_phenotype_id']
child_node.field_name = a_scan_field_values[i]['field_name']
child_node.field_parent_dependancy = a_scan_field_values[i]['field_parent_dependancy']
a_parent_node.child_nodes.append(child_node)
a_scan_field_values.remove(a_scan_field_values[i])
# RECURSION: get the child nodes
self.add_child_nodes(a_scan_field_values, child_node)
else:
i = i+1
If I remove the recursive call to self.add_child_nodes(...), the root's children are added correctly, ie they only consist of those nodes where the field_parent_dependancy = -1
If I allow the recursive call, the root's children contain all the nodes, regardless of the field_parent_dependancy value.
Best regards
Ann
When you define your Field_node class, the line
child_nodes = []
is actually instantiating a single list as a class attribute, rather than an instance attribute, that will be shared by all instances of the class.
What you should do instead is create instance attributes in __init__, e.g.:
class Field_node(object):
    """Tree node whose fields are all per-instance attributes."""

    def __init__(self):
        # ids default to -1 and the name to an empty string
        self.field_phenotype_id = -1
        self.field_name = ''
        self.field_parent_id = -1
        # a fresh list for every instance, so nodes never share children
        self.child_nodes = list()