I have a neo4j database populated with thousands of nodes without any relationship defined. I have a file which contains relationships between nodes, so I would like to create relationships between these nodes created in the database. My current approach is:
from py2neo import NodeSelector,Graph,Node,Relationship
graph = Graph('http://127.0.0.1:7474/db/data')
tx = graph.begin()
selector = NodeSelector(graph)


def _find_node(unique_name):
    """Select the first "Node"-labelled node whose unique_name matches."""
    return selector.select("Node", unique_name=unique_name).first()


with open("file", "r") as relations:
    for line in relations:
        fields = line.split(";")
        # One server round-trip per end point, i.e. two look-ups per input line.
        source = _find_node(fields[0])
        target = _find_node(fields[1])
        tx.create(Relationship(source, "Relates to", target))
# Everything queued above is sent in a single commit.
tx.commit()
The current approach needs 2 queries to database in order to obtain nodes to form a relationship + relationship creation. Is there a more efficient way given that nodes currently exist in the database?
You can use some form of node caching while populating relations:
from py2neo import NodeSelector,Graph,Node,Relationship
graph = Graph('http://127.0.0.1:7474/db/data')
tx = graph.begin()
selector = NodeSelector(graph)
# Maps unique_name -> node so each distinct node is queried at most once.
node_cache = {}


def _get_node(unique_name):
    """Return the node with this unique_name, querying only on a cache miss.

    Raises LookupError when no such node exists, instead of caching None
    and handing it to Relationship later on.
    """
    try:
        return node_cache[unique_name]
    except KeyError:
        pass
    # Query and store for later
    node = selector.select("Node", unique_name=unique_name).first()
    if node is None:
        raise LookupError("no Node with unique_name=%r" % (unique_name,))
    node_cache[unique_name] = node
    return node


with open("file", "r") as relations:
    for line in relations:
        # Drop the line terminator before splitting; otherwise the last
        # field is looked up with the newline still attached.
        line = line.rstrip("\r\n")
        if not line:
            # Blank lines carry no relationship.
            continue
        line_split = line.split(";")
        node1 = _get_node(line_split[0])
        node2 = _get_node(line_split[1])
        rs = Relationship(node1, "Relates to", node2)
        tx.create(rs)
tx.commit()
With the above you will only load each node once and only if that node appears in your input file.
Related
When I create two nodes and their relationship using a transaction, as shown below, I am able to use graph.match() to check whether the relationship exists.
from py2neo import Graph, Node, Relationship, NodeSelector
g = Graph('http://localhost:7474/db/data', user='uname', password='pass')
# Create both nodes and the relationship between them in one transaction.
tx = g.begin()
a = Node("Person", name="Alice")
tx.create(a)
b = Node("Person", name="Bob")
tx.create(b)
ab = Relationship(a, "KNOWS", b)
tx.create(ab)
tx.commit()
# a and b are the same objects that were just created through the
# transaction, so they are accepted as match end points.
relations = g.match(start_node=a, rel_type="KNOWS", end_node=b)
list(relations)  # this returns [(alice)-[:KNOWS]->(bob)]
Later I tried to pass start_node and end_node to graph.match() as shown below, but it didn't work; instead it raised the error: Nodes for relationship match end points must be bound
# These Node objects are built locally and were never created on, or
# fetched from, the server.
d = Node("Person", name="Alice")
e = Node("Person", name="Bob")
relations = g.match(start_node=d, rel_type="KNOWS", end_node=e)
list(relations)  # this returns error " Nodes for relationship match end points must be bound "
What is wrong with the above code for fetching the existing relationship between Alice and Bob?
The problem with the following code
# Node(...) builds new local node objects; they are not the nodes
# already stored on the server.
d = Node("Person", name="Alice")
e = Node("Person", name="Bob")
relations = g.match(start_node=d, rel_type="KNOWS", end_node=e)
list(relations)  # this returns error " Nodes for relationship match end points must be bound "
is that Node is used to create a new node, as @InverseFalcon mentioned in the comments; it does not point to an actual node on the server. Checking the relationship worked at creation time (the first snippet in my question) because, after the nodes have been created, the Node objects refer to the nodes that now exist on the server.
Following code worked for my project
def _person_named(name):
    """Run the Person look-up for name and return the node from its first row."""
    rows = list(g.run("MATCH (a:Person) WHERE a.name={b} RETURN a", b=name))
    return rows[0]['a']


# Fetch both end points from the server before matching on them.
start_node = _person_named("Alice")
end_node = _person_named("Bob")
relations = g.match(start_node=start_node, rel_type="KNOWS", end_node=end_node)
I have the following code:
import py2neo
from py2neo import Graph, Node, Relationship
def createRelationshipWithProperties():
    """Create two FRIEND relationships between three FEMALE nodes and print the results.

    Connects to the local server, builds the nodes Amy, Kristine and
    Sheryl, links Kristine -> Amy (since 2005) and Sheryl -> Amy
    (since 2001), and prints whatever graph.create returns for each
    relationship.
    """
    print("Start - Creating Relationships")

    # Register the credentials for the server (user "neo4j", empty
    # password here); adjust them to your database configuration.
    py2neo.authenticate("localhost:7474", "neo4j", "")
    graph = Graph("http://localhost:7474/db/data/")

    # The three people, built in the order Amy, Kristine, Sheryl.
    amy, kristine, sheryl = (
        Node("FEMALE", name=first_name)
        for first_name in ("Amy", "Kristine", "Sheryl")
    )

    kristine_amy = Relationship(kristine, "FRIEND", amy, since=2005)
    print(kristine_amy)
    amy_sheryl = Relationship(sheryl, "FRIEND", amy, since=2001)

    # Only the relationships are passed to create; the nodes they connect
    # are created along with them.
    resultNodes = graph.create(kristine_amy)
    resultNodes1 = graph.create(amy_sheryl)

    # Show what each create call returned.
    print("Relationship Created - ", resultNodes)
    print("Relationship Created - ", resultNodes1)
if __name__ == '__main__':
createRelationshipWithProperties()
The resultNodes = graph.create(...) line seems to commit the nodes and relationships to the server, because I can see them when I run MATCH (n) RETURN n. However, when the code prints resultNodes, I get None, as if they don't exist. This is the output that I get:
Start - Creating Relationships
(kristine)-[:FRIEND {since:2005}]->(amy)
Relationship Created - None
Relationship Created - None
You're using the API incorrectly. The create method doesn't return nodes but instead updates the supplied argument. Therefore to get the relationship nodes, you need to interrogate the relationship object after performing the create.
I produce the following node and relationship data in a for loop about 1 million times. The idea is that investor nodes connect to company nodes by relationship edges:
# Investor identity, copied straight from the owner record.
investor = {key: owner[key] for key in ('name', 'CIK')}

# Properties describing how the owner relates to the company.
relationship = {
    key: owner[key]
    for key in ('isDirector', 'isOfficer', 'isOther', 'isTenPercentOwner', 'title')
}

# Company properties: (output key, field name in the json record).
company = {
    label: json[field]
    for label, field in (
        ('Name', 'issuerName'),
        ('SIC', 'issuerSIC'),
        ('Ticker Symbol', 'issuerTradingSymbol'),
        ('CIK', 'issuerCIK'),
        ('EIN', 'issuerEIN'),
    )
}
How do I complete the following code to get the dicts above into neo4j community edition?
from py2neo import Graph, authenticate
authenticate("localhost:7474", "neo4j", "neo")
graph = Graph()
for json in long_list_of_dicts:
investor = {...}
company = {...}
relationship = {...}
# Code to import investor, company, relationship data into neo4j
In py2neo a Node is defined in following manner:
class Node(*labels, **properties)
Each node has a label and can have many properties. In this case, the Investor node can be defined by setting the label to investor and the properties of the node to name and CIK.
investor_node = Node('investor', name = owner['name'], CIK = owner['CIK'])
Similarly, company node would look like:
company_node = Node('company', name = json['issuerName'], SIC = json['issuerSIC'])
Relationships are defined in the following manner:
class Relationship(start_node, type, end_node, **properties)
In this case Relationship can be defined using:
investor_company_relationship = Relationship(investor_node, "is_director", company_node)
You can find one sample implementation of neo4j graph here.
You could use UNWIND clause. Something like
WITH {json} AS document
UNWIND document AS company
MERGE (c:company {c_id:company.id})
SET c.sic=company.issuerSIC
If some of your JSON items are themselves lists, you can use UNWIND as many times as you like: UNWIND document.list_of_some_property
I'm using py2neo (version 1.6). I want to get an existing node by its property from the graph and then use it to create a relationship.
My solution:
graph = neo4j.GraphDatabaseService("http://...")
# Look the existing node up by its property value with plain Cypher.
query = neo4j.CypherQuery(graph,"MATCH (n:NodeType) where n.property = 'property' return n")
r = query.execute()
if len(r.data)==0:
    raise Exception("node does not exist")
# Kept under its own name: rebinding `node` here would break the
# node(name=...) call below, which needs `node` to stay callable.
existing_node = r.data[0]['n']
# NOTE(review): `batch`, `node` and `rel` are not defined in this snippet;
# presumably they come from py2neo imports/setup elsewhere -- confirm.
newNode = batch.create(node(name="APropertyOfNewNode"))
batch.add_labels(newNode, "LableOfNewNode")
relation = rel(existing_node, "relationshipName", newNode)
batch.create(relation)
batch.submit()
batch.clear()
Is there a high-level way to get an existing node by its property without using CypherQuery and writing plain Cypher?
You can use GraphDatabaseService.find:
from py2neo import neo4j
graph = neo4j.GraphDatabaseService('http://localhost:7474/db/data/')
movies = graph.find('Movie', 'title', 'The Matrix')
But graph.find returns a generator object.
movies
# <generator object find at 0x10b64acd0>
So you can only iterate through it once.
for movie in movies:
print type(movie)
print movie['tagline']
# <class 'py2neo.neo4j.Node'>
# Welcome to the Real World
I generate a graph with the words of a sentence as nodes (tokens) and the dependency relations as edges.
This info comes from an external tool as tuples in the format ((word, index), (word, index), relation). Each word can have many dependencies and I want only one node per word.
When I process each dependency I want to look its nodes up by their ID, but when I try to use find_vertex() I get the following exception:
TypeError: No registered converter was able to extract a C++ reference to type graph_tool::GraphInterface from this Python object of type Graph
This is the code:
def graph(self, sentence, sentenceNamespace="sentence", separator="_"):
    """Build a dependency graph for ``sentence``.

    Each dependency yielded by ``self.dependency(sentence)`` is read as
    ``(governor, dependant, relation)``, where governor and dependant are
    ``(word, index)`` pairs. Every distinct pair gets exactly one vertex,
    identified by the generated ID
    ``<sentenceNamespace><separator><word><separator><index>``, and each
    dependency adds one edge governor -> dependant whose ``type`` property
    is the relation.

    Returns the new Graph. Its ``root`` graph property refers to a root
    vertex with form "S" and ord -1.
    """
    graph = Graph()
    root_node = graph.add_vertex()
    # Create a link to Root Node
    graph.graph_properties['root'] = graph.new_graph_property("python::object", root_node)
    # Create Node's properties
    graph.vertex_properties['id'] = graph.new_vertex_property("string")
    graph.vertex_properties['form'] = graph.new_vertex_property("string")
    graph.vertex_properties['ord'] = graph.new_vertex_property("int64_t")
    # Create edge's properties
    graph.edge_properties['type'] = graph.new_edge_property("string")
    # Create the root node
    graph.vertex_properties['form'][root_node] = "S"
    graph.vertex_properties['ord'][root_node] = -1

    def _find_or_add_vertex(token):
        """Return the vertex for a (word, index) token, adding it on first sight."""
        # The ID carries both the word and its index, so the same word at
        # two positions in the sentence maps to two different vertices.
        name = "{1}{0}{2[0]}{0}{2[1]}".format(separator, sentenceNamespace, token)
        # find_vertex is given the property map itself, not the name 'id'.
        matches = find_vertex(graph, graph.vertex_properties['id'], name)
        # If search is successful use the first node else create a new one
        if matches:
            return matches[0]
        vertex = graph.add_vertex()
        graph.vertex_properties['id'][vertex] = name
        graph.vertex_properties['form'][vertex] = token[0]
        graph.vertex_properties['ord'][vertex] = token[1]
        return vertex

    # Find or create the dependant and governor nodes and link with a new edge
    for dependency in self.dependency(sentence):
        governor = _find_or_add_vertex(dependency[0])
        dependant = _find_or_add_vertex(dependency[1])
        # Relation Type
        relation = dependency[2]
        # Link the nodes with the new dependency relation
        dependency_edge = graph.add_edge(governor, dependant)
        graph.edge_properties['type'][dependency_edge] = relation
    return graph
This bug has been fixed in the git version.