I'm working with digraph defined in dot language using networkx. I need to achieve something like this:
X = networkx.read_dot('_t.dot') #loads dotfile
def navigate_through_model(model, type): #model is model, type -> string of desired 'comment'
.....
return path
if __name__ == '__main__':
print navigate_through_model(X, 'regression') # I need to return path, that is going through all nodes with comment == 'regression' (it can be other attribute, comment is only for question)
And I'm kind of lost in this :(
Any help will be appreciated.
Ok finally I found (after good sleep) that I can use weight of edges to achieve the same result. So solution is easy.. set lowest weight to 'regression' path and than just generate shortest path.
networkx.shortest_path(X, 'start', 'end', weight='weight')
In order to do this you have to edit networkx/algorithms/shortest_paths/weighted.py
There is some kind of bug on the row 342.
vw_dist = dist[v] + edgedata.get(weight, 1)
You have to change it to:
vw_dist = dist[v] + float(edgedata.get(weight, 1)) #if you want to use floating number or just int to use integers
Ok I finally gets to this:
import networkx
X = networkx.read_dot('_t.dot')
def navigate_through_model(model, start_node, end_node, typex):
    """Walk `model` from start_node towards end_node, only following edges
    whose 'comment' attribute equals `typex`.

    Returns the list of traversed (from, to) edge tuples, or the string
    "not found" when a sweep over the neighbors finds no usable edge.
    NOTE(review): indentation was lost in transit; the layout below assumes
    the `if not status` bail-out sits inside the while loop -- confirm.
    """
    path = []                  # edges taken so far, as (from, to) tuples
    actual_node = start_node   # node the walk is currently at
    visited_nodes = []         # guards against revisiting a node (cycles)
    status = True              # True while the last checked edge was usable
    while actual_node != end_node:
        # all_neighbors yields both predecessors and successors
        u = networkx.all_neighbors(model, actual_node)
        for line in u:
            target_node = line
            print target_node  # Python 2 print statement (debug output)
            try:
                # [0] selects the first parallel edge: read_dot builds a multigraph
                comm = model[actual_node][target_node][0]['comment']
                if (comm == typex and target_node not in visited_nodes):
                    path.append((actual_node, target_node))
                    actual_node = target_node
                    visited_nodes.append(target_node)
                    print path
                    print '\n'
                    print visited_nodes
                    status = True
                else:
                    # NOTE(review): status reflects only the most recently
                    # inspected neighbor, not the whole sweep
                    status = False
            except Exception as err:
                # edges without a 'comment' attribute are silently skipped
                pass
        if not status:
            return "not found"
    return path
print navigate_through_model(X, 'start', 'end', 'regression')
Maybe it is not the best solution but it works!
Related
Given a list of paths as:
'alpha/beta/gamma/delta alpha/beta/sigma beta/phi/pi/rho'
I want to Print it as:
-alpha
-beta
-gamma
delta
-sigma
-beta
-phi
-pi
rho
Can you please help me out with this?
I was able to make a list of dictionaries of dictionaries. (I am kinda lost here)
There are simpler ways to do this where I can directly print the data but I want to do it in a structure such that I might be able to use this data somewhere else too.
paths = 'alpha/beta/gamma/delta alpha/beta/sigma b/f/g/h r/g/t/y q/w/er/rat'
folder_list = []
def get_children(ippath, e_dict):
    """Expand a slash-separated path into nested dicts inside e_dict.

    'a/b/c' turns e_dict into {'a': {'b': {'c': {}}}}; e_dict is both
    mutated in place and returned.
    """
    segments = ippath.split('/')
    remaining_path = '/'.join(segments[1:])
    try:
        head = segments[0]
        if not head:
            # empty path component: recursion bottomed out
            return e_dict
        e_dict[head] = {}
        # recurse into the freshly created child dict for the tail
        e_dict[head].update(get_children(remaining_path, e_dict[head]))
        return e_dict
    except:
        # NOTE: bare except kept from the original contract
        return remaining_path
# Build one nested dict per input path and collect them in folder_list.
for path in paths.split(' '):
    end_dict = dict()                 # fresh accumulator per path
    output = get_children(path, end_dict)
    if output:
        # NOTE(review): trees are not merged -- a shared prefix like
        # 'alpha' appears once per input path in folder_list
        folder_list.append(output)
        # final_list.update(output)
    else:
        continue
print(folder_list)
It gives me a list of nested dictionaries but still not what I want.
Thank you, I really appreciate the help
Are you fine with using another library? if so, dpath will work great for this.
It allows you to create dicts based on strings
https://pypi.org/project/dpath/
Here's a straightforward solution:
First, build a set of all distinct full paths, including the intermediate paths.
Sort the paths. This puts them in depth-first order, guaranteeing that a parent directory will always appear before its children.
Iterate through the paths, maintaining a stack:
Pop from the stack until you find the parent of the current path.
Print just the difference between the current path and its parent. The indentation level is determined by the length of the stack.
Push the current path to the stack.
To get the - symbols in the right place, we can keep track of which paths are leaf nodes in the tree. Here's the code:
def dir_tree(s):
    """Print the directory tree described by the space-separated path list
    *s*.  Directory components are prefixed with '-', leaf components are
    printed bare; nesting depth is rendered with tab characters."""
    entries = set()
    for raw in s.split():
        segments = raw.split('/')
        leaf = True              # only the full original path is a leaf
        while segments:
            entries.add(('/'.join(segments) + '/', leaf))
            segments.pop()
            leaf = False         # every proper prefix is a directory
    # lexicographic order == depth-first order: parents sort before children
    stack = ['']
    for full, leaf in sorted(entries):
        # unwind to the nearest ancestor of the current path
        while not full.startswith(stack[-1]):
            stack.pop()
        # component name = current path minus parent prefix, minus trailing '/'
        name = full[len(stack[-1]):-1]
        print('\t' * (len(stack) - 1) + ('' if leaf else '-') + name)
        stack.append(full)
Output:
-alpha
-beta
-gamma
delta
sigma
-beta
-phi
-pi
rho
I finally got it to work.. :)
Ron Serruya's suggested library helped me rethink my structure.
import json
paths = 'alpha/beta/gamma/delta alpha/beta/sigma beta/phi/pi/rho'
folder_list = {}
def get_children(ippath, e_dict):
    """Turn a slash-separated path into nested dictionaries within e_dict.

    Example: 'a/b' mutates e_dict into {'a': {'b': {}}} and returns it.
    """
    pieces = ippath.split('/')
    remaining_path = '/'.join(pieces[1:])
    try:
        first = pieces[0]
        if first:
            # create the node for the head, then expand the tail inside it
            e_dict[first] = {}
            e_dict[first].update(get_children(remaining_path, e_dict[first]))
        return e_dict
    except:
        # bare except preserved from the original behavior
        return remaining_path
def merge_dictionaries(new_dictionary, main_dictionary):
    """Merge a single-rooted nested dict into main_dictionary in place.

    new_dictionary has exactly one root key, already present in
    main_dictionary; descend until the trees diverge and graft the new
    branch there.  Returns None (mutates main_dictionary).
    """
    key = list(new_dictionary.keys())[0]
    # BUG FIX: when the subtree under `key` is empty (a leaf, e.g. a
    # single-segment path), the original list(...)[0] raised IndexError.
    if not new_dictionary[key]:
        return
    child_key = list(new_dictionary[key].keys())[0]
    if child_key in main_dictionary[key]:
        # both trees contain this child: keep descending
        merge_dictionaries(new_dictionary[key], main_dictionary[key])
    else:
        # divergence point: attach the whole new branch
        main_dictionary[key][child_key] = new_dictionary[key][child_key]
def main():
    """Build a merged folder tree from the global `paths` into the global
    `folder_list`, then print it as indented JSON with braces stripped."""
    for path in paths.split(' '):
        end_dict = dict()
        output = get_children(path, end_dict)
        if output:
            # unknown root folder: adopt the whole subtree; known root: merge
            if list(output.keys())[0] not in list(folder_list.keys()):
                folder_list.update(output)
            else:
                merge_dictionaries(output, folder_list)
        else:
            continue
    # separators=('', '') suppresses commas/colons; the replace() calls then
    # strip the braces so only the indented names remain
    print(str(json.dumps(folder_list, sort_keys=True, indent=4, separators=('', ''))).replace('{', '').replace('}', ''))
main()
Gives Output:
"alpha"
"beta"
"gamma"
"delta"
"sigma"
"beta"
"phi"
"pi"
"rho"
Sorry for really bad structure of the code, I am up for suggestion to improve this structurally.
I am trying to understand sys.path.
So I want to make code that returns directory tree like this,but I can't.
Can someone please tell me the code?
【sys.path】
['C:\\Users\\81802\\PycharmProjects\\PlayGround',
'C:\\Users\\81802\\AppData\\Local\\Programs\\Python\\Python37\\python37.zip',
'C:\\Users\\81802\\AppData\\Local\\Programs\\Python\\Python37\\DLLs',
'C:\\Users\\81802\\AppData\\Local\\Programs\\Python\\Python37\\lib',
'C:\\Users\\81802\\AppData\\Local\\Programs\\Python\\Python37',
'C:\\Users\\81802\\PycharmProjects\\PlayGround\\venv',
'C:\\Users\\81802\\PycharmProjects\\PlayGround\\venv\\lib\\site-packages',
'C:\\Users\\81802\\PycharmProjects\\PlayGround\\venv\\lib\\site-packages\\setuptools-39.1.0-py3.7.egg',
'C:\\Users\\81802\\PycharmProjects\\PlayGround\\venv\\lib\\site-packages\\pip-10.0.1-py3.7.egg']
【directory tree(dict)】
{'C:\\Users\\81802\\':
[{'PycharmProjects\\PlayGround\\':
['',
{'venv\\':
['',
{'lib\\site-packages\\':
['',
'setuptools-39.1.0-py3.7.egg',
'pip-10.0.1-py3.7.egg']}]}]},
{'AppData\\Local\\Programs\\Python\\Python37\\':
['',
'python37.zip',
'DLLs',
'lib']}]}
This is the simplest I can get. The idea is to maintain a set of paths which didn't currently diverge.
import sys
from pprint import pprint
pprint(sys.path)
sep = '\\'
# check if all paths agree on the current name
# check if all paths agree on the current name
def isSameName(paths, index):
    """Return True when every split path has a component at *index* and
    all of those components equal paths[0][index]."""
    reference = paths[0]
    for candidate in paths:
        if index >= len(candidate):
            return False          # some path ends before this depth
        if candidate[index] != reference[index]:
            return False          # divergence at this depth
    return True
#transform the current set of paths into tree
#transform the current set of paths into tree
def toTree(paths, startIndex):
    """Convert a list of split paths, which share a common prefix from
    startIndex onwards, into a nested {prefix: [subtrees]} structure."""
    index = startIndex
    # a single path needs no branching: emit its remainder as one string
    if len(paths) == 1:
        return sep.join(paths[0][index:])
    # advance past every component on which all paths still agree
    while isSameName(paths, index):
        index += 1
    # bucket the paths by their first diverging component; 0 is the
    # sentinel bucket for paths that end exactly at `index`
    groups = dict()
    for candidate in paths:
        branch = candidate[index] if len(candidate) > index else 0
        groups.setdefault(branch, []).append(candidate)
    subtrees = [toTree(group, index) for group in groups.values()]
    return {sep.join(paths[0][startIndex:index]): subtrees}
paths = [path.split(sep) for path in sys.path]
pprint(toTree(paths, 0))
This will give you a dictionary where every key is a directory, and the values are lists of either file names or dictionaries with a subdirectory.
import os
def get_files_dict(startpath):
    """Recursively describe `startpath` as {basename: children}, where each
    child is either a file name (str) or a nested dict for a subdirectory."""
    children = []  # mixed list of file names and subdirectory dicts
    for entry in os.listdir(startpath):
        # build the full path so isfile() and the recursion work from any cwd
        entry_path = os.path.join(startpath, entry)
        if os.path.isfile(entry_path):
            children.append(entry)
        else:
            # anything that is not a regular file is descended into
            children.append(get_files_dict(entry_path))
    return {os.path.basename(startpath): children}
file_tree = get_files_dict(os.getcwd())
# this is just a helper function to print the tree nicely
# this is just a helper function to print the tree nicely
def print_tree(d,i=0):
    """Pretty-print a tree from get_files_dict: 4 spaces of indent per
    level, directory names suffixed with the platform path separator."""
    pad = " " * 4 * i
    for name, children in d.items():
        print("{}{}".format(pad, name + os.sep))
        for child in children:
            if type(child) is dict:
                # subdirectory: recurse one level deeper
                print_tree(child, i + 1)
            else:
                # plain file name
                print("{}{}".format(" " * 4 * (i + 1), child))
And the printed output:
runner/
.bashrc
.bash_logout
.profile
.site-packages/
main.py
.config/
pycodestyle
_test_runner.py
This was inspired by this SO issue, but I changed quite a bit about the implementation.
im trying to search a GTK 3 treestore for a string. The treestore has 4 columns,and is for a treeview widget that has callapsible nodes. im creating the nodes with this function:
def AddItem(self,ParentIter,txt,datapath='',projName=Project):
    """Append a row under ParentIter in the tree store and mirror it into
    the `projName` database table.  Returns the new Gtk.TreeIter.
    Store columns: 0=parent node id, 1=display text, 2=node id, 3=data path."""
    self.store = self.builder.get_object('theTreeStore')
    NodeId = secrets.token_hex(8)    # random 16-hex-char row identifier
    if ParentIter == None:
        ParentNodeId = ''            # root rows carry an empty parent id
    else:
        # column 2 of the parent row holds its node id
        ParentNodeId = self.store.get_value(ParentIter, 2)
    treeEntry = ['%s' %ParentNodeId,'%s' %txt,'%s' %NodeId,'%s' %datapath]
    # the trailing arrows are the asker's marker for this line, not code
    node = self.store.append(ParentIter, treeEntry) <<<<<<<<<<<<<
    self.view = self.builder.get_object('Tree')
    self.view.set_model(self.store)
    # table nodes(tParentNodeID ,tNodeTxt ,tNodeID ,tDataPath );
    # NOTE(review): SQL built via %-interpolation is open to SQL injection
    # if txt/datapath ever contain quotes -- use parameterized queries for
    # the VALUES part (the table name cannot be parameterized).
    sql = "INSERT INTO %s (tParentNodeID ,tNodeTxt ,tNodeID ,tDataPath ) VALUES ('%s','%s','%s','%s')" %(projName,ParentNodeId,txt,NodeId,datapath)
    self.cursor.execute(sql)
    self.mariadb_connection.commit()
    for x in self.cursor:
        print(x)
    return(node)
as you can see the data in the tree is nested in its parent.
now i need to somehow search the treestore for a row that contains a certain NodeId string. Ive read the gtk docs over and over but i cant quite figure out what to do. im guessing i need to use following methods:
store.get_iter()
store.iter_children()
but idk everything i try only returns the root nodes no children.
i basically want a search function that will recursively search each node and its children,and their children for a string. something like this:
def GetRowbyNodeID(nodeid):
for row in treestore:
if row[1]==nodeid:
return(row)
for children in row:
if children[1] == nodeid:
return(children)
The code is in multiple files, i can post any functions relevant if needed.
GtkTreeStore implements GtkTreeModel interface. Thus you can use the following methods:
iter = store.get_iter() to obtain an iterator
chld_iter = iter.get_children()to obtain an iterator over children elements (please note, it's an iter's method!)
I'd also recommend reading this tutorial. "The Model" section contains all you need on iterating over the model (spoiler: search for print_tree_store)
Got it all working. thanks again. im posting the relevant code just in case anyone else could use it.
def SearchTreeRows(self, store, treeiter, searchstr):
    """Depth-first search of a Gtk.TreeStore for a row whose column 2
    equals `searchstr`.

    :param store: the Gtk.TreeStore (or any Gtk.TreeModel)
    :param treeiter: iterator positioned on the first sibling to inspect
    :param searchstr: node-id string to look for
    :return: the matching Gtk.TreeIter, or None when not found
    """
    print("\nsearch>%s" % searchstr)
    while treeiter != None:
        if store[treeiter][2] == searchstr:
            print("found in:%s" % str(store[treeiter][:]))
            # FIX: the original had an unreachable `break` after this return
            return treeiter
        print("searched:%s" % str(store[treeiter][:]))
        if store.iter_has_child(treeiter):
            # recurse into this row's children before moving to the next sibling
            childiter = store.iter_children(treeiter)
            ret = self.SearchTreeRows(store, childiter, searchstr)
            if ret is not None:
                return ret
        treeiter = store.iter_next(treeiter)   # next sibling, None at the end
    return None
def NodeId2Tree(self,nodeid):
    """Find the tree-store row whose node-id column (index 2) equals
    `nodeid`.  Returns its Gtk.TreeIter, or None when absent."""
    self.store = self.builder.get_object('theTreeStore')
    rootiter = self.store.get_iter_first()   # None when the store is empty
    row = self.SearchTreeRows(self.store, rootiter,nodeid)
    return(row)
def LoadProject(self):
    """Let the user pick a project (one DB table per project) through a
    combo-box dialog, then rebuild the tree store from that table."""
    global Project
    global ProjSel
    sql = "SHOW TABLES"
    self.cursor.execute(sql)
    tbls = []
    for x in self.cursor:
        tbls.append(x)              # each x is a 1-tuple (table_name,)
    diag = self.builder.get_object('ProjectChooser')
    self.combo = Gtk.ComboBox()
    ls =Gtk.ListStore(str)
    for tble in tbls:
        strg ="%s" %tble            # NOTE(review): strg is assigned but never used
        ls.append(tble)
    self.combo.set_model(ls)
    cellr = Gtk.CellRendererText()
    self.combo.pack_start(cellr,True)
    self.combo.add_attribute(cellr, 'text', 0)
    diag.vbox.pack_start(self.combo, True, True, 5)
    diag.show_all()
    response = diag.run()           # blocks until the dialog is closed
    self.combo.destroy()
    # NOTE(review): ProjSel is read here but never assigned in this method;
    # presumably a dialog signal handler sets it -- confirm.
    print(ProjSel)
    Project = ProjSel
    ProjSel = ''
    view = self.builder.get_object('Tree')
    self.store.clear()
    view.set_model(self.store)
    # Rebuild rows; parents must be inserted before children so that
    # NodeId2Tree() can resolve the parent iterator.
    sql = "SELECT tParentNodeId,tNodeTxt,tNodeId FROM %s"%(Project)
    self.cursor.execute(sql)
    for x in self.cursor:
        parid = x[0]
        nodtxt = x[1]
        nodid =x[2]
        if parid == '':
            treeEntry = ['%s' %parid, '%s' %nodtxt, '%s' %nodid, '']
            node = self.store.append(None, treeEntry) #root nodes
        else:
            treeEntry = ['%s' %parid, '%s' %nodtxt, '%s' %nodid, '']
            n2id = self.NodeId2Tree(parid)   # locate parent row by node id
            node = self.store.append(n2id, treeEntry)
            print("got return:%s For:%s"%(n2id,treeEntry[0]))
    view.set_model(self.store)
    #select * where parentid == none >> get root nodes ???? or parse line by line
I am new to programming in python,´and i have some troubles understanding the concept. I wish to compare two xml files. These xml files are quite large.
I will give an example for the type of files i wish to compare.
xmlfile1:
<xml>
<property1>
<property2>
<property3>
</property3>
</property2>
</property1>
</xml>
xml file2:
<xml>
<property1>
<property2>
<property3>
<property4>
</property4>
</property3>
</property2>
</property1>
</xml>
the property1,property2 that i have named are different from the ones that are actually in the file. There are a lot of properties within the xml file.
ANd i wish to compare the two xml files.
I am using an lxml parser to try to compare the two files and to print out the difference between them.
I do not know how to parse it and compare it automatically.
I tried reading through the lxml parser, but i couldnt understand how to use it to my problem.
Can someone please tell me how should i proceed with this problem.
Code snippets can be very useful
One more question, Am i following the right concept or i am missing something else? Please correct me of any new concepts that you knwo about
This is actually a reasonably challenging problem (due to what "difference" means often being in the eye of the beholder here, as there will be semantically "equivalent" information that you probably don't want marked as differences).
You could try using xmldiff, which is based on work in the paper Change Detection in Hierarchically Structured Information.
My approach to the problem was transforming each XML into a xml.etree.ElementTree and iterating through each of the layers.
I also included the functionality to ignore a list of attributes while doing the comparison.
The first block of code holds the class used:
import xml.etree.ElementTree as ET
import logging
class XmlTree():
    """Compare two xml.etree element trees for structural equality,
    logging the first mismatch found to xml-comparison.log."""

    def __init__(self):
        self.hdlr = logging.FileHandler('xml-comparison.log')
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        # BUG FIX: the original never created self.logger (and never wired
        # the handler/formatter), so any mismatch raised AttributeError.
        self.hdlr.setFormatter(self.formatter)
        self.logger = logging.getLogger('xml-comparison')
        self.logger.addHandler(self.hdlr)

    # BUG FIX: the original line read '#staticmethod' (a comment); the
    # decorator was clearly intended.
    @staticmethod
    def convert_string_to_tree( xmlString):
        """Parse an XML string into an ElementTree Element."""
        return ET.fromstring(xmlString)

    def xml_compare(self, x1, x2, excludes=None):
        """
        Compares two xml etrees
        :param x1: the first tree
        :param x2: the second tree
        :param excludes: list of string of attributes to exclude from comparison
        :return:
        True if both files match
        """
        # avoid the mutable-default pitfall; None behaves like []
        excludes = excludes or []
        if x1.tag != x2.tag:
            self.logger.debug('Tags do not match: %s and %s' % (x1.tag, x2.tag))
            return False
        for name, value in x1.attrib.items():
            if name not in excludes:
                if x2.attrib.get(name) != value:
                    self.logger.debug('Attributes do not match: %s=%r, %s=%r'
                                      % (name, value, name, x2.attrib.get(name)))
                    return False
        for name in x2.attrib.keys():
            if name not in excludes:
                if name not in x1.attrib:
                    self.logger.debug('x2 has an attribute x1 is missing: %s'
                                      % name)
                    return False
        if not self.text_compare(x1.text, x2.text):
            self.logger.debug('text: %r != %r' % (x1.text, x2.text))
            return False
        if not self.text_compare(x1.tail, x2.tail):
            self.logger.debug('tail: %r != %r' % (x1.tail, x2.tail))
            return False
        # BUG FIX: Element.getchildren() was removed in Python 3.9;
        # list(elem) is the documented equivalent.
        cl1 = list(x1)
        cl2 = list(x2)
        if len(cl1) != len(cl2):
            self.logger.debug('children length differs, %i != %i'
                              % (len(cl1), len(cl2)))
            return False
        for i, (c1, c2) in enumerate(zip(cl1, cl2), start=1):
            if c1.tag not in excludes:
                if not self.xml_compare(c1, c2, excludes):
                    self.logger.debug('children %i do not match: %s'
                                      % (i, c1.tag))
                    return False
        return True

    def text_compare(self, t1, t2):
        """
        Compare two text strings
        :param t1: text one
        :param t2: text two
        :return:
        True if a match
        """
        # both empty/None counts as equal; '*' is a wildcard
        if not t1 and not t2:
            return True
        if t1 == '*' or t2 == '*':
            return True
        return (t1 or '').strip() == (t2 or '').strip()
The second block of code holds a couple of XML examples and their comparison:
# Two sample documents that differ only in <from>; since "from" is passed
# in the excludes list below, they are reported as matching.
xml1 = "<note><to>Tove</to><from>Jani</from><heading>Reminder</heading><body>Don't forget me this weekend!</body></note>"
xml2 = "<note><to>Tove</to><from>Daniel</from><heading>Reminder</heading><body>Don't forget me this weekend!</body></note>"
tree1 = XmlTree.convert_string_to_tree(xml1)
tree2 = XmlTree.convert_string_to_tree(xml2)
comparator = XmlTree()
if comparator.xml_compare(tree1, tree2, ["from"]):
    print "XMLs match"        # Python 2 print statement
else:
    print "XMLs don't match"
Most of the credit for this code must be given to syawar
If your intent is to compare the XML content and attributes, and not just compare the files byte-by-byte, there are subtleties to the question, so there is no solution that fits all cases.
You have to know something about what is important in the XML files.
The order of attributes listed in an element tag is generally not supposed to matter. That is, two XML files that differ only in the order of element attributes generally ought to be judged the same.
But that's the generic part.
The tricky part is application-dependent. For instance, it may be that white-space formatting of some elements of the file doesn't matter, and white-space might be added to the XML for legibility. And so on.
Recent versions of the ElementTree module have a function canonicalize(), which can take care of simpler cases, by putting the XML string into a canonical format.
I used this function in the unit tests of a recent project, to compare a known XML output with output from a package that sometimes changes the order of attributes. In this case, white space in the text elements was unimportant, but it was sometimes used for formatting.
import xml.etree.ElementTree as ET
def _canonicalize_XML( xml_str ):
""" Canonicalizes XML strings, so they are safe to
compare directly.
Strips white space from text content."""
if not hasattr( ET, "canonicalize" ):
raise Exception( "ElementTree missing canonicalize()" )
root = ET.fromstring( xml_str )
rootstr = ET.tostring( root )
return ET.canonicalize( rootstr, strip_text=True )
To use it, something like this:
# Usage: parse both files, canonicalize each serialized root, then the
# canonical strings can be compared directly.
file1 = ET.parse('file1.xml')
file2 = ET.parse('file2.xml')
canon1 = _canonicalize_XML( ET.tostring( file1.getroot() ) )
canon2 = _canonicalize_XML( ET.tostring( file2.getroot() ) )
print( canon1 == canon2 )
In my distribution, the Python 2 doesn't have canonicalize(), but Python 3 does.
Another script using xml.etree. Its awful but it works :)
#!/usr/bin/env python
import sys
import xml.etree.ElementTree as ET
from termcolor import colored
tree1 = ET.parse(sys.argv[1])
root1 = tree1.getroot()
tree2 = ET.parse(sys.argv[2])
root2 = tree2.getroot()
class Element:
    """Recursive snapshot of an XML element: tag name, child elements keyed
    by tag, and a dict of attributes.
    NOTE(review): children with duplicate tags overwrite each other because
    subs is keyed by tag."""
    def __init__(self,e):
        self.name = e.tag
        self.subs = {}   # child tag -> Element (one per distinct tag)
        self.atts = {}   # attribute name -> value
        for child in e:
            self.subs[child.tag] = Element(child)
        for att in e.attrib.keys():
            self.atts[att] = e.attrib[att]
        print "name: %s, len(subs) = %d, len(atts) = %d" % ( self.name, len(self.subs), len(self.atts) )
    def compare(self,el):
        """Print a colored attribute-by-attribute diff against `el`
        (self is "first", el is "second").  Python 2 print statements.
        NOTE(review): attributes present only on `el` are never reported."""
        if self.name!=el.name:
            raise RuntimeError("Two names are not the same")
        print "----------------------------------------------------------------"
        print self.name
        print "----------------------------------------------------------------"
        for att in self.atts.keys():
            v1 = self.atts[att]
            if att not in el.atts.keys():
                v2 = '[NA]'
                color = 'yellow'     # attribute missing on the second tree
            else:
                v2 = el.atts[att]
                if v2==v1:
                    color = 'green'  # values agree
                else:
                    color = 'red'    # values differ
            print colored("first:\t%s = %s" % ( att, v1 ), color)
            print colored("second:\t%s = %s" % ( att, v2 ), color)
        for subName in self.subs.keys():
            if subName not in el.subs.keys():
                # NOTE(review): 'purple' is not a termcolor color name
                # ('magenta' is) -- these calls likely raise KeyError; confirm.
                print colored("first:\thas got %s" % ( subName), 'purple')
                print colored("second:\thasn't got %s" % ( subName), 'purple')
            else:
                self.subs[subName].compare( el.subs[subName] )
e1 = Element(root1)
e2 = Element(root2)
e1.compare(e2)
I've looked through as many answers on this subject as I could find and all suggested that it's a global - local conflict. I can't see how this would apply in my case but please do explain. Here's the error :
"local variable 'CompletelyUniqueName' referenced before assignment"
and here is the code, a function I call from another script :
def geopixsum(filename):
    """Sum all non-nodata pixel values of band 1 of a GeoTIFF.

    NOTE(review): when `filename` does not end in '.tif', nothing ever
    assigns CompletelyUniqueName, so the final return raises
    UnboundLocalError -- this is the error the question is about.
    """
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            # NOTE(review): prints the builtin `file`, not `filename`
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        # keep only pixels whose value differs from the nodata marker
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        inDs = None   # release the GDAL dataset handle
    return CompletelyUniqueName
This code worked when not a function but rather just a script on its own. Again, I know this would make it it seem like a global - local issue but given the name I've assigned the variable I think I've gone to great enough lengths to avoid a conflict.
You should either define default value for CompletelyUniqueName (for case if filename.endswith('.tif') == False)
# Option 1: give CompletelyUniqueName a default value so the final return
# is always defined, even for non-.tif filenames.
def geopixsum(filename):
    CompletelyUniqueName = 0
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
    return CompletelyUniqueName
Or return inside if statement
# Option 2: return from inside the if-block; callers then receive an
# implicit None for non-.tif input.
def geopixsum(filename):
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
        return CompletelyUniqueName
The simplest fix:
def geopixsum(filename):
    """Sum all non-nodata pixel values of band 1 of a GeoTIFF; returns 0
    when `filename` is not a .tif (fixes the UnboundLocalError)."""
    CompletelyUniqueName = 0 # or None, or anything you want to return
    # if the file is not a tif
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            # NOTE(review): prints the builtin `file`, not `filename`
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        # keep only pixels whose value differs from the nodata marker
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        inDs = None   # release the GDAL dataset handle
    return CompletelyUniqueName