Python list dependency based on other values in the same list

I am struggling to figure out how to express dependencies between values in a Python list.
Basically I have a list like the one below. I can pass an input value such as TABLE_VIEW,
and based on that input I want to generate a dependency list in order.
INPUT
1.EXP_TABLE_NAME_STG,TARGET_TABLE
2.SQ_TABLE_NAME,EXP_TABLE_NAME_STG
3.TABLE_VIEW,SQ_TABLE_NAME
4.EXP_TABLE_NAME_STG,LKP_NEW_TABLE_3
5.SQ_TABLE_NAME,LKP_NEW_TABLE_1
6.EXP_TABLE_NAME_STG,LKP_NEW_TABLE_2
7.LKP_NEW_TABLE_1,TARGET_TABLE
For example, the 3rd entry is TABLE_VIEW,SQ_TABLE_NAME. Based on its 2nd value, SQ_TABLE_NAME, I want to find the next dependencies, which in this case are:
SQ_TABLE_NAME,EXP_TABLE_NAME_STG
SQ_TABLE_NAME,LKP_NEW_TABLE_1
From those two, take the 2nd value again and repeat:
EXP_TABLE_NAME_STG,LKP_NEW_TABLE_3
EXP_TABLE_NAME_STG,LKP_NEW_TABLE_2
EXP_TABLE_NAME_STG,TARGET_TABLE
LKP_NEW_TABLE_1,TARGET_TABLE
I may have up to 50 entries like this, and I want to put them in dependency order based on the 2nd value.
OUTPUT:
1.TABLE_VIEW,SQ_TABLE_NAME
2.SQ_TABLE_NAME,EXP_TABLE_NAME_STG
3.SQ_TABLE_NAME,LKP_NEW_TABLE_1
4.EXP_TABLE_NAME_STG,LKP_NEW_TABLE_3
5.EXP_TABLE_NAME_STG,LKP_NEW_TABLE_2
6.EXP_TABLE_NAME_STG,TARGET_TABLE
7.LKP_NEW_TABLE_1,TARGET_TABLE
I have tried writing this statically, using multiple list variables and deleting already-processed entries from the original list, but I never know in advance where the values end. Can you please share some thoughts on how to implement this dynamically?
sq_order_dependency = []
for sq_dep in job_dependent_details:
    if 'SQ' in sq_dep.split(',')[0]:
        sq_order_dependency.append(sq_dep)
        job_dependent_details.remove(sq_dep)
sq_order_dependency1 = []
for sq_depenent_order in sq_order_dependency:
    next_dependency = sq_depenent_order.split(',')[1]
    #print(next_dependency)
    for job_dependent_details_list in job_dependent_details:
        if next_dependency in job_dependent_details_list.split(',')[0]:
            #print(job_dependent_details_list)
            sq_order_dependency.append(job_dependent_details_list)
for i in sq_order_dependency:
    job_dependent_details.remove(i)

I would follow a different approach: build a tree-like structure of dependency pairs, and then print out the tree.
In the following code I defined a simple Dep class and chose a depth-first traversal for showing the tree, both for readability. Since we meet the dependencies in an unspecified order, I used a helper dictionary. Oh, and I abbreviated the table names out of laziness :)
class Dep():
    def __init__(self, name, children=None):
        self.name = name
        if children:
            self.children = [children]
        else:
            self.children = []

    def add_child(self, child):
        self.children.append(child)

    def show(self, level=0):
        for c in self.children:
            print('\t' * level, self.name, c.name)
            c.show(level + 1)

def show_dependencies(deps):
    out = {}
    root = deps[0][0]
    for d in deps:
        pname, cname = d
        if cname in out:
            c = out[cname]
        else:
            c = Dep(cname)
            out[cname] = c
        if pname in out:
            out[pname].add_child(c)
        else:
            out[pname] = Dep(pname, c)
        if root == cname:
            root = pname
    out[root].show()
>>> show_dependencies([('EXP','TARGET'),('SQ','EXP'),('TABLE','SQ'),('EXP','LKP3'),('SQ','LKP1'),('EXP','LKP2'),('LKP1','TARGET')])
TABLE SQ
	SQ EXP
		EXP TARGET
		EXP LKP3
		EXP LKP2
	SQ LKP1
		LKP1 TARGET

According to your examples and data there should be no duplicates, so this should work, I think.
The main function is based on a recursive call (something like DFS).
Note that this works only for directed edges and graphs without self-loops.
from collections import defaultdict

a = ['EXP_TABLE_NAME_STG, TARGET_TABLE',
     'SQ_TABLE_NAME, EXP_TABLE_NAME_STG',
     'TABLE_VIEW, SQ_TABLE_NAME',
     'EXP_TABLE_NAME_STG, LKP_NEW_TABLE_3',
     'SQ_TABLE_NAME, LKP_NEW_TABLE_1',
     'EXP_TABLE_NAME_STG, LKP_NEW_TABLE_2',
     'LKP_NEW_TABLE_1, TARGET_TABLE']

a = [i.replace(" ", '') for i in a]
sql_data = [tuple(i.split(',')) for i in a]

class CustomGraphDependency:
    # desired for question
    def __init__(self, data: list):
        self.graph = defaultdict(set)  # no self edge
        self.add_dependency(data)
        self.count = 1

    def add_dependency(self, data: list):
        for node1, node2 in data:  # directed edge only
            self.graph[node1].add(node2)

    def dependency_finder_with_count(self, node: str, sq_order_dependency: list, flag: list):
        # (e.g. 1.TABLE_VIEW,SQ_TABLE_NAME)
        flag.append(node)
        for item in self.graph[node]:
            sq_order_dependency.append((self.count, node, item))
            if item not in flag:
                self.count += 1
                self.dependency_finder_with_count(item, sq_order_dependency, flag)
        return sorted(sq_order_dependency, key=lambda x: x[0])

obj_test = CustomGraphDependency(sql_data).dependency_finder_with_count('TABLE_VIEW', [], [])
for i in obj_test:
    print(i)
'''
(1, 'TABLE_VIEW', 'SQ_TABLE_NAME')
(2, 'SQ_TABLE_NAME', 'EXP_TABLE_NAME_STG')
(3, 'EXP_TABLE_NAME_STG', 'LKP_NEW_TABLE_3')
(4, 'EXP_TABLE_NAME_STG', 'TARGET_TABLE')
(5, 'EXP_TABLE_NAME_STG', 'LKP_NEW_TABLE_2')
(6, 'SQ_TABLE_NAME', 'LKP_NEW_TABLE_1')
(7, 'LKP_NEW_TABLE_1', 'TARGET_TABLE')
'''
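If recursion depth is a concern, the same ordering can also be produced iteratively with a breadth-first traversal. Below is a minimal sketch reusing the sql_data pairs built above; the order_dependencies name is my own, not from either answer, and sibling pairs come out in input order:

from collections import defaultdict, deque

def order_dependencies(pairs, start):
    # Build a directed adjacency map: parent -> list of children (input order kept)
    graph = defaultdict(list)
    for parent, child in pairs:
        graph[parent].append(child)
    ordered, seen, queue = [], {start}, deque([start])
    while queue:
        node = queue.popleft()
        for child in graph[node]:
            ordered.append((node, child))  # emit each pair in visit order
            if child not in seen:  # expand every node only once
                seen.add(child)
                queue.append(child)
    return ordered

for i, pair in enumerate(order_dependencies(sql_data, 'TABLE_VIEW'), 1):
    print(i, *pair)
# 1 TABLE_VIEW SQ_TABLE_NAME
# 2 SQ_TABLE_NAME EXP_TABLE_NAME_STG
# ...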

Related

Why min Heap of Python is not returning the minimum value here?

I printed out every iteration, and the contents of the list look correct. But for some reason heappop returns the value -8 even though -15 is in the list. Help me out.
import heapq
from collections import defaultdict
from typing import List

class FoodRatings:
    def __init__(self, foods: List[str], cuisines: List[str], ratings: List[int]):
        self.find_cuisine = defaultdict()
        self.hashset = defaultdict(list)
        for f, c, r in zip(foods, cuisines, ratings):
            self.find_cuisine[f] = (c, r)
            heapq.heappush(self.hashset[c], (-r, f))

    def changeRating(self, food: str, newRating: int) -> None:
        c, r = self.find_cuisine[food]
        self.hashset[c].remove((-r, food))
        heapq.heappush(self.hashset[c], (-newRating, food))
        self.find_cuisine[food] = (c, newRating)

    def highestRated(self, cuisine: str) -> str:
        r, f = heapq.heappop(self.hashset[cuisine])
        heapq.heappush(self.hashset[cuisine], (r, f))
        return f
Input:
foods = ["czopaaeyl","lxoozsbh","kbaxapl"]
cuisines = ["dmnuqeatj","dmnuqeatj","dmnuqeatj"]
ratings = [11,2,15]
Calls:
"FoodRatings"
"changeRating" - ["czopaaeyl",12]
"highestRated" - ["dmnuqeatj"]
"changeRating" - ["kbaxapl",8]
"changeRating" - ["lxoozsbh",5]
"highestRated" - ["dmnuqeatj"]
My output: [null,null,"kbaxapl",null,null,"kbaxapl"]
Expected output: [null,null,"kbaxapl",null,null,"czopaaeyl"]
You should only mutate a heap using the heapq methods. By calling remove you bring inconsistency to the tree, as the indexes of nodes are altered, changing parent-child relationships, and thereby likely breaking the heap property at one or more places in the tree.
What you can do is to not remove the node you want to replace, but to mark it as deleted, without changing its ordering. Then, if ever that node bubbles up to the top of the tree, and you want to get the top value, only then remove it through a heappop call. Keep doing this until the top value is a node that was not marked as deleted.
To mark a node as deleted, you cannot use an (immutable) tuple. Use a list instead. And then the marking can consist of extending such a list with a 3rd dummy value. The length of 3 will distinguish it from the expected length of 2.
Here is the modified code with comments where changes were made:
class FoodRatings:
    def __init__(self, foods: List[str], cuisines: List[str], ratings: List[int]):
        self.find_cuisine = defaultdict()
        self.hashset = defaultdict(list)
        for f, c, r in zip(foods, cuisines, ratings):
            node = [-r, f]  # Use a mutable list instead of tuple
            self.find_cuisine[f] = (c, node)  # Add reference to that node
            heapq.heappush(self.hashset[c], node)

    def changeRating(self, food: str, newRating: int) -> None:
        c, node = self.find_cuisine[food]
        node.append("deleted")  # Don't remove, but mark as deleted
        node = [-newRating, food]  # Create new node
        heapq.heappush(self.hashset[c], node)
        self.find_cuisine[food] = (c, node)

    def highestRated(self, cuisine: str) -> str:
        while self.hashset[cuisine] and len(self.hashset[cuisine][0]) > 2:
            heapq.heappop(self.hashset[cuisine])  # Ignore deleted nodes
        if self.hashset[cuisine]:
            return self.hashset[cuisine][0][1]  # Just peek at index 0
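As a quick check, replaying the question's call sequence against this fixed class (with the same imports as above) now produces the expected final answer:

fr = FoodRatings(["czopaaeyl", "lxoozsbh", "kbaxapl"],
                 ["dmnuqeatj", "dmnuqeatj", "dmnuqeatj"],
                 [11, 2, 15])
fr.changeRating("czopaaeyl", 12)
print(fr.highestRated("dmnuqeatj"))  # kbaxapl (rating 15 still tops the heap)
fr.changeRating("kbaxapl", 8)
fr.changeRating("lxoozsbh", 5)
print(fr.highestRated("dmnuqeatj"))  # czopaaeyl (rating 12 is now the highest)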
A heapq list is a tree that just happens to be stored in a list, not a sorted list. Python won't stop you from removing an element from the middle of it using list.remove, but doing that can scramble the tree so that it no longer gives the right results. One way to fix that is to call heapq.heapify on the list after you change it.
Also note that you don't need to pop and re-push just to look at the lowest element. The lowest element is at index 0.
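A minimal sketch of that heapify-based repair, applied to the original tuple-based changeRating (simpler than the lazy-deletion approach above, but O(n) per change instead of O(log n)):

def changeRating(self, food: str, newRating: int) -> None:
    c, r = self.find_cuisine[food]
    self.hashset[c].remove((-r, food))  # leaves a hole in the middle of the tree...
    heapq.heapify(self.hashset[c])      # ...so restore the heap invariant in O(n)
    heapq.heappush(self.hashset[c], (-newRating, food))
    self.find_cuisine[food] = (c, newRating)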

How to find two items of a list with the same return value of a function on their attribute?

Given a basic class Item:
class Item(object):
    def __init__(self, val):
        self.val = val
a list of objects of this class (the number of items can be much larger):
items = [ Item(0), Item(11), Item(25), Item(16), Item(31) ]
and a function compute that processes a value and returns a result.
How do I find two items of this list for which compute returns the same value when applied to the attribute val? If nothing is found, an exception should be raised. If more than two items match, simply return any two of them.
For example, let's define compute:
def compute(x):
    return x % 10
The expected pair would be: (Item(11), Item(31)).
You can check the length of the set of resulting values:
class Item(object):
    def __init__(self, val):
        self.val = val
    def __repr__(self):
        return f'Item({self.val})'

def compute(x):
    return x % 10

items = [Item(0), Item(11), Item(25), Item(16), Item(31)]
c = list(map(lambda x: compute(x.val), items))
if len(set(c)) == len(c):  # no two or more equal values exist in the list
    raise Exception("All elements have unique computational results")
To find values with similar computational results, a dictionary can be used:
from collections import Counter
new_d = {i:compute(i.val) for i in items}
d = Counter(new_d.values())
multiple = [a for a, b in new_d.items() if d[b] > 1]
Output:
[Item(11), Item(31)]
A slightly more efficient way to check whether multiple objects share a computational value is to short-circuit over the Counter's values (all stops at the first count above 1), rather than building a set and comparing lengths:
if all(b == 1 for b in d.values()):
    raise Exception("All elements have unique computational results")
Assuming the values returned by compute are hashable (e.g., float values), you can use a dict to store results.
And you don't need to do anything fancy, like a multidict storing all items that produce a result. As soon as you see a duplicate, you're done. Besides being simpler, this also means we short-circuit the search as soon as we find a match, without even calling compute on the rest of the elements.
def find_pair(items, compute):
    results = {}
    for item in items:
        result = compute(item.val)
        if result in results:
            return results[result], item
        results[result] = item
    raise ValueError('No pair of items')
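As a quick usage check with the Item class and compute from the question:

items = [Item(0), Item(11), Item(25), Item(16), Item(31)]
a, b = find_pair(items, compute)
print(a.val, b.val)  # 11 31 -- compute(11) == compute(31) == 1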
A dictionary val_to_it that contains Items keyed by computed val can be used:
val_to_it = {}
for it in items:
    computed_val = compute(it.val)
    # Check if an Item in val_to_it has the same computed val
    dict_it = val_to_it.get(computed_val)
    if dict_it is None:
        # If not, add it to val_to_it so it can be referred to
        val_to_it[computed_val] = it
    else:
        # We found the two elements!
        res = [dict_it, it]
        break
else:
    raise Exception("Can't find two items")
The for block can be rewritten to handle any number n of elements:
for it in items:
    computed_val = compute(it.val)
    dict_lit = val_to_it.get(computed_val)
    if dict_lit is None:
        val_to_it[computed_val] = [it]
    else:
        dict_lit.append(it)
        # Check if we have the expected number of elements
        if len(dict_lit) == n:
            # Found n elements!
            res = dict_lit
            break
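For reference, here is that n-element variant wrapped into a complete function; the find_n_items name and the ValueError are my additions, not part of the original answer:

def find_n_items(items, compute, n=2):
    val_to_it = {}
    for it in items:
        computed_val = compute(it.val)
        group = val_to_it.setdefault(computed_val, [])  # one list per computed value
        group.append(it)
        if len(group) == n:
            return group  # found n items sharing one computed value
    raise ValueError(f"Can't find {n} items with equal computed values")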

common_ancestor function for nested tree dictionary in python

I am trying to create a function called common_ancestor() that takes two inputs: first, a list of taxon name strings, and second, a phylogenetic tree dictionary. It should return a string giving the name of the taxon that is the closest common ancestor of all the species in the input list. I have already made a separate function called list_ancestors that gives me the ancestors of the elements in the list, and I have the dictionary below to work with.
tax_dict = {
    'Pan troglodytes': 'Hominoidea', 'Pongo abelii': 'Hominoidea',
    'Hominoidea': 'Simiiformes', 'Simiiformes': 'Haplorrhini',
    'Tarsius tarsier': 'Tarsiiformes', 'Haplorrhini': 'Primates',
    'Tarsiiformes': 'Haplorrhini', 'Loris tardigradus': 'Lorisidae',
    'Lorisidae': 'Strepsirrhini', 'Strepsirrhini': 'Primates',
    'Allocebus trichotis': 'Lemuriformes', 'Lemuriformes': 'Strepsirrhini',
    'Galago alleni': 'Lorisiformes', 'Lorisiformes': 'Strepsirrhini',
    'Galago moholi': 'Lorisiformes'
}
import random

def halfroot(tree):
    taxon = random.choice(list(tree))
    result = [taxon]
    for i in range(0, len(tree)):
        result.append(tree.get(taxon))
        taxon = tree.get(taxon)
    return result

def root(tree):
    rootlist = halfroot(tree)
    rootlist2 = rootlist[::-1]
    newlist = []
    for e in range(0, len(rootlist)):
        if rootlist2[e] != None:
            newlist.append(rootlist2[e])
    return newlist[0]

def list_ancestors(taxon, tree):
    result = [taxon]
    while taxon != root(tree):
        result.append(tree.get(taxon))
        taxon = tree.get(taxon)
    return result

def common_ancestors(inputlist, tree):
    biglist1 = []
    for i in range(0, len(inputlist)):
        biglist1.append(list_ancestors(inputlist[i], tree))
    # continue so that I get three separate lists where I can cross-reference all
    # elements from the first list against every other list to find a common ancestor
The result should look something like:
print(common_ancestor(['Hominoidea', 'Pan troglodytes', 'Lorisiformes'], tax_dict))
Output: 'Primates'
One way would be to collect all the ancestors for each species, place them in a set, and then take an intersection to get what they have in common:
def common_ancestor(species_list, tree):
    result = None  # initiate a `None` result
    for species in species_list:  # loop through each species in the species_list
        ancestors = {species}  # initiate the ancestors set with the species itself
        while True:  # rinse & repeat until we run out of ancestors in the tree
            try:
                species = tree[species]  # get the species' ancestor
                ancestors.add(species)  # store it in the ancestors set
            except KeyError:
                break
        # initiate the result or intersect it with ancestors from the previous species
        result = ancestors if result is None else result & ancestors
    # finally, return the ancestor if there is only one in the result, or None
    return result.pop() if result and len(result) == 1 else None

print(common_ancestor(["Hominoidea", "Pan troglodytes", "Lorisiformes"], tax_dict))
# Primates
You can use the 'middle' part of this function for list_ancestors(), too - there is no need to complicate it by trying to find the tree's root:
def list_ancestors(species, tree, include_self=True):
    ancestors = [species] if include_self else []
    while True:
        try:
            species = tree[species]
            ancestors.append(species)
        except KeyError:
            break
    return ancestors
Of course, both rely on a valid ancestral tree dictionary - if some of the ancestors were to recurse on themselves, or if there is a break in the chain, it won't work. Also, if you were to do a lot of these operations, it might be worth turning your flat dictionary into a proper tree, as sketched below.
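On that last point, here is a minimal sketch of inverting the flat child-to-parent dictionary into a parent-to-children tree (build_tree is an illustrative name, not from the answer):

from collections import defaultdict

def build_tree(flat):
    children = defaultdict(list)
    for child, parent in flat.items():
        children[parent].append(child)  # invert the edge direction
    return dict(children)

tree = build_tree(tax_dict)
print(tree['Primates'])  # ['Haplorrhini', 'Strepsirrhini']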

Pathfinding code produces unexpected results

First of all, excuse the bad title, but I don't know how to describe this in just one sentence...
Given a grid with 3 kinds of fields, empty fields, walls and exits, I wrote a program that checks for every empty field, whether that field is "safe".
A person walks through that grid, but can only walk non-diagonally and can't go through walls. The person, starting at one field, chooses one direction at random and starts walking that way. Once it hits a wall, it chooses a direction at random again, starting to move into that direction and so on.
A field is considered safe if a person traversing the grid as described above, starting at that field, is guaranteed to find an exit at some point.
I wrote a Python program to solve this problem. It builds a "tree" for every field it checks, containing every possible route from that field.
I have a function that returns the "parents" of a given node, by recursively adding the parent of the current node to a list of nodes until it reaches the topmost node.
The program works as expected when checking only one field, for example (1, 4). However, it doesn't work when checking all fields of the example grid.
I already looked into it and realized that the alle_parents() function, which returns all parents of a given node, yields unexpected results when checking all nodes. E.g. when checking the field (1, 4), one child of that node is (1, 8). The parents of (1, 8) should just be (1, 4); that's not the case, though. alle_parents((1, 8)) returns many different fields that shouldn't be there, and I can't figure out why. My only guess is that it has to do with left-over data or the GC not working as intended.
Relevant code:
class Knoten():
    def __init__(self, x, y, parent=None):
        self.x = x
        self.y = y
        self.parent = parent
        self.children = []

n = len(spielfeld)
m = len(spielfeld[0])
for k in range(n):
    for j in range(m):
        if spielfeld[k][j] not in [None, '#', 'E']:
            baum = []
            i = 0
            ebene = []
            ebene.append(Knoten(k, j))
            baum.append(ebene)
            i += 1
            while i <= 100:
                ebene = []
                for knoten in baum[i - 1]:
                    children = []
                    if spielfeld[knoten.x][knoten.y] == 'E':
                        continue
                    for feld in next_feld(knoten.x, knoten.y):
                        knoten_neu = Knoten(feld[0], feld[1], knoten)
                        hinzufuegen = True
                        for parent in alle_parents(knoten_neu):
                            if knoten_neu.x == parent.x and knoten_neu.y == parent.y:
                                hinzufuegen = False
                        if hinzufuegen:
                            ebene.append(knoten_neu)
                            children.append(knoten_neu)
                    knoten.children = children
                    if children == []:
                        if spielfeld[knoten.x][knoten.y] != 'E':
                            spielfeld[k][j] = '%'  # Field not safe
                baum.append(ebene)
                i += 1

def alle_parents(knoten, parents=[]):
    if knoten.parent == None:
        return parents
    else:
        parents.append(knoten.parent)
        return alle_parents(knoten.parent, parents)
The example map I'm using:
############
# # # #
# ## #
# # E# #
# ## #
# #
# #E E###
############
Full code (parts of it are German, sorry for that): http://pastebin.com/3XUBbpkK
I suspect your issue is a common Python gotcha. This line:
def alle_parents(knoten, parents = []):
creates the empty list once, when the function is defined, NOT every time the function is called. Future calls to alle_parents() will reuse the same list (which may have grown in size) instead of a new empty one! A good way to fix it is this:
def alle_parents(knoten, parents = None):
    parents = parents or []
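To see the gotcha in isolation, compare the two definitions below: the first reuses one list across all calls that omit the argument, the second (the None-default idiom) gets a fresh list every time:

def collect(value, acc=[]):  # default list is created once, at def time
    acc.append(value)
    return acc

print(collect(1))  # [1]
print(collect(2))  # [1, 2] -- the same list was reused

def collect_safe(value, acc=None):
    if acc is None:  # fresh list on every call that omits acc
        acc = []
    acc.append(value)
    return acc

print(collect_safe(1))  # [1]
print(collect_safe(2))  # [2]

Note that the "parents or []" fix above also replaces an explicitly passed empty list with a new one, which happens to be harmless here; the "is None" check is the stricter idiom.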

Extract the object names from different nesting levels in JSON

I've been trying to get the solution from my former question running here, but unfortunately without success. I'm now trying to change the code so that it returns not the ids but the "name" values themselves. This is my JSON; I want to extract the SUB, SUBSUB and NAME entries, and when using a quasi for-chain I could not get back up the hierarchy to reach SUBSUB2... Could anyone please put me on the right track?
The solution code from the former question:
def locateByName(e, name):
    if e.get('name', None) == name:
        return e
    for child in e.get('children', []):
        result = locateByName(child, name)
        if result is not None:
            return result
    return None
What I exactly want to achieve is simple list as SUB1, SUBSUB1, NAME1, NAME2, SUBSUB2, etc...
Assuming x is your JSON,
def trav(node, acc=[]):
    acc += [node['name']]
    if 'children' in node:
        for child in node['children']:
            trav(child, acc)

acc = []
trav(x, acc)
print(acc)
Output:
['MAIN', 'SUB1', 'SUBSUB1', 'NAME1', 'NAME2', 'SUBSUB2', 'SUBSUB3']
Another, more compact solution:
from itertools import chain

def trav(node):
    if 'children' in node:
        return [node['name']] + list(chain.from_iterable([trav(child) for child in node['children']]))
    else:
        return [node['name']]

print(trav(x))
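For Python 3, a generator version stays just as compact and avoids building intermediate lists (a sketch, assuming the same x as above):

def trav(node):
    yield node['name']  # emit this node's name first (pre-order)
    for child in node.get('children', []):
        yield from trav(child)  # then recurse into each child

print(list(trav(x)))
# ['MAIN', 'SUB1', 'SUBSUB1', 'NAME1', 'NAME2', 'SUBSUB2', 'SUBSUB3']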
