maximum recursion depth exceeded with recursive file structure crawl

maximum recursion depth exceeded with recursive file structure crawl - python

Here's my code, the intention of which is to crawl a given folder and look for .md and .pdf files, and build a tree-like structure which describes it.
I'm probably really overthinking it, so I could really use a second set of eyes on this.
class Resource_Item:
def __init__(self, name=None, stub=None, path=None, parent=None, html_file_location=None, documents=[], children=[]):
self.name = name
self.stub = stub
self.path = path
self.parent = parent
self.html_file_location = html_file_location
self.documents = documents
self.children = children
def add_child(self, c):
self.children.append(c)
def to_json(self):
o = {
'name' : self.name,
'stub' : self.stub,
'path' : self.path,
'parent' : self.parent,
'html_file_location' : self.html_file_location,
'documents' : self.documents,
'children' : [c.to_json() for c in self.children] } #len(self.children)
return json.dumps(o)
def walk_dir(root, parent = None):
"""
>>> walk_dir("./test_docs/folder containing pdfs/").documents
['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
>>> len(walk_dir("./test_docs/folder containing pdfs/").children)
0
>>> walk_dir("./test_docs/folder containing markdown and pdfs/").stub is None
False
>>> walk_dir("./test_docs/folder containing markdown and pdfs/").children
['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
"""
file_or_folder_name_no_ext = os.path.splitext(os.path.basename(root))[0]
entry = Resource_Item( name=file_or_folder_name_no_ext, parent=parent, path=os.path.abspath(root) )
for item in os.listdir(root):
path = os.path.join(os.path.abspath(root), item)
if os.path.isfile(path):
if item.endswith(".pdf"):
entry.documents.append(item)
elif item.endswith(".md"):
entry.stub = read_markdown_file_as_html(path)
elif os.path.isdir(path):
if dir_contains_pdf(path):
print('found a path to contain PDFs: "'+str(path)+'"')
entry.add_child(walk_dir(path)) # broken!
#entry.add_child(path)
return entry
What appears to be happening is that on the entry.add_child(walk_dir(path)) line, walk_dir doesn't properly create a new instance of Resource_Item, since my testing shows that Resource_Item.children gets populated with all the pdfs in that file tree, not just those in the immediate folder.
As for my supporting functions, I'm pretty sure they work properly, but here they are for completeness:
def dir_contains_pdf(root):
"""
>>> dir_contains_pdf("./test_docs/folder containing pdfs/")
True
>>> dir_contains_pdf("./test_docs/folder containing nothing/")
False
>>> dir_contains_pdf("./test_docs/folder containing folders, markdown, and pdf/")
True
>>> dir_contains_pdf("./test_docs/folder containing markdown and pdfs/")
True
"""
root = os.path.abspath(root)
for item in os.listdir(root):
item_path = os.path.join(root, item)
if os.path.isfile(item_path):
if item.endswith(".pdf"):
return True
elif os.path.isdir(item_path):
if dir_contains_pdf(item_path):
return True
return False
def read_markdown_file_as_html(markdown_filename):
f = open(markdown_filename, 'r')
markdown_content = f.read()
return markdown.markdown(markdown_content)
As another view of how this recursion should be working, I built this other program in the same style to confirm that it works, and it does work properly, so I'm guessing the issue has to do with how I'm using the Python file API:
class Item:
def __init__(self, n=None):
self.n = n
self.children = []
def add_child(self, c):
self.children.append(c)
def to_o(self):
o = { 'n' : self.n, 'children' : [c.to_o() for c in self.children] }
return o
def bad(count):
item = Item(n=count)
print('count : '+str(count))
if count > 100 or count == 0:
return item
elif (count-1) % 2 == 0:
print(str(count) + ' is odd')
item.add_child(bad(count*3))
elif count % 2 == 0:
print(str(count) + ' is even')
item.add_child(bad(count/2))
return item
import json
print(json.dumps(bad(7).to_o()))

Related

creating a set from multiple lists

I want to count the variables that occur in an AST (e.g., a+b+a should return {a,b}). Is there a one liner that I can write for each of the AST cases? I tried:
def count(e):
if isinstance(e, Add): # a + b
# XXX can I write the following as a one liner?
vs = set()
for v in s.args:
vs.update(count(v))
return vs
elif isinstance(e, Variable):
return e
...

To answer your actual question, you should probably be tree-walking for Name nodes.
This answer includes a variation of ast.NodeVisitor that passes the full path down to the node to the visitor function, since we need to be read some of the context to know whether we're in an Attribute access, or Storeing or loading.
There may be some corner cases I didn't think of yet, naturally, and attribute access (e.a, z.w) is currently fully ignored.
import ast
import collections
code = """
a = b + c + e.a
q = a + b + c
z.w = b + c
print(a)
"""
class NodeVisitorWithPath:
def visit(self, path):
node = path[-1]
method = "visit_" + node.__class__.__name__
visitor = getattr(self, method, self.generic_visit)
return visitor(path)
def generic_visit(self, path):
node = path[-1]
for field, value in ast.iter_fields(node):
if isinstance(value, list):
for item in value:
if isinstance(item, ast.AST):
self.visit(path + [item])
elif isinstance(value, ast.AST):
self.visit(path + [value])
class NameWalker(NodeVisitorWithPath):
def __init__(self):
self.events = collections.Counter()
def visit_Name(self, path):
if len(path) >= 1 and isinstance(
path[-2], ast.Attribute
): # Attribute access, ignore
return
event = (
"Assign"
if len(path) >= 1 and isinstance(path[-2], ast.Assign)
else "Load"
)
node = path[-1]
self.events[f"{event} {node.id}"] += 1
super().generic_visit(path)
tree = ast.parse(code, "x", "exec")
nw = NameWalker()
nw.visit([tree])
print(dict(nw.events))
outputs
{'Assign a': 1, 'Load b': 3, 'Load c': 3, 'Assign q': 1, 'Load a': 2, 'Load print': 1}

Add list as child of tree with python 3

I have looked at many very similar questions and cannot figure it out so:
I have a string like this:
{121{12}12{211}2}
I want to read the string into a tree like this:
I am confused as how to tell python to add a whole list as a child node?
I would also like to know how to change the current node to the parent of the old current node?
Here is my code so far:
class Node:
def __init__(self,val):
self.value = val
self.children = []
#init Node class so we can pass in values as nodes and set children to empty list
def add_child(self, obj):
self.children.append(obj)
s=[]
for i in filedata:
if i == leftbrace:
n = Node(i)
#create new child of current node
s = []
#reset list s to blank
if i == rightbrace:
n.add_child(s)
#add list s to current node
#make parent of current node the new current node
else:
s.append(i)
#add i to list s
for c in n.children:
print (c.data)

To make something like this work, it is easiest if you use recursion. Here is one way that this can be done.
Code:
class Node:
def __init__(self, stream):
val = []
children = []
while True:
try:
# get the next character from the stream
ch = next(stream)
# if this is an open brace, then recurse to a child
if ch == '{':
children.append(Node(stream))
# if this is a close brace, we are done on this level
elif ch == '}':
break
# otherwise add this character to our value
else:
val.append(ch)
# stream is empty, we are done
except StopIteration:
break
self.value = ''.join(val)
self.children = children
#classmethod
def from_string(cls, string):
stream = iter(string)
tree_top = Node(stream)
# assert that the string started with '{' and was one top node
assert len(tree_top.children) == 1 and tree_top.value == ''
return tree_top.children[0]
def __str__(self):
return self.value
def __repr__(self):
return "Node('%s', <%d children>)" % (
self.value, len(self.children))
def tree_string(self, level=0):
yield '-' + " " * level + str(self)
for child in self.children:
for child_string in child.tree_string(level+1):
yield child_string
tree = '{121{12}12{211}2}'
for line in Node.from_string(tree).tree_string():
print(line)
Results:
-121122
- 12
- 211

In Order Traversal AVL Tree: Name not defined

I was just wondering would anyone be able to help me. I am trying to do an inorder transversal of an AVL tree. But I keep getting an error that my function name 'r_in_order' is not defined. What is happening here and what am I missing? Here is the code:
class Node:
""" A node in a BST. It may have left and right subtrees """
def __init__(self, item, left = None, right = None):
self.item = item
self.left = left
self.right = right
class BST:
""" An implementation of a Binary Search Tree """
def __init__(self):
self.root = None
def recurse_add(self, ptr, item):
if ptr == None:
return Node(item)
elif item < ptr.item:
ptr.left = self.recurse_add(ptr.left, item)
elif item > ptr.item:
ptr.right = self.recurse_add(ptr.right, item)
return ptr
def add(self, item):
""" Add this item to its correct position on the tree """
self.root = self.recurse_add(self.root, item)
def r_count(self, ptr):
if ptr == None:
return 0
else:
return 1 + self.r_count(ptr.left) + self.r_count(ptr.right)
def count(self):
return self.r_count(self.root)
def r_height(self, ptr):
if ptr == None:
return 0
else:
return 1 + max(self.r_height(ptr.left), self.r_height(ptr.right))
def height(self):
return self.r_height(self.root)
def r_in_order(self, ptr):
if ptr != None:
r_in_order(ptr.left)
print(ptr.item + " ", end="")
r_in_order(ptr.right)
def in_order(self):
return self.r_in_order(self.root)
I am then testing the code with this:
import sys
from BST import BST
def main():
# Read each test case
line = sys.stdin.readline()
items = line.strip().split()
nums = [int(item) for item in items]
tree = BST()
for num in nums:
tree.add(num)
print("Print the elements of the tree in order:")
tree.in_order()
if __name__ == "__main__":
main()

r_in_order is a method of BST. It can only be called on a BST instance (or on the class with an instance as the first argument), but in the definition of r_in_order itself, you try to use it without one. So technically, it doesn't exist in the namespace you're trying to use it in.
Your function definition should be as follows:
def r_in_order(self, ptr):
if ptr != None:
self.r_in_order(ptr.left)
print(ptr.item + " ", end="")
self.r_in_order(ptr.right)

There is no general function r_in_order: you need to add self. to get a reference to the method you're already inside. There's also a syntax error lurking in the print statement. Try this:
def r_in_order(self, ptr):
if ptr != None:
self.r_in_order(ptr.left)
print(ptr.item, " ", end="")
self.r_in_order(ptr.right)
This runs, and yields the below (first line is input).
1 3 7 5 6 4 2
Print the elements of the tree in order:
1 2 3 4 5 6 7

Why does this character ▯ appear?

So this character ▯ appears when I run my code which I think means there is a missing character therefor it can't be displayed. (Not sure correct me if I am wrong) And well basically I want to be able to get rid of that character. Here is what it looks like when I run my code:
However in the back-end in the idle when I click on one of the boxes for it to be displayed up top it doesn't register and looks like this in idle:
Why does it appear on screen if it isn't going to appear in idle?
Also how can I get rid of the ▯ character from the main screen?
Here is my full code.
Here are segments in which I think the problem lies. (However I have not been able to solve the problem)
My classes for Tree comparison to find the sentences and their frequent use:
class Branch():
def __init__(self, value):
self.left = None
self.right = None
self.value = value
self.frequency = 1
def incFreq(self):
self.frequency = self.frequency + 1
def freq(self):
return self.frequency
class Tree():
highest = []
def __init__(self):
self.root = None
self.found = False
def findHighest(self):
from operator import itemgetter, attrgetter
self.highest = []
self.inorder(self.root)
self.highest = sorted(self.highest, key=itemgetter(1), reverse=True)
return self.highest
#lessThan function needed to compare strings
def lessThan(self, a, b):
if len(a) < len(b):
loopCount = len(a)
else:
loopCount = len(b)
for pos in range(0, loopCount):
if a[pos] > b[pos]:
return False
return True
def outputTree(self):
self.inorder(self.root)
def insert(self, value):
#increment freq if already exists, else insert
if not self.exists(value):
self.root = self.insertAtBranch(self.root, value)
def exists(self, value):
#set the class variable found to False to assume it is not there
self.found = False
self.findAtBranch(self.root, value)
return self.found
#Used to fine a value in a tree
def findAtBranch(self, branch, value):
if branch == None:
pass
else:
#print ("[" + branch.value + "][" + value + "]") # Error checking
if branch.value == value:
self.found = True
#print("found " + value)
branch.incFreq()
#print(branch.freq())
else:
self.findAtBranch(branch.left, value)
self.findAtBranch(branch.right, value)
def insertAtBranch(self, branch, value):
if branch == None:
return Branch(value)
else:
if self.lessThan(branch.value, value):
branch.right = self.insertAtBranch(branch.right, value)
else:
branch.left = self.insertAtBranch(branch.left, value)
return branch
def inorder(self, branch):
if branch == None: return
self.highest.append((branch.value, branch.freq()))
#print (branch.value)
#print (branch.freq())
#print(self.highest[0])
self.inorder(branch.left)
self.inorder(branch.right)
This is where I use the tree and pass sentences to be used on a different function:
def getPhrases(self, numToReturn):
topPhrases = []
phrasesTree = Tree()
#load tree with phrases from phrase text file
file = open('setPhrases.txt', 'r')
for line in file:
phrasesTree.insert(line)
#create a list of the top n of phrases to return
val = 0
for phrase in phrasesTree.findHighest():
if val < numToReturn:
topPhrases.append(phrase)
val = val + 1
return topPhrases
This is where I use the sentences to be able to display them on the screen:
def createPhrases(self):
print("createPhrases")
self.deletePanes()
self.show_keyboard = False
self.show_words = False
self.show_phrases = True
self.show_terminal = True
words = self.getPhrases(10)
for word, count in words:
self.addPane("{}".format(word, count), WORDS)
self.addPane("Boxes", PHRASE)
self.addPane("Keyboard", PHRASE)
self.addPane("OK", PHRASE)
self.drawPanes()

When you read lines from file, newline characters are at the end. pygame's documentation states that:
The text can only be a single line: newline characters are not rendered.
So, you should change this fragment:
file = open('setPhrases.txt', 'r')
for line in file:
phrasesTree.insert(line)
to this:
file = open('setPhrases.txt', 'r')
for line in file:
phrasesTree.insert(line.strip())

Python generate sorted list

I want to compress my movies automatically. So I've written a mediainfo wrapper class in python, to generate a xml output, which I then parse to a movieinfo class, with a list of audio and subtitle tracks.
__author__ = 'dominik'
class Error(Exception):
""" Error class
"""
class ValidationError(Error):
""" Invalid or missing xml items
"""
class MovieInfo(object):
""" Description of movie file
"""
def __init__(self, media_info):
self._video_track = None
self._audio_tracks = []
self._subtitle_tracks = []
self.valid_movie = True
for track in media_info.tracks:
if track.track_type == "Audio":
self._audio_tracks.append(AudioTrack(track))
elif track.track_type == "Text":
self._subtitle_tracks.append(SubtitleTrack(track))
elif track.track_type == "Video":
self._video_track = VideoTrack(track)
#property
def audio_tracks(self):
if not hasattr(self, "_audio_tracks"):
self._audio_tracks = []
if len(self._audio_tracks) != 0:
return self._audio_tracks
#property
def subtitle_tracks(self):
if not hasattr(self, "_subtitle_tracks"):
self._subtitle_tracks = []
if len(self._subtitle_tracks) != 0:
return self._subtitle_tracks
class Track(object):
""" Abstract track class for audio and subtitle tracks
"""
__KNOWN_LANGUAGE_CODES = {"en": "ENG", "de": "DE"}
def __init__(self, track, valid_codecs):
self._valid = True
track_id = int(track.id)
codec_id = self._determine_codec(track.codec_id, valid_codecs)
language = self._determine_language(track.language)
self._id = track_id
self._codec_id = codec_id
self._language = language
def _determine_codec(self, track_codec, types):
result = types.get(track_codec, None)
if result is None:
self._valid = False
return result
def _determine_language(self, track_language, types=__KNOWN_LANGUAGE_CODES):
result = types.get(track_language, None)
if result is None:
self._valid = False
return result
class AudioTrack(Track):
""" Audio track class
"""
__KNOWN_AUDIO_CODECS = {"A_DTS": "DTS", "A_AC3": "AC3"}
def __init__(self, track):
self._type = 1
Track.__init__(self, track, self.__KNOWN_AUDIO_CODECS)
class SubtitleTrack(Track):
""" Subtitle track class
"""
__KNOWN_SUBTITLE_CODECS = {"S_VOBSUB": "VOBSUB"}
def __init__(self, track):
self._type = 2
if track.forced == "Yes":
self._forced = True
else:
self._forced = False
Track.__init__(self, track, self.__KNOWN_SUBTITLE_CODECS)
class VideoTrack(object):
""" Video track class (only one video track in movie info!)
"""
def __init__(self, track):
self._type = 0
self._framerate = float(track.frame_rate)
self._width = track.width
self._height = track.height
Here is the mediainfo class (it's the pymediainfo class):
from subprocess import Popen
import os
from tempfile import mkstemp
from bs4 import BeautifulSoup, NavigableString
from setuptools.compat import unicode
class Track(object):
""" Hold the track information
"""
def __getattr__(self, item):
try:
return object.__getattribute__(self, item)
except:
pass
return None
def __init__(self, xml_track):
self.xml_track = xml_track
self.track_type = xml_track.attrs["type"]
for child in self.xml_track.children:
if not isinstance(child, NavigableString):
node_name = child.name.lower().strip()
node_value = unicode(child.string)
node_other_name = "other_%s" % node_name
if getattr(self, node_name) is None:
setattr(self, node_name, node_value)
else:
if getattr(self, node_other_name) is None:
setattr(self, node_other_name, [node_value, ])
else:
getattr(self, node_other_name).append(node_value)
for key in [c for c in self.__dict__.keys() if c.startswith("other_")]:
try:
primary = key.replace("other_", "")
setattr(self, primary, int(getattr(self, primary)))
except:
for value in getattr(self, key):
try:
actual = getattr(self, primary)
setattr(self, primary, int(value))
getattr(self, key).append(actual)
break
except:
pass
def __repr__(self):
return("<Track id='{0}', type='{1}'>".format(self.id, self.track_type))
def to_data(self):
data = {}
for k, v in self.__dict__.items():
if k != 'xml_track':
data[k] = v
return data
class Mediainfo(object):
""" MediaInfo wrapper
"""
def __init__(self, xml):
self.xml_dom = xml
if isinstance(xml, str):
self.xml_dom = BeautifulSoup(xml, "xml")
def _populate_tracks(self):
if self.xml_dom is None:
return
for xml_track in self.xml_dom.Mediainfo.File.find_all("track"):
self._tracks.append(Track(xml_track))
#property
def tracks(self):
if not hasattr(self, "_tracks"):
self._tracks = []
if len(self._tracks) == 0:
self._populate_tracks()
return self._tracks
#staticmethod
def parse(filename):
filehandler_out, filename_out = mkstemp(".xml", "mediainfo-")
filehandler_err, filename_err = mkstemp(".error", "mediainfo-")
filepointer_out = os.fdopen(filehandler_out, "r+b")
filepointer_err = os.fdopen(filehandler_err, "r+b")
mediainfo_command = ["mediainfo", "-f", "--Output=XML", filename]
p = Popen(mediainfo_command, stdout=filepointer_out, stderr=filepointer_err)
p.wait()
filepointer_out.seek(0)
xml_dom = BeautifulSoup(filepointer_out.read(), "xml")
filepointer_out.close()
filepointer_err.close()
print(xml_dom)
return Mediainfo(xml_dom)
def to_data(self):
data = {'tracks': []}
for track in self.tracks:
data['tracks'].append(track.to_data())
return data
This class gives me every track in the xml and then I parse the relevant info in movieinfo.
Ok now I have a list of audiotracks e.g. 3 tracks one in german language and DTS, one in german and AC3 and one in english and AC3. Now I want to get the ids from the tracks in the format "1,2,3" to give it to handbrake cli.
My problem is the order of the tracks. If there is a german DTS track this schould be the first track, the second track should be also the first, but compressed to aac and the third track should be one english track in AAC. If there is only a german AC3 track then the first track should be this track but compressed to AAC, and the second track should englisch and AAC.
I don't know exactly how I can achive that, can you help me? I'm new to python, and come from C, C++ and C#. In C# this is very easy to get with lambda.

Assuming you know to define a compare-tor that given two items can define which is bigger then Python functions as well as C or C++.
Start here -
1. https://wiki.python.org/moin/HowTo/Sorting/
https://developers.google.com/edu/python/sorting
http://docs.python.org/2/library/functions.html#sorted
Using sorted method and define the key you want.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

maximum recursion depth exceeded with recursive file structure crawl - python

Related

creating a set from multiple lists

Add list as child of tree with python 3

In Order Traversal AVL Tree: Name not defined

Why does this character ▯ appear?

Python generate sorted list

Categories

Resources