Python find next file by natural sort - python

I'm trying to find the next file in natural sort order, limited by a depth variable, but I'm facing some problems.
The folder structure is the following:
tests/
------test1/
-----------test2/
----------------...
----------------30.jpg
----------------31.jpg
-----------test3/
----------------...
----------------30.jpg
----------------31.jpg
-----------1.jpg
------1.jpg
I want to reach the next item, or the item before my current item, iterating over them with the forward and backward functions.
Getting items on the same level currently works, as does getting one on the max depth level.
For example I want to get with the backwards function on
path=tests/test1/test2/1.jpg
the result
tests/test1/1.jpg
but with
path=tests/test1/test3/1.jpg
the result
tests/test1/test2/31.jpg
The forward function should, obviously, give the same results in reverse.
My current problem is finding the next file on the next level without repeating myself and building a loop. Iterating through the folders has worked completely fine so far, but I'm completely stuck on this one.
My current code so far:
import os
import re
import wx
class PathSelect(wx.App):
    """
    path select application

    Hosts :meth:`ask_path`, a file-open dialog used to pick the starting
    image.
    """

    def __init__(self):
        """
        initializing function
        :return:
        """
        super(PathSelect, self).__init__()

    @staticmethod
    def ask_path():
        """
        ask for our starting path
        :return: the path chosen in the dialog, or None if it was dismissed
        """
        wildcard = ("Image Files (*.*)|*.jpeg;*.jpg;*.png;*.bmp|"
                    "Joint Photographic Experts Group (*.jpeg;*.jpg)|*.jpeg;*.jpg|"
                    "Portable Network Graphics (*.png)|*.png|"
                    "Bitmap (*.bmp)|*.bmp|"
                    "All files (*.*)|*.*")
        dialog = wx.FileDialog(None, "Choose a file", os.getcwd(), "", wildcard, wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
        # try/finally so the dialog is destroyed on the OK path as well;
        # a plain `return` before Destroy() would skip the cleanup.
        try:
            if dialog.ShowModal() == wx.ID_OK:
                return dialog.GetPath()
            return None
        finally:
            dialog.Destroy()
class PathingAlgorithm(object):
    """
    our pathing algorithm

    Steps to the previous / next entry of a folder in natural sort order and
    climbs into the parent folder (at most ``depth`` levels) when the current
    entry is the first / last one of its folder.
    """

    def __init__(self, depth=1):
        """
        initializing function
        :param depth: maximum number of folder levels to climb
        :return:
        """
        self.depth = depth
        self.image_path = ""

    @staticmethod
    def natural_sort(current_list):
        """
        sort names so that embedded numbers compare numerically
        :param current_list: iterable of strings
        :return: new sorted list ("2.jpg" comes before "10.jpg")
        """
        def convert(text):
            return int(text) if text.isdigit() else text.lower()

        def alphanum_key(key):
            # The capture group keeps the digit runs, so keys always
            # alternate str, int, str, ... and stay comparable.
            return [convert(c) for c in re.split('([0-9]+)', key)]

        return sorted(current_list, key=alphanum_key)

    def current(self):
        """
        return the current path or ask for the path
        :return: the current path, or None if none was chosen
        """
        if not self.image_path:
            self.image_path = PathSelect.ask_path()
        if self.image_path:
            return self.image_path
        return None

    def backward(self, path="", depth=0, ghost=False):
        """
        return path for the previous picture
        :param path: entry to start from; "" means the current image path
        :param depth: recursion level, callers leave this at 0
        :param ghost: when True the current image path is not updated
        :return: the previous path, or None once the depth limit is exceeded
        """
        # max recursion case, break our function here
        if self.depth < depth:
            return None
        depth += 1
        if path == "":
            path = self.image_path
        folder = os.path.dirname(path)
        file_name = os.path.basename(path)
        folder_content = self.natural_sort(os.listdir(folder))
        file_index = folder_content.index(file_name)
        if file_index == 0:
            path = self.backward(folder, depth, ghost)
            # handle max depth case
            if path is None:
                return None
            # get in the same level of the foldertree again if possible
            for _ in range(depth):
                # Same guard as forward(): the recursion may already have
                # landed on a file, and os.listdir() on a file raises.
                if not os.path.isfile(path):
                    path_list = os.listdir(path)
                    if path_list:
                        path = os.path.join(path, self.natural_sort(path_list)[-1])
        else:
            path = os.path.join(folder, folder_content[file_index - 1])
        if not ghost:
            self.image_path = path
        return path

    def forward(self, path="", depth=0, ghost=False):
        """
        return path for the next picture
        :param path: entry to start from; "" means the current image path
        :param depth: recursion level, callers leave this at 0
        :param ghost: when True the current image path is not updated
        :return: the next path, or None once the depth limit is exceeded
        """
        depth += 1
        # max recursion case, break our function here
        if self.depth < depth:
            return None
        # on start use current path, on recursion skip this
        if path == "":
            path = self.image_path
        folder = os.path.dirname(path)
        file_name = os.path.basename(path)
        if os.path.isfile(os.path.join(folder, file_name)):
            folders = os.listdir(folder)
        else:
            # when climbing from a folder, only sibling folders are candidates
            folders = [name for name in os.listdir(folder) if os.path.isdir(os.path.join(folder, name))]
        folder_content = self.natural_sort(folders)
        file_index = folder_content.index(file_name)
        if file_index == len(folder_content) - 1:
            if self.depth - 1 < depth:
                files = [name for name in os.listdir(folder) if os.path.isfile(os.path.join(folder, name))]
                if files:
                    # os.listdir() order is arbitrary; sort so the result is
                    # deterministic and consistent with the rest of the class.
                    return os.path.join(folder, self.natural_sort(files)[0])
            path = self.forward(folder, depth, ghost)
            # handle max depth case
            if path is None:
                return None
            # get in the same level of the foldertree again if possible
            for _ in range(depth):
                if not os.path.isfile(path):
                    file_list = os.listdir(path)
                    if file_list:
                        path = os.path.join(path, self.natural_sort(file_list)[0])
        else:
            path = os.path.join(folder, folder_content[file_index + 1])
        if not ghost:
            self.image_path = path
        return path
if __name__ == "__main__":
    # Demo entry point: create the wx application, then ask the algorithm for
    # the file following a fixed sample path without changing its state.
    app = wx.App()
    app.MainLoop()
    ps = PathingAlgorithm(depth=3)
    # print(ps.current())
    # print(ps.backward(ghost=True))
    # print(ps.forward(ghost=True))
    # print() as a function with one argument runs on Python 2 and Python 3.
    print(ps.forward(
        path='../tests/test1/test2/31.jpg',
        ghost=True,
    ))
thanks for any help in advance

I was too focused on the recursive function; solving it over a sorted file tree was the solution. Performance is currently not the best if the depth is too big, because it then has to collect all files, but it is good enough for my case.
def get_file_tree(self, path):
    """
    return a natural sorted file tree and the index of your original file
    :param path: path of an existing file
    :return: (sorted list of absolute file paths, index of ``path`` in that
             list), or (None, None) when ``path`` does not exist
    """
    if not os.path.exists(path):
        # A pair, so callers that unpack the result do not crash and can
        # test ``original_index is None``.
        return None, None
    filename = os.path.basename(path)
    basepath = os.path.abspath(os.path.dirname(path))
    # Climb self.depth levels above the file's folder. Each step starts from
    # the previous result; joining onto basepath every time would always
    # yield the same single parent.
    root = basepath
    for _ in range(self.depth):
        root = os.path.abspath(os.path.join(root, os.pardir))
    # list all files
    configfiles = [os.path.join(dirpath, f)
                   for dirpath, dirnames, files in os.walk(root)
                   for f in files]
    # use natural sort for the tree
    configfiles = self.natural_sort(configfiles)
    original_path = os.path.join(basepath, filename)
    original_index = configfiles.index(original_path)
    return configfiles, original_index
def backward(self, path="", ghost=False):
    """
    get the previous file of our current or defined path
    :param path: file to start from; "" means the current image path
    :param ghost: when True the current image path is not updated
    :return: the previous file, or None if there is none
    """
    if path == "":
        path = self.image_path
    path = os.path.abspath(path)
    tree = self.get_file_tree(path)
    # get_file_tree signals a non-existent file with None; guard before
    # unpacking so that case returns None instead of raising TypeError
    if tree is None:
        return None
    configfiles, original_index = tree
    # if file was non existent or the index was 0 return None
    if original_index is None or original_index == 0:
        return None
    new_path = configfiles[original_index - 1]
    # never step down into a deeper folder when going backwards; os.sep
    # instead of a literal backslash so the check also works off Windows
    if new_path.count(os.sep) > path.count(os.sep):
        return None
    if not ghost:
        self.image_path = new_path
    return new_path
def forward(self, path="", ghost=False):
    """
    get the next file of our current or defined path
    :param path: file to start from; "" means the current image path
    :param ghost: when True the current image path is not updated
    :return: the next file, or None if there is none
    """
    if path == "":
        path = self.image_path
    path = os.path.abspath(path)
    tree = self.get_file_tree(path)
    # get_file_tree signals a non-existent file with None; guard before
    # unpacking so that case returns None instead of raising TypeError
    if tree is None:
        return None
    configfiles, original_index = tree
    # if file was non existent or was the last file, return None
    if original_index is None or len(configfiles) <= original_index + 1:
        return None
    new_path = configfiles[original_index + 1]
    if not ghost:
        self.image_path = new_path
    return new_path

Related

How to use returned value from a function

I have a function that finds all paths through a graph. The function returns a list of all paths. How do I use this value later in my code?
def findpaths(attachednodes, startID, endID, path=None):
    """Return every cycle-free path from startID to endID.

    attachednodes maps a node to an iterable of its neighbours; path is the
    list of nodes already visited (used by the recursion, callers normally
    omit it). Each result is a list of nodes beginning at the outermost
    start node and ending at endID.

    Side effect: the module-level mapping ``flights`` is updated with
    ``{index: path}`` for the paths found by each call that explored
    neighbours.
    """
    # None instead of a shared mutable default list.
    path = ([] if path is None else path) + [startID]
    if startID == endID:
        return [path]
    if startID not in attachednodes:
        return []
    paths = []
    for n in attachednodes[startID]:
        if n not in path:
            paths.extend(findpaths(attachednodes, n, endID, path))
    # `flights` is defined outside this function.
    flights.update(enumerate(paths))
    return paths
You put the function call on the right-hand side of a variable assignment. The variable will then hold the return value:
E.g.
def some_function():
    """Return the constant 10 (a stand-in for any function with a result)."""
    result = 10
    return result


# Assigning the call expression stores the function's return value in x.
x = some_function()
print(x)  # will print 10

How to make QTreeView CheckBox Pre-Selected

I have a TreeView with QtGui.QFileSystemModel to render file system folders.
My TreeView has a checkbox and checks & uncheck working fine. I want to pre-select all checkboxes on UI Load.
please suggest how to make default selected checkbox for TreeView.
My Model code is:-
class CheckableDirModel(QtGui.QFileSystemModel):
    """File-system model whose first column carries a check box.

    Checking an entry records its path in ``selectedFiles`` and copies
    matching files into the report folders; unchecking removes them again.
    The class relies on the module-level names ``Formate``, ``greaterSize``
    and ``reportpath``, which are defined outside this block.
    """

    def __init__(self, resultFormats, parent=None):
        """resultFormats is a comma-separated string of file extensions."""
        # NOTE(review): `parent` is accepted but None is passed to the base
        # class, as in the original -- confirm whether that is intended.
        QtGui.QFileSystemModel.__init__(self, None)
        filters = QStringList()
        filters.append('*.odb')
        resultFormats = resultFormats.split(',')
        for resultFormat in resultFormats:
            filters.append('*.' + resultFormat)
        self.setNameFilters(filters)
        # True: entries that fail the name filters are shown disabled
        # instead of being hidden (see the Qt nameFilterDisables docs).
        self.setNameFilterDisables(True)
        self.checks = {}             # model index -> check state
        self.selectedFiles = set()   # paths currently ticked

    # Show the particular format ('.pages') files in the result set for the
    # checked folder
    def addFile(self, path1):
        """Record and copy every ``*.pages`` file directly inside path1."""
        for candidate in glob.iglob(os.path.join(str(path1), "*.pages")):
            print(candidate)
            if os.path.isfile(candidate):
                Formate.append(candidate)
                shutil.copy2(candidate, reportpath + '/../Particular_Formate')

    # Show the files greater than 250 MB in the list
    def addFilegt250mb(self, path1):
        """Record and copy every file below path1 that passes the size test."""
        for root, dirs, files in os.walk(str(path1), topdown=False):
            for name in files:
                f = os.path.join(root, name)
                if os.path.isfile(f):
                    # NOTE(review): the threshold is 0 bytes, so every file
                    # qualifies, although the comments speak of 250 MB.
                    if os.path.getsize(f) >= 0:
                        greaterSize.append(f)
                        print(name)
                        shutil.copy2(f, reportpath + '/../Particular_Formate_size')

    # remove the files when the folder is unchecked
    def deleteFile(self, path2):
        """Forget every file below path2 and delete its copies."""
        for root, dirs, files in os.walk(str(path2), topdown=False):
            for name in files:
                if name == ".DS_Store":
                    continue
                f = os.path.join(root, name)
                # Guarded: list.remove() raises ValueError for a path that
                # was never recorded.
                if os.path.getsize(f) >= 0 and f in greaterSize:
                    greaterSize.remove(f)
                # Filter in place rather than removing while iterating,
                # which skips elements.
                Formate[:] = [x for x in Formate if x != f]
                self.deletSize(name)
                self.deletFormate(name)

    @staticmethod
    def _remove_copied_file(folder, name):
        """Delete the regular file called name from folder, if present."""
        for the_file in os.listdir(folder):
            if the_file != name:
                continue
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except OSError as e:
                # best effort: report and carry on
                print(e)

    # remove the copy made by addFilegt250mb when the folder is unchecked
    def deletSize(self, name):
        """Delete name from the ``Particular_Formate_size`` folder."""
        self._remove_copied_file(reportpath + '/../Particular_Formate_size', name)

    # remove the copy made by addFile when the folder is unchecked
    def deletFormate(self, name):
        """Delete name from the ``Particular_Formate`` folder."""
        self._remove_copied_file(reportpath + '/../Particular_Formate', name)

    def data(self, index, role=QtCore.Qt.DisplayRole):
        """Return the check state for column 0, else defer to the base class."""
        if role != QtCore.Qt.CheckStateRole:
            return QtGui.QFileSystemModel.data(self, index, role)
        if index.column() == 0:
            return self.checkState(index)
        return None

    def flags(self, index):
        """Mark column 0 as user-checkable on top of the base flags."""
        base_flags = QtGui.QFileSystemModel.flags(self, index)
        if index.column() == 0:
            return base_flags | QtCore.Qt.ItemIsUserCheckable
        return base_flags

    def checkState(self, index):
        """Return the stored state for index, ``Unchecked`` if none is stored."""
        return self.checks.get(index, QtCore.Qt.Unchecked)

    def setData(self, index, value, role):
        """Store a check-state change for column 0 and sync the file lists."""
        if role == QtCore.Qt.CheckStateRole and index.column() == 0:
            self.checks[index] = value
            file_path = QtGui.QFileSystemModel.filePath(self, index)
            # NOTE(review): add/remove is decided by membership in
            # selectedFiles, not by `value`.
            if file_path in self.selectedFiles:
                self.selectedFiles.remove(file_path)
                self.deleteFile(file_path)
            else:
                self.addFile(file_path)
                self.addFilegt250mb(file_path)
                self.selectedFiles.add(file_path)
            self.emit(QtCore.SIGNAL("dataChanged(QModelIndex,QModelIndex)"), index, index)
            return True
        return QtGui.QFileSystemModel.setData(self, index, value, role)
Source Code
I tried different combinations for pre-selecting the checkboxes but couldn't succeed.
There is a method setCheckState that takes a bool
self.b1 = QCheckBox("SPM_CAE_DATA")
self.b1.setCheckState(True)

Testdome "Path" Python - Can't determine why my solution isn't 100% correct

So I'm using testdome's public questions to practice python and one of them is this path problem. I can only get 50% on the solution and I can't figure out why. I can't even create a test of my own that fails.
class Path:
    """Track a current directory and change it like a shell ``cd``."""

    def __init__(self, path):
        """Start at path (expected to be absolute, e.g. ``'/a/b'``)."""
        self.current_path = path

    def cd(self, new_path):
        """Change the current path.

        A new_path starting with ``/`` replaces the current path, anything
        else is resolved relative to it. ``..`` moves one directory up and
        is ignored at the root; empty segments (double or trailing slashes)
        are skipped.
        """
        if new_path.startswith('/'):
            parts = []
        else:
            parts = [part for part in self.current_path.split('/') if part]
        for item in new_path.split('/'):
            if item == '':
                continue
            if item == '..':
                # drop the whole last directory name, whatever its length
                if parts:
                    parts.pop()
            else:
                parts.append(item)
        self.current_path = '/' + '/'.join(parts)
Edit: Updated code based on first response. Still 50% though.
Thanks everyone for the help.
At a guess, where you have
for item in new_path_list:
if new_path_list[0] == '':
what you meant was
for item in new_path_list:
if item == '':
Edit: I thought I'd try it myself; here's how I did it (scores 100%):
# https://www.testdome.com/questions/python/path/8735
ROOT = "/"
DIV = "/"
PREV = ".."


class Path:
    """Current directory kept as a list of directory names below the root."""

    def __init__(self, path):
        """Start at the root, then change to path."""
        self.dirs = []
        self.cd(path)

    @property
    def current_path(self):
        """The current directory as an absolute path string."""
        return str(self)

    def cd(self, path):
        """Change directory; ``..`` goes up one level and stops at the root."""
        if path.startswith(ROOT):
            # absolute path - start from the beginning
            self.dirs = []
            path = path[len(ROOT):]
        # follow relative path
        for name in path.split(DIV):
            if not name:
                # empty piece from a double or trailing separator
                continue
            if name == PREV:
                if self.dirs:
                    self.dirs.pop()
            else:
                self.dirs.append(name)

    def __str__(self):
        return ROOT + DIV.join(self.dirs)


path = Path('/a/b/c/d')
path.cd('../x')
print(path.current_path)
root = '/'
div = '/'
parent = '..'


class Path:
    """Current directory kept both as a string and as a list of its parts."""

    def __init__(self, path):
        """Store path and its parts; the leading '' marks the root."""
        self.current_path = path
        pieces = path.split(div)
        # Keep the first piece (the root marker for absolute paths) and drop
        # empty pieces left by trailing or doubled separators, so a later
        # '..' removes a real directory.
        self.current = pieces[:1] + [piece for piece in pieces[1:] if piece != '']

    def cd(self, new_path):
        """Change directory; absolute paths restart from the root."""
        if new_path.startswith(root):
            self.current_path = root
            self.current = ['']
        for item in new_path.split(div):
            if item == '':
                continue
            if item == parent:
                # never pop the root marker or an already empty list
                if self.current not in ([], ['']):
                    self.current.pop()
            else:
                self.current.append(item)
        # joining [''] gives '', which stands for the root itself
        self.current_path = div.join(self.current) or root


path = Path('/a/b/')
path.cd('../x')
path.cd('/a/b')
print(path.current_path)
I just started learning Python recently; thanks to the people who replied above, your answers are really helpful for a newbie!
I created my own 100% solution and am sharing it here as another reference for any newbie like me:
class Path:
    """Track a current directory and change it like a shell ``cd``."""

    def __init__(self, path):
        self.current_path = path

    def cd(self, new_path):
        """Change directory and return the new current path.

        Absolute paths (leading ``/``) restart from the root, ``..`` moves
        up one level and stops at the root, empty segments are skipped.
        """
        PREV = '..'
        DIV = '/'
        if new_path.startswith(DIV):
            # c_list = [''] -- just the root marker
            c_list = ['']
        else:
            # c_list = ['', 'a', 'b', 'c', 'd']
            pieces = self.current_path.split(DIV)
            c_list = pieces[:1] + [piece for piece in pieces[1:] if piece != '']
        # n_list = ['..', 'x']
        for item in new_path.split(DIV):
            if item == '':
                continue
            if item == PREV:
                # delete the last item in list, but never the root marker
                if c_list not in ([], ['']):
                    del c_list[-1]
            else:
                c_list.append(item)
        # join with "/"; an empty result means we are at the root
        self.current_path = DIV.join(c_list) or DIV
        return self.current_path


path = Path('/a/b/c/d')
path.cd('../x')
print(path.current_path)
Same, same, but different...
class Path:
    """Track a current directory and change it like a shell ``cd``."""

    def __init__(self, path):
        self.current_path = path

    def cd(self, new_path):
        """Change directory.

        Absolute paths (leading ``/``) restart from the root but are still
        normalised, so ``/a/../b`` becomes ``/b``. ``..`` stops at the root
        and an empty new_path leaves the location unchanged.
        """
        if new_path.startswith('/'):
            stack = []
        else:
            stack = [name for name in self.current_path.split('/') if name]
        for part in new_path.split('/'):
            if part == '':
                continue
            if part == '..':
                # slicing an empty list is harmless, so the root is a floor
                stack = stack[:-1]
            else:
                stack.append(part)
        self.current_path = '/' + '/'.join(stack)

Adding to list a class instance

I'm implementing code to find the shortest path between two nodes, but
why does the output change when I change the first line of the DFS function?
Isn't it true that
path += [start] is equivalent to path = path + [start]?
the output before changing is ::
Current DFS path: 0
Current DFS path: 0->1
Current DFS path: 0->1->2
Current DFS path: 0->1->2->3
Current DFS path: 0->1->2->3->4
Current DFS path: 0->1->2->3->5
Current DFS path: 0->1->2->4
Current DFS path: 0->2
Current DFS path: 0->2->3
Current DFS path: 0->2->3->1
Current DFS path: 0->2->3->4
Current DFS path: 0->2->3->5
Current DFS path: 0->2->4
shortest path is 0->2->3->5
after changing is ::
Current DFS path: 0
Current DFS path: 0->1
Current DFS path: 0->1->2
Current DFS path: 0->1->2->3
Current DFS path: 0->1->2->3->4
Current DFS path: 0->1->2->3->4->5
shortest path is 0->1->2->3->4->5
The code ::
class Node(object):
    """A graph vertex identified by its name."""

    def __init__(self, name):
        """Assumes name is a string"""
        self.name = name

    def getName(self):
        """Return the node's name."""
        return self.name

    def __str__(self):
        # printing a node shows just its name
        return self.name
class Edge(object):
    """A directed connection from a source node to a destination node."""

    def __init__(self, src, dest):
        """Assumes src and dest are nodes"""
        self.src, self.dest = src, dest

    def getSource(self):
        """Return the node the edge starts at."""
        return self.src

    def getDestination(self):
        """Return the node the edge points to."""
        return self.dest

    def __str__(self):
        # rendered as "<source name>-><destination name>"
        endpoints = (self.src.getName(), self.dest.getName())
        return '->'.join(endpoints)
class WeightedEdge(Edge):
    """An Edge that additionally carries a numeric weight."""

    def __init__(self, src, dest, weight=1.0):
        """Assumes src and dest are nodes, weight a number"""
        Edge.__init__(self, src, dest)
        self.weight = weight

    def getWeight(self):
        """Return the edge's weight."""
        return self.weight

    def __str__(self):
        # rendered as "<source name>->(<weight>)<destination name>"
        pieces = [self.src.getName(), '->(', str(self.weight), ')',
                  self.dest.getName()]
        return ''.join(pieces)
#Figure 12.8
class Digraph(object):
    """Directed graph stored as a node list plus an adjacency mapping."""

    # nodes is a list of the nodes in the graph
    # edges is a dict mapping each node to a list of its children
    def __init__(self):
        self.nodes = []
        self.edges = {}

    def addNode(self, node):
        """Add node to the graph; raises ValueError if it is already there."""
        if node in self.nodes:
            raise ValueError('Duplicate node')
        self.nodes.append(node)
        self.edges[node] = []

    def addEdge(self, edge):
        """Record edge; raises ValueError if an endpoint is not in the graph."""
        src, dest = edge.getSource(), edge.getDestination()
        if src not in self.nodes or dest not in self.nodes:
            raise ValueError('Node not in graph')
        self.edges[src].append(dest)

    def childrenOf(self, node):
        """Return the list of nodes reachable from node by one edge."""
        return self.edges[node]

    def hasNode(self, node):
        """Return True if node was added to the graph."""
        return node in self.nodes

    def __str__(self):
        # one "src->dest" line per edge, in node insertion order,
        # without a trailing newline
        lines = [src.getName() + '->' + dest.getName()
                 for src in self.nodes
                 for dest in self.edges[src]]
        return '\n'.join(lines)
class Graph(Digraph):
    """Undirected graph: every edge is stored in both directions."""

    def addEdge(self, edge):
        """Add edge and its reverse to the underlying digraph."""
        reverse = Edge(edge.getDestination(), edge.getSource())
        for directed in (edge, reverse):
            Digraph.addEdge(self, directed)
#Figure 12.9
def printPath(path):
    """Assumes path is a list of nodes

    Returns the nodes' string forms joined by '->' ('' for an empty path)."""
    return '->'.join(str(node) for node in path)
def DFS(graph, start, end, path, shortest, toPrint = False):
    """Assumes graph is a Digraph; start and end are nodes;
    path and shortest are lists of nodes
    Returns a shortest path from start to end in graph"""
    # build a new list so the caller's path is left untouched
    path = path + [start]
    if toPrint:
        print('Current DFS path:', printPath(path))
    if start == end:
        return path
    for child in graph.childrenOf(start):
        if child in path:
            continue  # avoid cycles
        # only explore while this branch can still beat the best path so far
        if shortest is None or len(path) < len(shortest):
            candidate = DFS(graph, child, end, path, shortest, toPrint)
            if candidate is not None:
                shortest = candidate
    return shortest
def shortestPath(graph, start, end, toPrint = False):
    """Assumes graph is a Digraph; start and end are nodes
    Returns a shortest path from start to end in graph"""
    # start the search with an empty path and no best path yet
    initial_path, best_so_far = [], None
    return DFS(graph, start, end, initial_path, best_so_far, toPrint)
#Figure 12.10
def testSP():
    """Build the six-node example digraph and print a shortest path 0 -> 5."""
    nodes = [Node(str(name)) for name in range(6)]  # Create 6 nodes
    g = Digraph()
    for n in nodes:
        g.addNode(n)
    # (source index, destination index), added in this order
    links = [(0, 1), (1, 2), (2, 3), (2, 4), (3, 4),
             (3, 5), (0, 2), (1, 0), (3, 1), (4, 0)]
    for src, dest in links:
        g.addEdge(Edge(nodes[src], nodes[dest]))
    sp = shortestPath(g, nodes[0], nodes[5])
    print('Shortest path found by DFS:', printPath(sp))
Note :: this code is from this book enter link description here
They are not the same
path += [start] is equivalent to path.extend([start]) -- it mutates path.
On the other hand
path = path + [start] creates a new list and binds the name path to it.
Consider the following experiment, and note the IDs:
>>> a = [1]
>>> id(a)
55937672
>>> a += [2,3]
>>> id(a)
55937672
>>> b = [1]
>>> id(b)
55930440
>>> b = b + [1,2]
>>> id(b)
55937288
The ID of b changed but the ID of a didn't.
As to why it makes a difference in your code -- DFS is a function. In the version which uses path += [start], you are modifying the passed parameter path -- and this modification persists after the call returns. On the other hand, in the version which uses path = path + [start], you are creating a new local variable named path, one which goes out of scope when the call returns, without any changes to the parameter path.
In line
path=path+[start]
you create new list object.
In line
path+=[start]
you modify list object that already exists.
You can try this:
path2=path[:]
path2+=[start]

maximum recursion depth exceeded with recursive file structure crawl

Here's my code, the intention of which is to crawl a given folder and look for .md and .pdf files, and build a tree-like structure which describes it.
I'm probably really overthinking it, so I could really use a second set of eyes on this.
class Resource_Item:
    """One folder of the crawled tree with its documents and sub-folders."""

    def __init__(self, name=None, stub=None, path=None, parent=None, html_file_location=None, documents=None, children=None):
        """Store the given attributes.

        documents and children default to a fresh empty list per instance.
        A literal ``[]`` default would be one list shared by every instance,
        so each item would end up listing all children ever added.
        """
        self.name = name
        self.stub = stub
        self.path = path
        self.parent = parent
        self.html_file_location = html_file_location
        self.documents = [] if documents is None else documents
        self.children = [] if children is None else children

    def add_child(self, c):
        """Append c to this item's children."""
        self.children.append(c)

    def to_json(self):
        """Return this item serialised as a JSON string.

        Each child is embedded as the string returned by its own to_json().
        """
        o = {
            'name' : self.name,
            'stub' : self.stub,
            'path' : self.path,
            'parent' : self.parent,
            'html_file_location' : self.html_file_location,
            'documents' : self.documents,
            'children' : [c.to_json() for c in self.children] }
        return json.dumps(o)
def walk_dir(root, parent = None):
    """
    Build a Resource_Item describing root: its .pdf files become documents,
    a .md file becomes the stub, and every sub-folder that contains a PDF
    somewhere below it becomes a child built by a recursive call.

    NOTE(review): the last example below expects file names in ``.children``;
    that looks like it was meant for ``.documents`` -- confirm.

    >>> walk_dir("./test_docs/folder containing pdfs/").documents
    ['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
    >>> len(walk_dir("./test_docs/folder containing pdfs/").children)
    0
    >>> walk_dir("./test_docs/folder containing markdown and pdfs/").stub is None
    False
    >>> walk_dir("./test_docs/folder containing markdown and pdfs/").children
    ['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
    """
    abs_root = os.path.abspath(root)
    stem, _extension = os.path.splitext(os.path.basename(root))
    entry = Resource_Item(name=stem, parent=parent, path=abs_root)
    for item in os.listdir(root):
        path = os.path.join(abs_root, item)
        if os.path.isfile(path):
            if item.endswith(".pdf"):
                entry.documents.append(item)
            elif item.endswith(".md"):
                entry.stub = read_markdown_file_as_html(path)
            continue
        if os.path.isdir(path) and dir_contains_pdf(path):
            print('found a path to contain PDFs: "'+str(path)+'"')
            entry.add_child(walk_dir(path))
    return entry
What appears to be happening is that on the entry.add_child(walk_dir(path)) line, walk_dir doesn't properly create a new instance of Resource_Item, since my testing shows that Resource_Item.children gets populated with all the pdfs in that file tree, not just those in the immediate folder.
As for my supporting functions, I'm pretty sure they work properly, but here they are for completeness:
def dir_contains_pdf(root):
    """
    Return True if root, or any folder below it, holds a file whose name
    ends in ".pdf"; return False otherwise.

    >>> dir_contains_pdf("./test_docs/folder containing pdfs/")
    True
    >>> dir_contains_pdf("./test_docs/folder containing nothing/")
    False
    >>> dir_contains_pdf("./test_docs/folder containing folders, markdown, and pdf/")
    True
    >>> dir_contains_pdf("./test_docs/folder containing markdown and pdfs/")
    True
    """
    root = os.path.abspath(root)

    def holds_pdf(name):
        # a file counts by its extension, a folder by its (recursive) content
        full_path = os.path.join(root, name)
        if os.path.isfile(full_path):
            return name.endswith(".pdf")
        return os.path.isdir(full_path) and dir_contains_pdf(full_path)

    # any() stops at the first hit, like the early returns of a loop
    return any(holds_pdf(name) for name in os.listdir(root))
def read_markdown_file_as_html(markdown_filename):
    """Read markdown_filename and return its content rendered by markdown.markdown()."""
    # the with-block closes the file even if reading fails
    with open(markdown_filename, 'r') as f:
        markdown_content = f.read()
    return markdown.markdown(markdown_content)
As another view of how this recursion should be working, I built this other program in the same style to confirm that it works, and it does work properly, so I'm guessing the issue has to do with how I'm using the Python file API:
class Item:
    """Minimal tree node: a value ``n`` and a list of child items."""

    def __init__(self, n=None):
        self.n = n
        # created per instance, so siblings never share a children list
        self.children = []

    def add_child(self, c):
        """Append c to this item's children."""
        self.children.append(c)

    def to_o(self):
        """Return the subtree as nested plain dicts."""
        nested = [child.to_o() for child in self.children]
        return {'n': self.n, 'children': nested}
def bad(count):
    """Build a chain of Items: odd counts recurse on count*3, even counts
    on count halved; recursion stops when count is 0 or above 100."""
    item = Item(n=count)
    print('count : '+str(count))
    if count > 100 or count == 0:
        return item
    elif (count-1) % 2 == 0:
        print(str(count) + ' is odd')
        item.add_child(bad(count*3))
    elif count % 2 == 0:
        print(str(count) + ' is even')
        # floor division keeps count an int on Python 3 as well
        item.add_child(bad(count//2))
    return item
import json
print(json.dumps(bad(7).to_o()))

Categories