My goal is to store all files and directories in a structured data tree, where each:
directory is a node
file is a leaf
My code below works fine. However, I only take one step at a time and interrupt/restart the walking process for every directory. (see step_in() method)
Apparently it is possible and considered "advanced" to break into the process of an iteration itself and work with it. Therefore my question is, is it possible to "break into" the os.walk process and yield what's necessary?
import os
import sys
import inspect
# Module-wide switch for the diagnostic output produced by report().
DEBUG = True


def report(*args, **kwargs):
    """Forward the arguments to ``print`` when ``DEBUG`` is truthy.

    Accepts exactly what the built-in ``print`` accepts (including ``sep``,
    ``end`` and ``file``) and prints nothing when ``DEBUG`` is falsy.
    """
    # DEBUG is only read here, so no ``global`` declaration is needed.
    if DEBUG:
        print(*args, **kwargs)
class directory:
    """A directory node of the file tree.

    ``children`` (sub-directories) and ``leafs`` (files) only exist after
    :meth:`step_in` has been called.
    """

    def __init__(self, path):
        # Path of the directory this node stands for.
        self.path = path

    @property
    def name(self):
        """Last component of ``path`` as returned by ``os.path.basename``."""
        return os.path.basename(self.path)

    def __repr__(self):
        ID = hex(id(self))
        return "<directory \"{:}\" at {}>".format(self.name, ID)

    def step_in(self):
        """Step into the dir and find all files/dirs.

        Only the first triple produced by ``os.walk`` is used, so nothing
        below this directory is visited:
          - directory names are stored in ``self.children``
          - file names are stored in ``self.leafs``

        Returns:
            The directory path reported by ``os.walk``, or ``None`` when
            ``os.walk`` yields nothing (for example the path cannot be
            listed); ``children`` and ``leafs`` are then empty lists so
            callers can still iterate over them.
        """
        first_level = next(os.walk(self.path), None)
        if first_level is None:
            self.children = []
            self.leafs = []
            return None
        p, d, f = first_level
        self.children = d
        report("--->kids found : {}".format(d))
        self.leafs = f
        report("--->leafs found: {}".format(f))
        return p
class walker:
    """Builds a tree of ``directory`` nodes below a root path."""

    def __init__(self, root_path):
        self.root = directory(root_path)

    def walk(self, target=None):
        """Walk through all dirs and create tree.

        Recursive process with root directory as initial directory.  After
        ``target.step_in()`` every name in ``target.children`` is replaced,
        in place, by a ``directory`` node which is then walked in turn.

        Args:
            target: node to start from; ``self.root`` when omitted.
        """
        if target is None:
            target = self.root
        path = target.step_in()
        # Elements are replaced one-for-one, so the list length never changes
        # while it is being enumerated.
        for i, child_name in enumerate(target.children):
            # get the next path
            next_path = os.path.join(path, child_name)
            report("\nnext is: {}".format(next_path))
            # save dir by replacing the string child with an actual child
            child = directory(next_path)
            target.children[i] = child
            # walk into that child
            self.walk(child)
if __name__ == "__main__":
    # The root directory may be passed as the first command-line argument;
    # the original hard-coded path is kept as the default.
    root_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/xxx/test/xxx'
    w = walker(root_path)
    w.walk()
Related
In this post, my goal is to concatenate two QFileSystemModels into one and display them together. (Lots of updates have been made.)
Context :
In my C drive , I created the folder MyFolder (https://drive.google.com/drive/folders/1M-b2o9CiohXOgvjoZrAnl0iRVQBD1sXY?usp=sharing) , in which there are some folders and some files, for the sake of producing the minimal reproducible example . Their structure is :
The following Python code using PyQt5 library (modified from How to display parent directory in tree view?) runs after importing necessary libraries:
#The purpose of the proxy model is to display the directory.
#This proxy model is copied here from the reference without modification.
class ProxyModel(QSortFilterProxyModel):
    """Proxy model that hides the siblings of one chosen path.

    Among the rows whose parent is the parent of ``root_path``, only the row
    whose file path equals ``root_path`` is accepted; every other row of the
    source model is accepted unchanged.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self._root_path = ""

    def filterAcceptsRow(self, source_row, source_parent):
        """Return whether the given source row is shown."""
        source_model = self.sourceModel()
        if self._root_path and isinstance(source_model, QFileSystemModel):
            root_index = source_model.index(self._root_path).parent()
            if root_index == source_parent:
                index = source_model.index(source_row, 0, source_parent)
                return index.data(QFileSystemModel.FilePathRole) == self._root_path
        return True

    @property
    def root_path(self):
        """Path whose siblings are filtered out ("" disables filtering)."""
        return self._root_path

    @root_path.setter
    def root_path(self, p):
        self._root_path = p
        # Re-evaluate filterAcceptsRow for the new path.
        self.invalidateFilter()
class MainWindow(QMainWindow):
    """Main window holding two independent tree views, one per path."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.create_treeview()
        self.setCentralWidget(self.treeView_1) #The line I will be talking about.

    def create_treeview(self):
        """Create ``treeView_1``/``treeView_2`` with their models and proxies."""
        # Changing the path is sufficient to change the displayed directory
        path_1 = 'C:/MyFolder/SubFolder1'
        path_2 = 'C:/MyFolder'
        self.treeView_1, self.dirModel_1, self.proxy_1 = self._build_view(path_1)
        self.treeView_2, self.dirModel_2, self.proxy_2 = self._build_view(path_2)

    @staticmethod
    def _build_view(path):
        """Return ``(view, model, proxy)`` for a tree view that shows ``path``."""
        view = QTreeView()
        model = QFileSystemModel()
        model.setRootPath(QDir.rootPath())
        # The view is rooted at the parent of ``path``; the proxy then hides
        # every sibling so that ``path`` is the only visible top-level row.
        root_index = model.index(path).parent()
        proxy = ProxyModel(model)
        proxy.setSourceModel(model)
        proxy.root_path = path
        view.setModel(proxy)
        view.setRootIndex(proxy.mapFromSource(root_index))
        return view, model, proxy
if __name__ == "__main__":
    import sys

    # Create the application, show the main window and run the event loop
    # until the window is closed.
    app = QApplication(sys.argv)
    w = MainWindow()
    w.show()
    sys.exit(app.exec_())
The line self.setCentralWidget(self.treeView_1) gives:
Changing self.setCentralWidget(self.treeView_1) to self.setCentralWidget(self.treeView_2) gives:
Objective:
My goal is to concatenate the two trees together. That is, when click run, the user should be able to see:
The order in which they show up does not matter. All I care about is that MyFolder and SubFolder1 show up as if they are completely independent items (even though in reality one is a subfolder of the other). I should remark that everything is static. That is, we are not trying to detect any changes to folders or files. The only time we ever need to peek at the existing folders and files will be when we click on run.
Update:
After several days of studying and trying, a major progress has been made. I thank musicamante for the hint of using QTreeWidget. The idea is, as said in comments, traverse through models and gradually move everything into one new QTreeWidget. To avoid freeze, my solution is to ask the QFileSystemModel to fetchMore whenever the user wants to see more (i.e. when the user wants to extend QTreeWidget).
The following code runs and almost solves my problem:
import os
from PyQt5.QtCore import*
from PyQt5.QtWidgets import*
from PyQt5 import QtTest
class To_Display_Folder(QSortFilterProxyModel):
    """Proxy model that hides the siblings of ``root_path``.

    Args:
        disables: when true, ``flags`` marks rows that do not match
            ``matchIndex`` as disabled.
        parent: optional parent object.
    """

    def __init__(self, disables=False, parent=None):
        super().__init__(parent)
        #self.setFilterRegularExpression(r'^(.*\.dcm|[^.]+)$')
        self._disables = bool(disables)
        self._root_path = ""

    def filterAcceptsRow(self, source_row, source_parent):
        """Return whether the given source row is shown."""
        source_model = self.sourceModel()
        #case 1 folder
        if self._root_path and isinstance(source_model, QFileSystemModel):
            root_index = source_model.index(self._root_path).parent()
            if root_index == source_parent:
                index = source_model.index(source_row, 0, source_parent)
                return index.data(QFileSystemModel.FilePathRole) == self._root_path
        return True
        # Disabled alternative kept from the original, which held it in an
        # unused string literal:
        #case 2 file
        # file_index = self.sourceModel().index(source_row, 0, source_parent)
        # if not self._disables:
        #     return self.matchIndex(file_index)
        # return file_index.isValid()

    @property
    def root_path(self):
        """Path whose siblings are filtered out ("" disables filtering)."""
        return self._root_path

    @root_path.setter
    def root_path(self, p):
        self._root_path = p
        # Re-evaluate filterAcceptsRow for the new path.
        self.invalidateFilter()

    def matchIndex(self, index):
        """Return true for directories and for rows the base filter accepts."""
        return (self.sourceModel().isDir(index) or
                super().filterAcceptsRow(index.row(), index.parent()))

    def flags(self, index):
        """Return the item flags, cleared of ``ItemIsEnabled`` for non-matches
        when the proxy was created with ``disables`` set."""
        flags = super().flags(index)
        if (self._disables and
                not self.matchIndex(self.mapToSource(index))):
            flags &= ~Qt.ItemIsEnabled
        return flags
class Widget_Item_from_Proxy(QTreeWidgetItem):
    """Tree item built from an index of a ``QFileSystemModel``.

    Column 0 holds the file name and column 1 the file path; the icon is a
    file icon when the path is an existing regular file and a directory icon
    otherwise.
    """

    def __init__(self, index_in_dirModel, parent = None):
        super().__init__(parent)
        # Read the path once; it is needed for the text and the icon choice.
        file_path = index_in_dirModel.data(QFileSystemModel.FilePathRole)
        self.setText(0, index_in_dirModel.data(QFileSystemModel.FileNameRole))
        self.setText(1, file_path)
        if os.path.isfile(file_path):
            pixmap = QStyle.SP_FileIcon
        else:
            pixmap = QStyle.SP_DirIcon
        self.setIcon(0, QApplication.style().standardIcon(pixmap))
class MainWindow(QMainWindow):
    """Main window showing several unrelated paths in one ``QTreeWidget``.

    Each path gets its own ``QFileSystemModel``; once that model reports a
    loaded directory, the path and its direct children are copied into the
    tree widget.  Sub-folders are filled in the same way when their parent
    item is expanded.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # The module-level name is kept because the original published it.
        global treeWidget
        treeWidget = QTreeWidget()
        self.treeWidget = treeWidget
        self.treeWidget.itemExpanded.connect(self.upon_expansion)
        self.treeWidget.itemClicked.connect(self.tree_click)
        #The following directories will be displayed on the tree.
        self.add_path_to_tree_widget('C:/MyFolder')
        self.add_path_to_tree_widget('C:/Users/r2d2w/OneDrive/Desktop')
        self.add_path_to_tree_widget('C:/')
        self.setCentralWidget(self.treeWidget)

    def add_path_to_tree_widget(self,path):
        """Start loading ``path``; ``once_loaded`` adds it to the tree."""
        dirModel = QFileSystemModel()
        dirModel.setRootPath(QDir.rootPath())
        dirModel.directoryLoaded.connect(lambda: self.once_loaded(path, dirModel))

    def once_loaded(self, path, dirModel):
        """Add ``path`` and its direct children as a new top-level item."""
        if dirModel.canFetchMore(dirModel.index(path)):
            # Ask the model for the content of ``path`` and wait for the next
            # directoryLoaded signal.
            dirModel.fetchMore(dirModel.index(path))
            return
        root_index = dirModel.index(path).parent()
        proxy = To_Display_Folder(disables = False, parent = dirModel)
        proxy.setSourceModel(dirModel)
        proxy.root_path = path
        proxy_root_index = proxy.mapFromSource(root_index)
        origin_in_proxy = proxy.index(0,0,parent = proxy_root_index)
        root_item = Widget_Item_from_Proxy(
            proxy.mapToSource(origin_in_proxy))
        self.treeWidget.addTopLevelItem(root_item)
        for row in range(0, proxy.rowCount(origin_in_proxy)):
            proxy_index = proxy.index(row,0,parent = origin_in_proxy)
            # Passing the parent attaches the new item to it; root_item is
            # the top-level item that was appended just above.
            Widget_Item_from_Proxy(
                proxy.mapToSource(proxy_index),
                parent = root_item)
        # This model has been copied into the tree; stop reacting to it.
        dirModel.directoryLoaded.disconnect()

    @pyqtSlot(QTreeWidgetItem)
    def upon_expansion(self, treeitem):
        """Load the content of every child of ``treeitem`` that is a directory."""
        for i in range(0, treeitem.childCount()):
            if os.path.isdir(treeitem.child(i).text(1)):
                self.add_child_path_to_tree_widget(treeitem.child(i))

    def add_child_path_to_tree_widget(self,subfolder_item):
        """Start loading the folder behind ``subfolder_item``;
        ``child_once_loaded`` replaces the item once it is available."""
        subfolder_path = subfolder_item.text(1)
        dirModel = QFileSystemModel()
        dirModel.setRootPath(QDir.rootPath())
        dirModel.directoryLoaded.connect(lambda: self.child_once_loaded(subfolder_item, subfolder_path,dirModel))

    def child_once_loaded(self, subfolder_item, subfolder_path, dirModel):
        """Replace ``subfolder_item`` by a new item that carries its children."""
        if dirModel.canFetchMore(dirModel.index(subfolder_path)):
            dirModel.fetchMore(dirModel.index(subfolder_path))
            return
        root_index = dirModel.index(subfolder_path).parent()
        proxy = To_Display_Folder(disables = False, parent = dirModel)
        proxy.setSourceModel(dirModel)
        proxy.root_path = subfolder_path
        proxy_root_index = proxy.mapFromSource(root_index)
        origin_in_proxy = proxy.index(0,0,parent = proxy_root_index)
        root_item = Widget_Item_from_Proxy(
            proxy.mapToSource(origin_in_proxy))
        # Swap the placeholder for the new item at the same position.
        folder_item = subfolder_item.parent()
        itemIndex = folder_item.indexOfChild(subfolder_item)
        folder_item.removeChild(subfolder_item)
        folder_item.insertChild(itemIndex, root_item)
        for row in range(0, proxy.rowCount(origin_in_proxy)):
            proxy_index = proxy.index(row,0,parent = origin_in_proxy)
            Widget_Item_from_Proxy(
                proxy.mapToSource(proxy_index),
                parent = root_item)
        dirModel.directoryLoaded.disconnect()

    @pyqtSlot(QTreeWidgetItem)
    def tree_click(self, item):
        """Print the name (column 0) and path (column 1) of the clicked item."""
        print(item.text(0))
        print(item.text(1))
if __name__ == "__main__":
    import sys

    # Create the application, show the main window and run the event loop
    # until the window is closed.
    app = QApplication(sys.argv)
    w = MainWindow()
    w.show()
    sys.exit(app.exec_())
Since the bounty period is still not over, I will use the time to post two new questions:
Sometimes, especially when the line self.add_path_to_tree_widget('C:/') is present, the code does not give all directories when we click run. This problem is easily fixed by closing the window and clicking on run again. This problem occurs because the QFileSystemModel has not yet had enough time to traverse the designated folder. If it had just a little more time, it would be able to. I wonder if there is a way to fix this programmatically.
The function add_path_to_tree_widget is similar to add_child_path_to_tree_widget. The function once_loaded is similar to child_once_loaded. I wonder if there is a way to write these functions more succinctly.
While not impossible, it's quite difficult to create a unique and dynamic model that is able to access different QFileSystemModel structures.
An easier and simpler implementation, which would be more practical for static purposes, is to use a QTreeWidget and create items recursively.
class MultiBrowser(QTreeWidget):
    """Tree widget showing several file-system paths side by side.

    Every path passed to the constructor becomes an expanded top-level item.
    The whole sub-tree of each path is read recursively inside the
    constructor, so construction does not return before everything has been
    listed.
    """

    def __init__(self, *pathList):
        super().__init__()
        self.iconProvider = QFileIconProvider()
        self.setHeaderLabels(['Name'])
        for path in pathList:
            item = self.createFSItem(QFileInfo(path), self.invisibleRootItem())
            self.expand(self.indexFromItem(item))

    def createFSItem(self, info, parent):
        """Create the item for ``info`` below ``parent`` and return it.

        For a directory, items for all of its entries (directories first,
        without "." and "..") are created recursively.
        """
        item = QTreeWidgetItem(parent, [info.fileName()])
        item.setIcon(0, self.iconProvider.icon(info))
        if info.isDir():
            infoList = QDir(info.absoluteFilePath()).entryInfoList(
                filters=QDir.AllEntries | QDir.NoDotAndDotDot,
                sort=QDir.DirsFirst
            )
            for childInfo in infoList:
                self.createFSItem(childInfo, item)
        return item
# ...
multiBrowser = MultiBrowser('path1', 'path2')
For obvious reasons, the depth of each path and their contents will freeze the UI from interaction until the whole structure has been crawled.
If you need a more dynamic approach, consider using the QFileSystemModel as a source for path crawling, along with its directoryLoaded signal, which will obviously require a more complex implementation.
I'm trying to write a depth-first search algorithm that will find a path from where the agent (the black cube) is to the exit at the bottom of the right-hand path. But the algorithm I have written loops back on itself as part of the path found. How do I implement a DFS algorithm that doesn't do this?
Any ideas on what I am doing wrong?
Any help much appreciated, please.
Thanks
What the world looks like:
The result of the depth first search path planning:
My code for the agent class:
class Agent(turtle.Turtle):
    """Turtle that explores a world and plans a path to an end point."""

    def __init__(self, location, endPoint, world):
        turtle.Turtle.__init__(self)
        self.shape("square")
        self.color("black")
        self.penup()
        self.speed(0)
        # Variables
        self._bump = 0
        self._location = location
        self._endPoint = endPoint
        self._world = world
        # Maps tuple(node) to the value returned by world.testDirections(node).
        self._map = dict()

    def dfs_paths(self, start, goal, path=None):
        """Yield every loop-free path from ``start`` to ``goal`` in ``_map``.

        The values of ``_map`` must support set difference and the nodes
        must be hashable.  Each yielded path is a list that begins with the
        initial ``start`` and ends with ``goal``.
        """
        if path is None:
            path = [start]
        if start == goal:
            yield path
        # Nodes already on the current path are excluded, so no path can
        # loop back on itself.
        for neighbour in self._map[tuple(start)] - set(path):
            yield from self.dfs_paths(neighbour, goal, path + [neighbour])

    def _planPath(self, node, visited=None):
        """Depth-first search from ``node`` to ``self._endPoint``.

        Args:
            node: node to expand; its neighbours are obtained from
                ``self._world.testDirections`` and cached in ``self._map``.
            visited: list of nodes expanded so far, shared by the whole
                recursion; a new list is started when omitted.

        Returns:
            The path as a list from ``node`` to the end point, or ``None``
            when the end point cannot be reached from ``node``.
        """
        if visited is None:
            visited = [node]
        self._map[tuple(node)] = self._world.testDirections(node)
        if node not in visited:
            visited.append(tuple((node)))
        print("Visited = " + str(visited))
        for neighbour in self._map[tuple((node))]:
            print("Neighbour = " + str(neighbour))
            if neighbour == self._endPoint:
                visited.append(neighbour)
                print("Here 1...")
                return [node, neighbour]
            if neighbour in visited:
                # Already expanded; going back into it would bounce between
                # two neighbouring nodes forever.
                continue
            path = self._planPath(neighbour, visited)
            if path:
                print("Here 2...")
                return [node] + path
        return None
You are building the path from the visited information. But that does not represent the path. It just represents the nodes you have visited, which includes unsuccessful paths from which you have already backtracked.
When you find the target node, you should instead create a (partial) path with just the end node in it, and return that to the caller. The caller can detect from the return value of the recursive call that the target was found and can then prepend its own node to that partial path, and return that to its own caller.
That way the backtracking phase (after success) will be used to build the path.
So, to get you started, replace this:
self._planPath(neighbour, visited)
with:
path = self._planPath(neighbour, visited)
if path: # success!
return [node] + path
Or do an append which is more efficient, but then you must reverse the path at the very end.
And replace this:
self._world.showPath(visited)
with:
return [node, neighbor]
The main caller of self._planPath would probably do something like this:
path = self._planPath(startnode, [])
if path:
self._world.showPath(path)
Note that for visited you should really be using a set, not a list.
I am trying to create a simple search engine to look inside a file. In order to reuse the code I separated the search function, but for some reason it just doesn't work the second time round.
The first time round it shows the result as it should, but the second time I type a name it doesn't give me any result at all. It's as if the c variable is not being passed to the searchpart(c, path) function the second time round.
import os
# Version from the question, kept exactly as posted.
# Walks `path` and, for every directory whose base name contains `c`
# (case-insensitive), prints the directory and then its entries.
# NOTE(review): os.chdir(root) changes the process working directory and this
# function never changes it back, so a relative `path` is resolved against a
# different directory afterwards.
def searchpart(c, path):
employees = os.walk(path)
for root, dirs, files in employees:
names = os.path.basename(root)
if c.lower() in names.lower():
print(root)
os.chdir(root)
for i in os.listdir():
print("-----> {}".format(i))
def welcomepart(path):
    """Ask for a name on standard input and search ``path`` for it."""
    # this function allows to reuse the application after a name is search.
    c = input("\n-------> please introduce the name? \n")
    searchpart(c, path)
def mainfuntion():
    """Print the welcome banner and keep asking for names to search.

    The loop runs until standard input is exhausted or the user interrupts
    the program (Ctrl-C); both end the function instead of raising.
    """
    path = 'WORKERS'
    print('______________ Welcome ______________ \n ')
    # The original loop flag was never changed, so this is an endless loop.
    while True:
        try:
            welcomepart(path)
        except (EOFError, KeyboardInterrupt):
            break
mainfuntion()
This work-around seems to fix the problem:
def searchpart(c, path):
    """Print every directory below ``path`` whose name contains ``c``.

    The match is case-insensitive and is made against the last component of
    each directory path.  For every match the directory path is printed,
    followed by one ``-----> entry`` line per entry it contains.

    The working directory is changed only while a match is being listed and
    is restored right afterwards, even if listing fails, so a relative
    ``path`` keeps working for the rest of this walk and for later calls.
    """
    cwd = os.getcwd()
    needle = c.lower()
    employees = os.walk(path)
    for root, dirs, files in employees:
        names = os.path.basename(root)
        if needle in names.lower():
            print(root)
            os.chdir(root)
            try:
                for i in os.listdir():
                    print("-----> {}".format(i))
            finally:
                os.chdir(cwd)
It just remembers which directory you were in before the function call and changes back before returning.
However, I'm sure there will be a solution where the line: os.chdir(root) is not needed.
I am trying to create a walker that goes through directories. Here are the inputs and outputs which I have partly working. I am using a test directory but I would like this to be done on any directory which is leading to some problems.
[IN]: print testdir #name of the directory
[OUT]: ['j','k','l'] #directories under testdir
[IN]: print testdir.j
[OUT]: ['m','n'] # Files under testdir.j
Here is the code so far:
class directory_lister:
    """Lists the entries (files and directories) directly under a path."""

    def __init__(self, path):
        self.path = path
        # Entry names in the order returned by os.listdir.
        self.ex = os.listdir(path)

    def __repr__(self):
        return repr(self.ex)
This returns the directories and files but I have to manually assign the names of the directories.
testdir = directory_lister(path/to/testdir)
j = directory_lister(path/to/j)
etc
Is there a way to automate instances such that:
for root,dirs,files in os.walk(/path/to/testdir/):
for x in dirs:
x = directory_lister(root) #I want j = directory_lister(path/to/j), k = directory_lister(path/to/k) and l = directory_lister(path/to/l) here.
Can there be a:
class directory_lister:
def __init__(self,path):
self.path = path
self.j = directory_lister(path + os.sep + j) # how to automate this attribute of the class when assigned to an instance??
The code above is wrong as the object x only becomes an instance but j,k,l have to be defined manually. Do I have to use another class or a dictionary with getattr but I always run into the same problem. If any extra information is required please ask, I hope I made this clear.
UPDATE 2
Is there a way to add other complex functions to the DirLister by Anurag below? So when it gets to a file say testdir/j/p, it prints out the first line of file p.
[IN] print testdir.j.p
[OUT] First Line of p
I have made a class for printing out the first line of the file:
class File:
    """Holds the first line of a text file."""

    def __init__(self, path):
        """Read the first line in desired path"""
        self.path = path
        # The context manager closes the file even when reading fails.
        with open(path, 'r') as f:
            self.first_line = f.readline()

    def __repr__(self):
        """Display the first line"""
        return self.first_line
Just need to know how to incorporate it in the class below. Thank you.
I assume you want sub-dirs to be accessible like an attribute; you can achieve that in two ways:
Go through the list of files and create variables dynamically
Hook into attribute access and correctly return listers as needed
I prefer the second approach as it is lazy, better and easier to implement
import os
class DirLister(object):
    """Lazy directory lister with attribute-style access to sub-directories.

    ``lister.name`` returns a new ``DirLister`` for ``root/name``; the
    directory itself is only listed when the object is converted to a
    string.
    """

    def __init__(self, root):
        self.root = root
        # Filled by _load() on first use.
        self._list = None

    def __getattr__(self, name):
        """Return a lister for the sub-directory ``name``.

        Python calls ``__getattr__`` only after the normal attribute lookup
        has failed, so real attributes and methods never reach this point
        and no extra check of the base class is needed.

        Raises:
            AttributeError: for ``root``/``_list`` (the instance has not
                been initialised) and for ``__dunder__`` names, which are
                probed by protocols and must not turn into a lister.
        """
        if name in ("root", "_list") or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(name)
        return DirLister(os.path.join(self.root, name))

    def __str__(self):
        self._load()
        return str(self._list)

    def _load(self):
        """
        load once when needed
        """
        if self._list is not None:
            return
        self._list = os.listdir(self.root) # list root someway
root = DirLister("/")
print root.etc.apache2
output:
['mods-enabled', 'sites-80', 'mods-available', 'ports.conf', 'envvars', 'httpd.conf', 'sites-available', 'conf.d', 'magic', 'apache2.conf', 'sites-enabled']
You can improve this to have better error checking etc
Code explanation: this is basically a recursive listing of directory, so a DirLister objects lists files under the given root and if some variable is accessed with dotted notation it returns a DirLister assuming that that attribute is a folder under the root. So if we try to create DirLister class step by step it will be more clear
1- A simple DirLister which just lists files/folders under it
class DirLister(object):
def __init__(self, root):
self.root = root
self._list = os.listdir(self.root)
2- Our simple lister just lists files one level deep; if we want to get files under subfolders we can hook into __getattr__, which is called with varname when obj.varname is used. So if our dir-lister doesn't have an attribute named varname we assume the user is trying to access that directory under the given root, so we create another DirLister whose root is root+subdirname
def __getattr__(self, name):
try:
var = super(DirLister).__getattr__(self, name)
return var
except AttributeError:
return DirLister(os.path.join(self.root, name))
Note: first we check base class for that attribute because we don't want to treat all variable access as sub-dir access, if there is no such attribute hence AttributeError then we create a new DirLister for sub-folder.
3- To improve code so that we don't list all folders even if user did not ask for them, we only list when user requires, hence a load method
def _load(self):
if self._list is not None:
return
self._list = os.listdir(self.root) # list root someway
so this method lists dir if not already listed, and this should be called when we finally need it e.g. while printing the list
Edit: as asked by OP here is the alternate method of recursively list whole tree though I would strongly recommend against it
import os
class RecursiveDirLister(object):
    """Eagerly lists a whole directory tree.

    ``_sublist`` holds the entry names of ``root``; every sub-directory that
    can be listed is additionally exposed as an attribute of the same name
    holding its own ``RecursiveDirLister``.

    Raises:
        OSError: when ``root`` itself cannot be listed.
    """

    def __init__(self, root):
        self._sublist = os.listdir(root)
        for folder in self._sublist:
            path = os.path.join(root, folder)
            if not os.path.isdir(path):
                continue
            # An entry named like an existing attribute or method (for
            # example "_sublist") must not overwrite it.
            if hasattr(self, folder):
                continue
            # add it as attribute, assuming that dir-name is valid python varname
            try:
                sublister = RecursiveDirLister(path)
            except OSError:
                continue#ignore permission errors etc
            setattr(self, folder, sublister)

    def __str__(self):
        return str(self._sublist)
etc = RecursiveDirLister("/etc")
print etc.fonts
output:
['conf.avail', 'conf.d', 'fonts.conf', 'fonts.dtd']
Not sure what you're asking, but would this work?
for root,dirs,files in os.walk(/path/to/testdir/):
listers = dict((dir, directory_lister(dir)) for dir in dirs)
#now you can use:
listers['j']
listers['k']
listers['l']
I'm trying to write some xml by this piece of code
docs = XmlReportGenerator()
docs.AddMatchRow('FC Barcelona','Madryt','5:0')
docs.Save()
and I wrote my own method:
from lxml import etree
# Version from the question, kept exactly as posted.
# `root` and `doc` are assigned in the class body, i.e. they are class
# attributes; AddMatchRow and Save refer to them as bare names `root` / `doc`,
# which are not in scope inside the methods.
class XmlReportGenerator:
""""""
root = etree.Element('results')
doc = etree.ElementTree(root)
#----------------------------------------------------------------------
def __init__(self):
""""""
def AddMatchRow(self,teamA,teamB, score):
pageElement = etree.SubElement(root,'Flight',teamA, teamB, score)
""""""
def Save(self,path = None):
outFile = open('Matches.xml', 'w')
doc.write(outFile)
NameError: global name 'root' is not defined
Process terminated with an exit code of 1
done
NameError: global name 'doc' is not defined
Process terminated with an exit code of 1
done
Am I missing something? I'm a newbie in python (I have more experience in c#).
Python is explicit. Instance variables must be prefixed with `self.`. Class variables must be prefixed with the name of the class.
Here's a fixed version. The original SubElement call was incorrect as well:
from lxml import etree
# derive from 'object' if Python 2.X (it is default in Python 3.X)
class XmlReportGenerator(object):
    """Collects match results and writes them out as an XML document."""

    def __init__(self):
        # clearer to init instance variables here.
        self.root = etree.Element('results')
        self.doc = etree.ElementTree(self.root)

    def AddMatchRow(self, teamA, teamB, score):
        """Append one ``Flight`` element with ``teamA``/``teamB``/``score``
        child elements holding the given texts."""
        # Need self.root here
        pageElement = etree.SubElement(self.root, 'Flight')
        # Added data elements (or did you want attributes?)
        etree.SubElement(pageElement, 'teamA').text = teamA
        etree.SubElement(pageElement, 'teamB').text = teamB
        etree.SubElement(pageElement, 'score').text = score

    def Save(self, path=None):
        """Write the document to ``path`` (default: ``Matches.xml``).

        The file name is handed to ``ElementTree.write`` so that lxml opens
        and closes the file itself; nothing is left open and no text-mode
        handle receives serialised bytes.
        """
        target = 'Matches.xml' if path is None else path
        # Need self.doc here
        self.doc.write(target)
# This code will run if the script is executed directly,
# but will be skipped if the script is imported by another script.
if __name__ == '__main__':
    docs = XmlReportGenerator()
    docs.AddMatchRow('FC Barcelona', 'Madryt', '5:0')
    docs.Save()
self is there for a reason. Use self.root, not root