How can I convert a PureWindowsPath to an Iterable? - python

I'm working on an academy project to encrypt some files. I have managed to encrypt all the files in one folder, but when that folder contains another folder I get errors, so I decided to first list all the files and sub-directories of the folder:
# Walk the tree rooted at ROOT and print the full path of every file in it.
ROOT = r"C:\Users\Practiques\Desktop\archivos"
for path, subdirs, files in os.walk(ROOT):
    for name in files:
        # Join the directory being visited with the file name.
        pure_path = PurePath(path, name)
        print(pure_path)
With this code I get the paths in that form: C:\Users\XXX\Desktop\archivos\external-content.duckduckgo.com.jpg
C:\Users\XXX\Desktop\archivos\hola.txt
and then when I try to pass it to the function 'encrypt', I get this error:
TypeError: 'PureWindowsPath' object is not iterable
The format I need to pass to the function is this: ['C:\Users\XXX\Desktop\archivos\external-content.duckduckgo.com.jpg', 'C:\Users\XXX\Desktop\archivos\hola.txt', etc.]
I think one possible solution is to build a list while I collect all the recursive paths and their files, but I don't know how to do that.
The function encrypt:
def encrypt(items, key):
    """Encrypt every file named in *items* in place.

    :param items: iterable of file paths (for example a list); the loop
        below iterates over it, so a single path object is not enough
    :param key: key handed to ``Fernet``
    """
    f = Fernet(key)
    for item in items:
        # Read the plaintext completely before the file is reopened for
        # writing, so the ciphertext exists before anything is overwritten.
        with open(item, 'rb') as file:
            file_data = file.read()
        encrypted_data = f.encrypt(file_data)
        with open(item, 'wb') as file:
            file.write(encrypted_data)
How i call it:
for path, subdirs, files in os.walk(ROOT):
    for name in files:
        pure_path = PurePath(path, name)
        print(pure_path)
        # NOTE: encrypt() loops over its first argument, so passing one
        # path object here (rather than a list of paths) is what raises
        # the TypeError quoted in the question.
        encrypt(pure_path, key)

You need to use recursion to encrypt the sub-folders' contents:
import os
def recursive_search(path: str) -> "list[str]":
    """Return the paths of all files below a directory, recursively.

    :param path: directory to search
    :type path: str
    :return: one entry per file found, each built by joining *path* with
        the location of the file below it
    :rtype: list[str]
    :raises RuntimeError: if *path* is not a directory
    """
    if not os.path.isdir(path):
        raise RuntimeError(f"'{path}' is not a directory")

    found_files = []
    for item in os.listdir(path):
        full_path = os.path.join(path, item)
        if os.path.isfile(full_path):
            found_files.append(full_path)
        elif os.path.isdir(full_path):
            # Descend into the sub-directory and collect its files too.
            found_files.extend(recursive_search(full_path))
    return found_files
# Replace the placeholder with a real directory: recursive_search() raises
# RuntimeError when the path is not a directory.
directory = "YOUR ROOT/TOP-LEVEL DIRECTORY HERE"
print(recursive_search(directory))
Then, you would do:
# Encrypt, in place, every file that recursive_search() finds.
f = Fernet(key)
for item in recursive_search(directory):
    # Read the plaintext completely before reopening the file for writing.
    with open(item, 'rb') as file:
        file_data = file.read()
    encrypted_data = f.encrypt(file_data)
    with open(item, 'wb') as file:
        file.write(encrypted_data)
Edit 1: in regard to skipping over certain file extensions:
import os
def recursive_search(path: str, skip_extensions=(".ini",)) -> "list[str]":
    """Return the paths of all files below a directory, skipping some types.

    :param path: directory to search
    :type path: str
    :param skip_extensions: dot-extensions (ex.: '.txt') of files to leave
        out of the result; compared exactly, defaults to ``(".ini",)``
    :return: one entry per file found, each built by joining *path* with
        the location of the file below it
    :rtype: list[str]
    :raises RuntimeError: if *path* is not a directory
    """
    if not os.path.isdir(path):
        raise RuntimeError(f"'{path}' is not a directory")

    found_files = []
    for item in os.listdir(path):
        full_path = os.path.join(path, item)
        if os.path.isfile(full_path):
            dot_extension = os.path.splitext(full_path)[1]  # ex.: '.txt'
            if dot_extension in skip_extensions:
                # Skip this entry and move on to the next item of the loop.
                continue
            found_files.append(full_path)
        elif os.path.isdir(full_path):
            # Apply the same skip list to every sub-directory.
            found_files.extend(recursive_search(full_path, skip_extensions))
    return found_files
# Example root; recursive_search() raises RuntimeError if it is not a
# directory on the machine running this.
directory = "/Users/nicholasbarrow/GitHub/com.nicholasrbarrow.cpp"
print(recursive_search(directory))

# Encrypt, in place, every file that was not skipped by the search.
f = Fernet(key)
for item in recursive_search(directory):
    # Read the plaintext completely before reopening the file for writing.
    with open(item, 'rb') as file:
        file_data = file.read()
    encrypted_data = f.encrypt(file_data)
    with open(item, 'wb') as file:
        file.write(encrypted_data)

Related

How to copy only non-duplicate files whilst maintaining folder structure?

I am trying to find duplicates between two folders and copy only the unique image files to the 'dest' folder. I can copy all the non-duplicates using the code below; however, it doesn't maintain the source directory structure. I think os.walk yields 3-tuples, but their parts don't seem to be linked, so I'm not sure how to reconstruct the sub-directories.
Example:
import shutil, os
from difPy import dif
source = input('Input source folder:')
# '\\' keeps the prompt text unchanged; a bare '\ ' is an invalid escape.
dest = input('Input backup \\ destination folder:')
ext = ('.jpg', '.jpeg', '.gif', '.JPG', '.JPEG', '.GIF')
search = dif(source, dest)
result = search.result
result  # bare expression: presumably left over from a notebook cell
dupes = []
srcfiles = []
filecount = []
failed = []
removed = []

# Collect the location of every duplicate reported by dif.
for i in result.values():
    dupes.append(i['location'])

# Collect every image file below the source folder.
for dirpath, subdirs, files in os.walk(source):
    for x in files:
        if x.endswith(ext):
            srcfiles.append(os.path.join(dirpath, x))

for f in srcfiles:
    if f not in dupes:
        # Copies straight into dest, so the sub-directory part of f is
        # dropped -- this is the behaviour the question asks about.
        shutil.copy(f, dest)
        print('File copied successfully - '+f)
        filecount.append(f)
    else:
        print('File not copied successfully !!!! - '+f)
        failed.append(f)
I have also tried using the shutil.copytree function with an ignore list; however, it requires a new folder and I can't get the ignore-list function to work.
shutil.copytree example:
for i in result.values():
    # NOTE(review): with the list created inside the loop it only ever
    # holds the last filename -- confirm against the original layout.
    df = []
    df.append(i['filename'])


def ignorelist(source, df):
    # NOTE(review): the parameter df shadows the module-level list built
    # above; the function only sees what its caller passes in.
    return [f for f in df if os.path.isfile(os.path.join(source, f))]


shutil.copytree(source, destnew, ignore=ignorelist)
This function ignorelist should do the trick:
import shutil, os
from difPy import dif
source = input('Input source folder:')
# '\\' keeps the prompt text unchanged; a bare '\ ' is an invalid escape.
dest = input('Input backup \\ destination folder:')
ext = ('.jpg', '.jpeg', '.gif')
search = dif(source, dest)
# A set makes the membership test in ignorelist() a constant-time lookup.
dupes = {value['location'] for value in search.result.values()}


def ignorelist(source, files):
    """Return the entries of *files* that must not be copied.

    Meant to be used as the ``ignore`` callable of ``shutil.copytree``.

    :param source: directory whose entries are listed in *files*
    :param files: names of the entries in *source*
    :return: names of the files that are duplicates or whose name does
        not end with one of the extensions in ``ext``; entries that are
        not files are never returned
    """
    ignored = []
    for file in files:
        path = os.path.join(source, file)
        if not os.path.isfile(path):
            continue
        if path in dupes or not file.lower().endswith(ext):
            ignored.append(file)
    return ignored


# dest is presumably an existing folder (it was handed to dif above), and
# copytree refuses an existing target unless dirs_exist_ok is set.
shutil.copytree(source, dest, ignore=ignorelist, dirs_exist_ok=True)
And the other "more manual" way would be
import shutil, os
from difPy import dif
source = input('Input source folder:').rstrip('/\\')
# '\\' keeps the prompt text unchanged; a bare '\ ' is an invalid escape.
dest = input('Input backup \\ destination folder:').rstrip('/\\')
ext = ('.jpg', '.jpeg', '.gif')
search = dif(source, dest)
# A set makes the duplicate check below a constant-time lookup.
dupes = {value['location'] for value in search.result.values()}
srcfiles = []
copied = []
failed = []
skipped = []

for dirpath, subdirs, files in os.walk(source):
    for file in files:
        if not file.lower().endswith(ext):
            continue
        srcfile = os.path.join(dirpath, file)
        srcfiles.append(srcfile)
        if srcfile in dupes:
            print('File not copied (duplicate) - '+srcfile)
            skipped.append(srcfile)
            continue
        try:
            # Re-create the file's location relative to source under dest.
            destfile = os.path.join(dest, os.path.relpath(srcfile, source))
            os.makedirs(os.path.dirname(destfile), exist_ok=True)
            shutil.copy(srcfile, destfile)
            print('File copied successfully - '+srcfile)
            copied.append(srcfile)
        except Exception as err:
            print('File not copied (error %s) - %s' % (str(err), srcfile))
            # Record the file that failed (the name f does not exist here).
            failed.append(srcfile)
I have changed some variable names to make them more descriptive. And what you call failed is really just a list of files that are not copied because they are duplicates rather than files whose copying was attempted but failed.
import shutil, os
from difPy import dif
source = input('Input source folder: ')
# '\\' keeps the prompt text unchanged; a bare '\ ' is an invalid escape.
dest = input('Input backup \\ destination folder: ')

# Remove trailing path separators if they exist:
if source.endswith(('/', '\\')):
    source = source[:-1]
if dest.endswith(('/', '\\')):
    dest = dest[:-1]

# Use the correct path separator to
# ensure correct matching with dif results:
if os.sep == '/':
    source = source.replace('\\', os.sep)
elif os.sep == '\\':
    source = source.replace('/', os.sep)

ext = ('.jpg', '.jpeg', '.gif', '.JPG', '.JPEG', '.GIF')
search = dif(source, dest)
result = search.result
# Set comprehension:
dupes = {duplicate['location'] for duplicate in result.values()}
copied = []
not_copied = []

for dirpath, subdirs, files in os.walk(source):
    # Sub-directory relative to the source directory ('.' for source itself).
    subdirectory = os.path.relpath(dirpath, source)
    if subdirectory == os.curdir:
        dest_directory = dest
    else:
        dest_directory = os.path.join(dest, subdirectory)

    for file in files:
        if not file.endswith(ext):
            continue
        source_path = os.path.join(dirpath, file)
        if source_path in dupes:
            print('File not copied -', source_path)
            not_copied.append(source_path)
            continue
        # ensure directory exists:
        os.makedirs(dest_directory, exist_ok=True)
        dest_path = os.path.join(dest_directory, file)
        shutil.copy(source_path, dest_path)
        print('File copied successfully -', source_path)
        copied.append(source_path)

Get files not in hidden folders

The test is failing because it's getting the files from hidden folders too. How can I modify the code so that it skips the hidden folders?
def get_files_not_in_hidden_folder(parent_folder: str, extension: str) -> List[str]:
    """
    Get all files recursively from parent folder,
    except for the ones that are in hidden folders
    """
    files = []
    for root, _, filenames in os.walk(parent_folder):
        for filename in filenames:
            # NOTE: root is the whole path of the visited directory, so
            # startswith('.') only looks at its first character; this is
            # the check the question is about.
            if filename.endswith(extension) and not root.startswith('.'):
                files.append(os.path.join(root, filename))
    logger.debug(f"get_files_not_in_hidden_folder: {parent_folder}, {extension} -> {files}")
    return files
def test_get_files_not_in_hidden_folder():
    """Only the file outside every dot-prefixed folder may be returned."""
    Path('tmp').mkdir(parents=True, exist_ok=True)
    try:
        Path('tmp/test.json').touch()
        Path('tmp/tmp/.tmp').mkdir(parents=True, exist_ok=True)
        Path('tmp/tmp/.tmp/test.json').touch()
        Path('tmp/.tmp/tmp').mkdir(parents=True, exist_ok=True)
        Path('tmp/.tmp/tmp/test.json').touch()
        assert get_files_not_in_hidden_folder('tmp', '.json') == ['tmp/test.json']
    finally:
        # Remove the fixture tree even when the assertion fails.
        shutil.rmtree(Path('tmp'))
What you call root is the full path, including parent names, so root.startswith('.') only tests the very first character of that path.
To skip hidden folders you have to look at every directory component of the path, not just the start, like:
for root, dirs, filenames in os.walk(parent_folder):
    # Removing hidden folders from dirs in place stops os.walk from
    # descending into them, whatever the path separator is and even if
    # parent_folder itself contains a dot-prefixed component.
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    for filename in filenames:
        if filename.endswith(extension):
            files.append(os.path.join(root, filename))
I would implement this something like as follows ...
def my_walk(root_dir):
    """Walk *root_dir* top-down, leaving out every dot-prefixed entry.

    Yields ``(root_dir, dirs, files)`` tuples; ``dirs`` and ``files``
    hold paths already joined with ``root_dir``.  Entries whose name
    starts with "." are skipped, so hidden folders are never entered.
    A directory that cannot be listed is reported with a "SKIP:" line
    and yielded with whatever was collected before the failure.
    """
    files, dirs = [], []
    try:
        for fname in os.listdir(root_dir):
            if fname.startswith("."):
                continue
            fpath = os.path.join(root_dir, fname)
            if os.path.isdir(fpath):
                dirs.append(fpath)
            else:
                files.append(fpath)
    except OSError:
        # Listing failed (missing, not a directory, no permission, ...).
        print("SKIP:", root_dir)
    yield root_dir, dirs, files
    for d in dirs:
        yield from my_walk(d)
I think this should work ...
# Print, for every visited directory, the files my_walk() reports for it.
for root, _, filenames in my_walk(parent_folder):
    print(f"{root} contains {filenames}")

Trying to copy the content of files from source to destination if the file is a .txt and the files have the same name, and then ZIP each newly copied file

I'm new to Python and I'm trying to copy the content of files from dir_A to dir_B, but only if the file is a .txt file and a file with the same name exists in both dir_A and dir_B. After that, I want to zip each of these newly copied files.
import os, shutil, zipfile
src_folder = "C:/Users/pushka/pythonApp1/src_folder"
dst_folder = "C:/Users/pushka/pythonApp1/dst_folder"
# only .txt files will be copied
ext = ".txt"
try:
    for src_f in os.scandir(src_folder):
        for dst_f in os.scandir(dst_folder):
            # Copy only when the same file name exists in both folders.
            if src_f.path.endswith(ext) and os.path.basename(src_f) == os.path.basename(dst_f):
                # copy file
                shutil.copyfile(src_f, dst_f)
finally:
    # There is no except clause: this line runs whether or not the
    # copying raised.
    print("The 'try except' is finished")
I have searched and tried several options to ZIP, but none of them work properly, so I need your help please
I modified your code a bit, but this should do the trick:
import os, shutil, zipfile
src_folder = "C:/Users/pushka/pythonApp1/src_folder"
dst_folder = "C:/Users/pushka/pythonApp1/dst_folder"
# only .txt files will be copied
ext = ".txt"
copied_files = []

for src_f in os.scandir(src_folder):
    if src_f.name.endswith(ext) and not src_f.is_dir():
        dst_f = os.path.join(dst_folder, src_f.name)
        # Existing destination files are left untouched.
        if not os.path.exists(dst_f):
            shutil.copyfile(src_f, dst_f)
            copied_files.append(dst_f)

print(copied_files)

zipfile_name = os.path.join(dst_folder, "copied_files.zip")
# An existing archive is never overwritten.
if not os.path.exists(zipfile_name):
    with zipfile.ZipFile(zipfile_name, "w") as zf:
        for txtfile in copied_files:
            print("Writing " + txtfile)
            # Store only the file name, not the full path, in the archive.
            zf.write(txtfile, os.path.split(txtfile)[-1])
It should be pretty self-explanatory, but I'll walk you through it. In the first for loop, we scan all entries in src_folder. If the name ends in .txt and it is not a directory, we create a path to the destination file. Then, as long as the destination file does not exist, we copy the source to the destination, and add the destination to the copied_files list.
After all the copying is done, we create the zip file's name. If it doesn't exist, we create it using the zipfile.ZipFile context manager and write in each copied file (from the destination, not the source), stripping the full path from it in the archive.
Please note that, by default, the zipfile uses ZIP_STORED as the compression format - i.e., the data is not compressed. See the docs for the other supported compression formats if you need a compressed archive.
Thanks a lot. Here is the answer to my own question, written with your help:
import os, shutil, zipfile
src_folder = "C:/Users/pushka/pythonApp1/src_folder"
dst_folder = "C:/Users/pushka/pythonApp1/dst_folder"
# only .txt files will be copied
ext = ".txt"
copied_files = []

for src_f in os.scandir(src_folder):
    if not (src_f.is_file() and src_f.name.endswith(ext)):
        continue
    # Check directly for a same-named file in the destination instead of
    # re-scanning the whole destination folder for every source file.
    dst_path = os.path.join(dst_folder, src_f.name)
    if os.path.isfile(dst_path):
        # copy file
        shutil.copyfile(src_f, dst_path)
        copied_files.append(dst_path)

print(copied_files)

# Create one archive per copied file, next to it, named <file root>.zip.
for txt_file in copied_files:
    file_root = os.path.splitext(txt_file)[0]
    zip_file_name = file_root + '.zip'
    with zipfile.ZipFile(zip_file_name, mode='w') as zf:
        zf.write(txt_file, os.path.basename(txt_file))
Works as expected
Simple format:
from pathlib import Path
from typing import List
from zipfile import ZipFile
src_folder = Path("C:/Users/pushka/pythonApp1/src_folder")
dst_folder = Path("C:/Users/pushka/pythonApp1/dst_folder")
SUFFIX = ".txt"
def copy_file(from_path: Path, to_path: Path, copied) -> None:
    """Copy the bytes of *from_path* to *to_path* and record the target.

    :param from_path: file to read
    :param to_path: file to create or overwrite
    :param copied: list that *to_path* is appended to after the write
    """
    content = from_path.read_bytes()
    to_path.write_bytes(content)
    copied.append(to_path)
    print(f"Copy file: {from_path} --> {to_path}")
def zip_them(paths: List[Path], filename: str = "copied.zip") -> str:
    """Write all *paths* into one zip archive and return the archive name.

    :param paths: files to add; each is stored under its bare file name
    :param filename: archive to create (overwritten if it exists);
        defaults to "copied.zip", which is resolved against the current
        working directory
    :return: *filename*
    """
    with ZipFile(filename, "w") as z:
        for path in paths:
            z.write(path, path.name)
    return filename
def main():
    """Copy every ``SUFFIX`` file from src_folder to dst_folder and zip them.

    :raises FileNotFoundError: if either folder does not exist
    """
    # Explicit checks instead of assert, which is stripped under -O.
    for folder in (src_folder, dst_folder):
        if not folder.exists():
            raise FileNotFoundError(f"path `{folder}` not found!")

    copied = []
    for p in src_folder.glob(f"*{SUFFIX}"):
        dst = dst_folder / p.name
        copy_file(p, dst, copied)
    fn = zip_them(copied)
    print(f"There are {len(copied)} files copied. And zipped to: {fn}")


if __name__ == "__main__":
    main()
My preferred version (asynchronous, using anyio):
from typing import List
from zipfile import ZipFile
import anyio # pip install anyio
from anyio import Path
src_folder = Path("C:/Users/pushka/pythonApp1/src_folder")
dst_folder = Path("C:/Users/pushka/pythonApp1/dst_folder")
async def copy_file(from_path: Path, to_path: Path, copied) -> None:
    """Copy the bytes of *from_path* to *to_path* and record the target.

    :param from_path: file to read (awaited)
    :param to_path: file to write (awaited)
    :param copied: list that *to_path* is appended to after the write
    """
    content = await from_path.read_bytes()
    await to_path.write_bytes(content)
    copied.append(to_path)
    print(f"copy file: {from_path} --> {to_path}")
def zip_them(paths: List[Path], filename: str = "copied.zip") -> str:
    """Write all *paths* into one zip archive and return the archive name.

    :param paths: files to add; each is stored under its bare file name
    :param filename: archive to create (overwritten if it exists);
        defaults to "copied.zip", which is resolved against the current
        working directory
    :return: *filename*
    """
    with ZipFile(filename, "w") as z:
        for path in paths:
            z.write(path, path.name)
    return filename
async def main():
    """Copy all *.txt files concurrently, then zip the copies."""
    copied = []
    async with anyio.create_task_group() as tg:
        async for p in src_folder.glob("*.txt"):
            dst = dst_folder / p.name
            tg.start_soon(copy_file, p, dst, copied)
    # Zip only after the task group has exited: leaving the block waits
    # for the started copy tasks, so `copied` is complete at this point.
    fn = zip_them(copied)
    print(f"zip file created: {fn}")


if __name__ == "__main__":
    import timeit

    cost = timeit.timeit("anyio.run(main)", number=1, globals=globals())
    print("Cost:", round(cost, 2), "seconds.")

How to search through both zipped and unzipped folders for a specific line

I'm trying to implement a Python script that takes a folder from the user (it can be zipped or unzipped) and searches through all the files in the folder to output the specific lines that my regular expression matches. My code below works for regular unzipped folders, but I can't figure out how to do the same with zipped folders that are passed to the function. My code is below; thanks in advance!
def myFunction(folder_name):
    """Print every line below *folder_name* that matches a version pattern.

    Walks *folder_name* recursively and scans the files whose names end
    with one of the selected suffixes.  Files are read as text; a .zip
    found in the tree is read the same way, it is not unpacked.

    :param folder_name: directory to search
    """
    # Specify here the format of files you hope to search from
    # (ex: ".txt" or ".log")
    suffixes = ('.txt', '.log', '-release', '.out', 'messages', '.zip')
    RE2 = r'^Version: \d.+\d.+\d.\w\d.+'
    RE3 = r'^.+version.(\d+.\d+.\d+.\d+)'
    # Compiled once: the pattern is the same for every line of every file.
    pattern2 = re.compile('('+RE2+'|'+RE3+')', re.IGNORECASE)

    for path, subdirs, files in os.walk(folder_name):
        # Use the file names os.walk reports for this directory, so that
        # sub-directories with a matching name are never opened as files.
        selected = sorted(f for f in files if f.endswith(suffixes))
        for name in selected:
            # Join the path and the name so the file can be opened.
            filename = os.path.join(path, name)
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
            except UnicodeDecodeError:
                # Not valid UTF-8: retry with the default encoding and
                # replace undecodable bytes instead of failing.
                with open(filename, 'r', errors='replace') as f:
                    lines = f.readlines()
            for line in lines:
                # strip out \x00 from read content, in case it's encoded
                # differently
                line = line.replace('\x00', '')
                for match2 in pattern2.finditer(line):
                    print(line)
# Calling the function to use it
myFunction(r"SampleZippedFolder.zip")
The try and except block of my code was my attempt to open the zipped folder and read it. I'm still not very clear with how to open the zipped folder or how it works. Please let me know how I can modify my code to make it work, much appreciated!
One possibility is to first determine what kind of object folder_name is, using zipfile.is_zipfile() and os.path.isdir(); whichever one succeeds, get the list of files and proceed. Maybe something like this:
import zipfile, os, re
def myFunction(folder_name):
    """List the entries of a zip archive or a folder, then process them.

    :param folder_name: path of a zip archive or of a directory
    :raises ValueError: if *folder_name* is neither of the two
    """
    path = folder_name
    if zipfile.is_zipfile(path):
        print('ZipFile: {}'.format(path))
        # The context manager closes the archive once the names are read.
        with zipfile.ZipFile(path) as f:
            files = f.namelist()
    elif os.path.isdir(path):
        print('Folder: {}'.format(path))
        files = os.listdir(path)
    else:
        raise ValueError('{} is neither a zip file nor a folder'.format(path))
    # should now have a list of files
    # proceed processing the files
    for filename in files:
        ...

Python: work with recursive folders to read and write

I have this code:
# cwd = "C:\Users\johnr\Desktop\myFolder" - current working directory
content_dir = os.path.join(cwd, "content")
build_dir = os.path.join(cwd, "build")

# The header, footer and optional content template are the same for every
# page, so read them once and close the files right away.
with open(header_file_dir, "r") as header_file:
    header_html = header_file.read()
with open(footer_file_dir, "r") as footer_file:
    footer_html = footer_file.read()

content_templ_dir = os.path.join(cwd, "templates", "content_page.html")
if os.path.exists(content_templ_dir):
    with open(content_templ_dir, "r") as content_templ_file:
        content_template = content_templ_file.read()
else:
    content_template = None

for filename in os.listdir(content_dir):
    stem, extension = os.path.splitext(filename)
    if extension not in (".md", ".tile", ".html", ".txt"):
        # Skip the entry instead of building a page from stale variables.
        print(filename+" is not a valid file type!")
        continue
    # Every supported source type is built into an .html file.
    newFilename = stem + ".html"

    # Get the actual stuff we want to put on the page
    with open(os.path.join(content_dir, filename), "r") as text_content:
        raw_text = text_content.read()
    if extension == ".md":
        text_cont1 = "\n"+markdown.markdown(raw_text)+"\n"
    elif extension == ".tile":
        text_cont1 = "\n"+textile.textile(raw_text)+"\n"
    else:
        # .html and .txt content is used unchanged.
        text_cont1 = raw_text

    with open(os.path.join(build_dir, newFilename), "w") as currents_working_file:
        # Write the header
        currents_working_file.write(header_html)
        # Write the text content into the content template and onto the
        # build file
        if content_template is not None:
            currents_working_file.write(
                content_template.replace("{page_content}", text_cont1))
        else:
            currents_working_file.write(text_cont1)
        # Write the footer to the build file
        currents_working_file.write("\n"+footer_html)
which searches for files in the 'content' directory and then creates a file of the same name in the 'build' directory. How can I make this work when there are files in folders inside the 'content' directory?
In order to recursively traverse directories, Python provides os.walk:
content_root = os.path.join(cwd, "content")
for root, dirs, files in os.walk(content_root):
    # Location of the visited directory relative to content/.
    relative_path = os.path.relpath(root, content_root)
    # Mirror that directory under build/ so the open() below can succeed.
    os.makedirs(os.path.join(cwd, "build", relative_path), exist_ok=True)
    for filename in files:
        # The rest of the per-file work from the question goes here; it
        # is responsible for closing this file.
        currents_working_file = open(os.path.join(cwd, "build", relative_path, filename), "w")
Assuming that cwd just holds the path to the current working dir:
from pathlib import Path
from itertools import chain
source_extensions = {'md', 'html', 'txt'}
source_root_dir_path = Path("content")
# Lazily chain one recursive glob per extension.
source_file_paths = chain.from_iterable(
    source_root_dir_path.glob("**/*.{}".format(ext)) for ext in source_extensions
)

# Read the header and footer once: a second read() on the same open file
# object returns an empty string, so only the first page would get them.
header_text = header_file.read()
footer_text = footer_file.read()

for p in source_file_paths:
    # Same location as under content/, but below build/ and as .html.
    relative_html_path = p.with_suffix(".html").relative_to(source_root_dir_path)
    destination_file_path = Path("build") / relative_html_path
    destination_file_path.parent.mkdir(parents=True, exist_ok=True)
    with destination_file_path.open('w') as f:
        f.write(header_text)
        f.write("\n")
        f.write(footer_text)

Categories