loop over files in sub zip-directories python - python

My .txt files are saved in zipped subfolders as follows:
mainfolder.zip
mainfolder/folder1 (folder 1 is no zip-file)
mainfolder/folder1/subfolder11.zip
mainfolder/folder1/subfolder12.zip
mainfolder/folder2 (folder 2 is no zip file)
mainfolder/folder2/subfolder21.zip
mainfolder/folder2/subfolder22.zip
I want to loop over all the text files in subfolders of the mainfolder. Is there an easy way to do it?

I hope this helps.
If the script is not in the folder with the folder to be searched, replace os.getcwd() with the path to the folder.
import os
import logging
from pathlib import Path
from shutil import unpack_archive
# unzipping all zipped folders
def unpack_all_zips(base_dir):
    """Unpack every ``*.zip`` below *base_dir*, including nested archives.

    Each archive ``<dir>/<name>.zip`` is extracted into ``<dir>/<name>/``.
    The tree is re-scanned after every pass, so archives that only appear
    once an outer archive has been unpacked are processed as well; this
    does not rely on how lazily ``Path.rglob`` walks the tree.

    Archives that cannot be unpacked are logged and skipped.
    """
    base_dir = Path(base_dir)
    handled = set()  # archives already attempted, so no pass repeats work
    while True:
        pending = [
            archive
            for archive in sorted(base_dir.rglob("*.zip"))
            if archive not in handled and archive.is_file()
        ]
        if not pending:
            break
        for archive in pending:
            handled.add(archive)
            extract_dir = archive.with_name(archive.stem)
            try:
                unpack_archive(str(archive), str(extract_dir), 'zip')
            except OSError:
                # Covers PermissionError and shutil.ReadError (both OSError).
                logging.exception("could not unpack %s", archive)


unpack_all_zips(os.getcwd())
# finding and dealing with a .txt file
for folder, _subfolders, names in os.walk(os.getcwd()):
    for name in names:
        extension = os.path.splitext(name)[1]
        if extension.lower() != ".txt":
            continue  # only text files are of interest
        txt_path = os.path.join(folder, name)
        with open(txt_path) as handle:
            lines = handle.readlines()
        print("File: ", txt_path, "\ncontent: ", lines)
        # all what you want do with file...
Nice day :)

Related

Moving files with python using a list .txt

I want to move files from one directory to another from a .txt file containing the names of the files to be moved, the script must first browse the directory and if it finds the file it moves it to the new directory. Where to start? I've managed to do this for a file list but I'd like to do it directly via the .txt file without rewriting the names of the files to be moved
import shutil, os
# Names of the files to relocate; relative names resolve against the working directory.
files = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt']
for name in files:
    shutil.move(name, 'destination_directory')
As far as I know, you can't move your files by passing the .txt file itself.
Just move each file by its path.
You can use my code below.
I have double checked and it work on my side.
Sorry for my poor English Skill :)
import os
import shutil
from pathlib import Path
def create_directory(dir_name: str):
    """Create *dir_name* under the current working directory.

    Existing directories are accepted (``exist_ok=True``). Creation is
    best effort: if it fails, the reason is printed and the intended path
    is still returned.

    Args:
        dir_name: Directory name (or relative path) to create.

    Returns:
        The path of the directory, joined onto ``os.getcwd()``.
    """
    dir_path = os.path.join(os.getcwd(), dir_name)
    try:
        os.makedirs(dir_path, exist_ok=True)
    except OSError as error:
        # Report why creation failed instead of discarding the exception.
        print("Directory '%s' can not be created: %s" % (dir_name, error))
    return dir_path
def create_files(dir_path: str, file_name: str):
    """Create (or truncate) an empty file named *file_name* in *dir_path*.

    The path is built with ``os.path.join`` so it is correct on every
    platform, not only where ``\\`` is the separator.

    Args:
        dir_path: Existing directory that will hold the file.
        file_name: Name of the file to create.

    Returns:
        The full path of the file.
    """
    file_path = os.path.join(dir_path, file_name)
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(file_path, "w"):
        pass
    if Path(file_path).is_file():
        print(f'File: {file_name} created successfully')
    else:
        print(f'File: {file_name} does not exist')
    return file_path
def main():
    """Create four sample files, then move them all into one directory."""
    # Step 1: Creating file1.txt ... file4.txt, each inside its own directory.
    samples = (
        ("file1_dir", 'file1.txt'),
        ("file2_dir", 'file2.txt'),
        ("file3_dir", 'file3.txt'),
        ("file4_dir", 'file4.txt'),
    )
    files = []
    for folder_name, file_name in samples:
        files.append(create_files(create_directory(folder_name), file_name))
    # Step 2: Creating destination_directory.
    destination_dir = create_directory('destination_directory')
    # Step 3: Moving the files.
    for created in files:
        shutil.move(created, destination_dir)


if __name__ == "__main__":
    main()

Python - Delete xlsx files from a folder

I am trying to delete all xlsx files from a folder, note it has files of other extension. Given below is what I have tried:
# NOTE(review): the trailing ". <-- Folder that has all the files" on the next line is an
# annotation from the post, not Python; the line does not parse as written.
path = '/users/user/folder'. <-- Folder that has all the files
# list_ is never used below.
list_ = []
# Iterating over a string visits its characters one by one, not the files in the folder.
for file_ in path:
fileList = glob.glob(path + "/*.xlsx")
# All matching paths are joined into ONE space-separated string ...
fileList1 = " ".join(str(x) for x in fileList)
try:
# ... so os.remove() receives that combined string instead of one path per call.
os.remove(fileList1)
except Exception as e:
# The failure is only printed.
print(e)
But the above does not delete the xlsx files.
Try:
import os
import glob
path = '/users/user/folder'
# "**" together with recursive=True also matches files in nested folders.
xlsx_pattern = path + '/**/*.xlsx'
for xlsx_file in glob.iglob(xlsx_pattern, recursive=True):
    os.remove(xlsx_file)
you can use this code to delete the xlsx or xls file
import os
path = r'your path '
# Build full paths instead of calling os.chdir(): changing the working
# directory is a process-wide side effect that is not needed just to delete.
for file in os.listdir(path):
    # str.endswith accepts a tuple of suffixes.
    if file.endswith(('.xlsx', '.xls')):
        print(file)
        os.remove(os.path.join(path, file))
You can use the below code as well to remove multiple .xlsx files in a folder.
import glob, os
path = r"folder path"
# Collect every .xlsx directly inside the folder, then delete them one by one.
filenames = glob.glob(path + "/*.xlsx")
for xlsx_path in filenames:
    os.remove(xlsx_path)
It would be better to use os.listdir() and fnmatch.
Try the below code .
import fnmatch
import os

folder = '/users/user/folder'  # filepath
listOfFiles = os.listdir(folder)
pattern = "*.xlsx"  # was "*.xslx", which never matches Excel workbooks
for entry in listOfFiles:
    if fnmatch.fnmatch(entry, pattern):
        print("deleting" + entry)
        # os.listdir() returns bare names; join with the folder so the
        # removal does not depend on the current working directory.
        os.remove(os.path.join(folder, entry))

Reading names of multiple .txt files from a folder in python

Is there a way to read the names of multiple .txt files from a single folder? Right now I am only able to read the contents of the files.
import glob
import errno
path = 'C:/Users/rabhi/Desktop/NLP/aclImdb/test/neg/*.txt'
files = glob.glob(path)
for name in files:
    try:
        with open(name) as handle:
            # Print each line as a list of whitespace-separated tokens.
            for tokens in map(str.split, handle):
                print(tokens)
    except IOError as exc:
        if exc.errno == errno.EISDIR:
            continue  # a directory matched the pattern; skip it
        raise
You can use os.walk() for this:
import os
def main():
    """Print the path of every file below 'path', recursing into sub-directories."""
    for dirName, subDirList, fileList in os.walk('path'):
        # os.walk already visits every sub-directory in turn, so fileList is
        # iterated directly. Wrapping it in a loop over subDirList would skip
        # files in directories that have no sub-directories.
        for file in fileList:
            #Do something with file
            print(os.path.join(dirName, file))
main()
This recursively goes through your files in a given directory.
To ignore sub-directories you can try something like this:
import os
def main():
    """Print the path of every file directly inside 'path', ignoring sub-directories."""
    for dirName, subDirList, fileList in os.walk('path'):
        # Emptying the list in place tells os.walk (top-down mode) not to
        # descend any further, so only the top-level directory is visited.
        subDirList.clear()
        for file in fileList:
            #Do something with file
            print(os.path.join(dirName, file))
main()

Recursively rename file extensions

I am having a difficult time creating a python script that will rename file extensions in a folder and continue to do so in sub directories. Here is the script I have thus far; it can only rename files in the top directory:
#!/usr/bin/python
# Usage: python rename_file_extensions.py
import os
import sys
folder = "C:\\Users\\username\\Desktop\\test\\"
for filename in os.listdir(folder):  # parse through file list in the folder "test"
    # Split off the real extension; a substring search would also match
    # ".jpg" in the middle of a name.
    stem, extension = os.path.splitext(filename)
    if extension == ".jpg":  # if a .jpg is found
        # Keep the dot in the new extension and rename via the full path,
        # because os.listdir() returns bare names.
        os.rename(os.path.join(folder, filename),
                  os.path.join(folder, stem + ".jpeg"))
import os
import sys
# Walk the script's own directory tree and rename .jpg files to .jpeg.
directory = os.path.dirname(os.path.realpath(sys.argv[0])) #get the directory of your script
for subdir, dirs, files in os.walk(directory):
for filename in files:
# find() > 0 is true when '.jpg' occurs anywhere after the first character of the name.
if filename.find('.jpg') > 0:
subdirectoryPath = os.path.relpath(subdir, directory) #get the path to your subdirectory
# NOTE(review): filePath is relative to `directory`; os.rename resolves relative paths
# against the current working directory, so this presumably assumes the script is run
# from its own folder — confirm.
filePath = os.path.join(subdirectoryPath, filename) #get the path to your file
# replace() acts on the whole relative path, so '.jpg' inside a folder name is rewritten too.
newFilePath = filePath.replace(".jpg",".jpeg") #create the new name
os.rename(filePath, newFilePath) #rename your file
I modified Jaron's answer with the path to the file and the complete example of renaming the file
I modified the answer of Hector Rodriguez Jr. a little bit because it would replace ANY occurrence of ".jpg" in the path, e.g. /path/to/my.jpg.files/001.jpg would become /path/to/my.jpeg.files/001.jpeg, which is not what you wanted, right?
Although it is generally not a good idea to use dots "." in a folder name, it can happen...
import os
import sys
directory = os.path.dirname(os.path.realpath(sys.argv[0]))  # directory of your script
for subdir, dirs, files in os.walk(directory):
    for filename in files:
        # Compare the real extension; find()/replace() would also act on a
        # ".jpg" that appears in the middle of the file name.
        stem, extension = os.path.splitext(filename)
        if extension != ".jpg":
            continue
        # os.walk yields subdir rooted at `directory`, so joining with it gives
        # paths that work regardless of the current working directory.
        filePath = os.path.join(subdir, filename)  # path to file
        newFilePath = os.path.join(subdir, stem + ".jpeg")  # new path
        os.rename(filePath, newFilePath)  # rename
You can process the directory like this:
import os
def process_directory(root):
    """Recursively print the entries below *root*.

    Directories are announced with an "is directory" prefix and then
    descended into; every other entry is printed by name. Entries are
    visited in sorted order so the output is deterministic.

    Args:
        root: Path of the directory to list.
    """
    for item in sorted(os.listdir(root)):
        # os.listdir() returns bare names. They must be joined with root,
        # otherwise isdir() and the recursion look in the working directory.
        item_path = os.path.join(root, item)
        if os.path.isdir(item_path):
            print("is directory", item)
            process_directory(item_path)
        else:
            print(item)
            #Do stuff
process_directory(os.getcwd())
Although, this isn't really necessary. Simply use os.walk which will iterate through all toplevel and further directories / files
Do it like this:
# `root` must already hold the top-level directory to scan.
for subdir, dirs, files in os.walk(root):
    for f in files:
        # Compare the real extension; find('.jpg') > 0 would also match
        # names such as "a.jpg.bak".
        if os.path.splitext(f)[1] == '.jpg':
            #The rest of your stuff
            pass
That should do exactly what you want.

match filenames to foldernames then move files

I have files named "a1.txt", "a2.txt", "a3.txt", "a4.txt", "a5.txt" and so on. Then I have folders named "a1_1998", "a2_1999", "a3_2000", "a4_2001", "a5_2002" and so on.
I would like to make the connection between file "a1.txt" & folder "a1_1998", for example. (I'm guessing I'll need a regular expression to do this.) Then use shutil to move file "a1.txt" into folder "a1_1998", file "a2.txt" into folder "a2_1999", etc.
I've started like this, but I'm stuck because of my lack of understanding of regular expressions.
import re
##list files and folders
# NOTE(review): the pattern below looks truncated — '(?P' opens a named group but no
# '<name>...)' follows, so it is not a valid regular expression as written. The
# m.group('id') call suggests a group named 'id' was intended — TODO confirm.
r = re.compile('^a(?P')
m = r.match('a')
m.group('id')
##
##Move files to folders
I modified the answer below slightly to use shutil to move the files, did the trick!!
import shutil
import os
import glob
files = glob.glob(r'C:\Wam\*.txt')
for file in files:
    # Derive the "aN" part from the file name itself instead of slicing at
    # fixed offsets, which only worked for this exact folder length.
    first_part = os.path.splitext(os.path.basename(file))[0]
    # find the matching directory
    matches = glob.glob(r'C:\Wam\%s_*/' % first_part)
    if not matches:
        # Leave the file where it is rather than crashing with IndexError.
        print("no matching directory for", file)
        continue
    shutil.move(file, matches[0])
You do not need regular expressions for this.
How about something like this:
import glob
import os  # required by os.path.splitext / os.rename below

files = glob.glob('*.txt')
for file in files:
    # this will remove the .txt extension and keep the "aN"
    first_part = os.path.splitext(file)[0]
    # find the matching directory
    matches = glob.glob('%s_*/' % first_part)
    if not matches:
        continue  # no folder for this file; skip instead of raising IndexError
    os.rename(file, os.path.join(matches[0], file))
A slight alternative, taking into account Inbar Rose's suggestion.
import os
import glob
files = glob.glob('*.txt')
# '*_*' also matches ordinary files containing an underscore; keep only directories.
dirs = [candidate for candidate in glob.glob('*_*') if os.path.isdir(candidate)]
for file in files:
    filename = os.path.splitext(file)[0]
    # The folder prefix is everything before the first underscore ("a1" in "a1_1998").
    matchdir = next((d for d in dirs if d.split('_')[0] == filename), None)
    if matchdir is None:
        continue  # no matching folder; next() without a default would raise StopIteration
    os.rename(file, os.path.join(matchdir, file))

Categories