Reading names of multiple .txt files from a folder in python - python

Is there a way to read the names of multiple .txt files from a single folder? I right now am only able to read the contents of the files.
import glob
import errno
path = 'C:/Users/rabhi/Desktop/NLP/aclImdb/test/neg/*.txt'
files = glob.glob(path)
for name in files:
try:
with open(name) as f:
for line in f:
print(line.split())
except IOError as exc:
if exc.errno != errno.EISDIR:
raise

You can use os.walk() for this:
import os
def main():
for dirName, subDirList, fileList in os.walk('path'):
for subDir in subDirList:
for file in fileList:
#Do something with file
main()
This recursively goes through your files in a given directory.
To ignore sub-directories you can try something like this:
import os
def main():
for dirName, subDirList, fileList in os.walk('path'):
for file in fileList:
#Do something with file
main()

Related

loop over files in sub zip-directories python

My txt.files are saved in zipped-subfolders as follows:
mainfolder.zip
mainfolder/folder1 (folder 1 is no zip-file)
mainfolder/folder1/subfolder11.zip
mainfolder/folder1/subfolder12.zip
mainfolder/folder2 (folder 2 is no zip file)
mainfolder/folder1/subfolder21.zip
mainfolder/folder1/subfolder22.zip
I want to loop over all the text files in subfolders of the mainfolder. Is there an easy way to do it?
I hope to help.
If the script is not in the folder with the folder to be searched, replace os.getcwd() with the path to the folder.
import os
import logging
from pathlib import Path
from shutil import unpack_archive
# unzipping all zipped folders
zip_files = Path(os.getcwd()).rglob("*.zip")
while True:
try:
path = next(zip_files)
except StopIteration:
break
except PermissionError:
logging.exception("permission error")
else:
extract_dir = path.with_name(path.stem)
unpack_archive(str(path), str(extract_dir), 'zip')
# finding and dealing with a .txt file
for root, dirs, files in os.walk(os.getcwd()):
for f in files:
if os.path.splitext(f)[1].lower() == ".txt":
with open(os.path.join(root, f)) as fi:
lines = fi.readlines()
print("File: ",os.path.join(root, f),"\ncontent: ", lines)
# all what you want do with file...
Nice day :)

Moving files with python using a list .txt

I want to move files from one directory to another from a .txt file containing the names of the files to be moved, the script must first browse the directory and if it finds the file it moves it to the new directory. Where to start? I've managed to do this for a file list but I'd like to do it directly via the .txt file without rewriting the names of the files to be moved
import shutil, os
files = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt']
for file in files:
shutil.move(file, 'destination_directory')
As I know, U cant move your files with .txt
Just move your file_path
You can use my code below.
I have double checked and it work on my side.
Sorry for my poor English Skill :)
import os
import shutil
from pathlib import Path
def create_directory(dir_name: str):
"""To create directory before create files: txt, csv..."""
system_path = os.getcwd()
dir_path = os.path.join(system_path, dir_name)
try:
os.makedirs(dir_path, exist_ok=True)
except OSError as error:
print("Directory '%s' can not be created" % dir_name)
return dir_path
def create_files(dir_path: str, file_name: str):
"""Function for creating files"""
file_path = dir_path + fr"\{file_name}"
with open(file_path, "w") as open_file:
if Path(file_path).is_file():
print(f'File: {file_name} created successfully')
else:
print(f'File: {file_name} does not exist')
open_file.close() # Need to close.
return file_path
def main():
# Step 1: Creating file1.txt, file2.txt, file3.txt, file4.txt
file_one = create_files(create_directory("file1_dir"), 'file1.txt')
file_two = create_files(create_directory("file2_dir"), 'file2.txt')
file_three = create_files(create_directory("file3_dir"), 'file3.txt')
file_four = create_files(create_directory("file4_dir"), 'file4.txt')
# Step 2: Creating destination_directory:
destination_dir = create_directory('destination_directory')
files = [file_one, file_two, file_three, file_four]
# Step 3: Moving Your Files:
for file in files:
shutil.move(file, destination_dir)
if __name__ == "__main__":
main()

find string line startswith recursively python

i have to find recursively all lines( which start with string "excel") in all files (in directory and subdirectory) .i need for each filename the line found (for example :
filename1:
line1 founded...
filename2:
line2 founded...
Output result in file called "logfile"
if no line founded , filename not saved in logfile.
import os
word="excel"
from os.path import join
for (dirname, dirs, files) in os.walk('/batch/'):
for filename in files:
thefile = os.path.join(dirname,filename)
for line in files:
if line.startswith(word):
print (line)
print (thefile)
Thanks
Your code just has minor problems: The biggest one is that you loop on filename instead of file content.
import os
word="excel"
from os.path import join
for (dirname, dirs, files) in os.walk('/batch/'):
for filename in files:
thefile = os.path.join(dirname, filename)
with open(thefile) as f:
for line in f:
if line.startswith(word):
print (line)
print (thefile)
EDIT:
import os
word="excel"
from os.path import join
with open('log_result.txt', 'w') as log_file:
for (dirname, dirs, files) in os.walk('/tmp/toto'):
for filename in files:
thefile = os.path.join(dirname, filename)
with open(thefile) as f:
lines = [line for line in f if line.startswith(word)]
if lines:
log_file.write("File {}:\n".format(thefile))
log_file.writelines(lines)
Here is the fixed code.
you don't need to re traverse the same list of files.
os.walk() will return all sub directories in a directory, All you need you do is loop all directories.
Sample Code
import glob
import os
word="excel"
for (dirname, dirs, files) in os.walk("/batch/"):
for file_ in files :
if file_.startswith(word):
print(file_)
print(os.path.join(dirname, file_))
for dir_ in dirs :
myfiles = glob.glob(os.path.join(dirname,dir_))
for myfile in myfiles:
if myfile.startswith(word):
print(myfile)
print(os.path.join(dirname,myfiles))
hope this helps

How to Save file names and their directories path in a text file using Python

I am trying to find a string that is contained in files under a directory. Then make it to store it's file names and directories under a new text file or something.
I got upto where it is going through a directory and finding a string, then printing a result. But not sure of the next step.
Please help, I'm completely new to coding and python.
import glob, os
#Open a source as a file and assign it as source
source = open('target.txt').read()
filedirectories = []
#locating the source file and printing the directories.
os.chdir("/Users/a1003584/desktop")
for root, dirs, files in os.walk(".", topdown=True):
for name in files:
print(os.path.join(root, name))
if source in open(os.path.join(root, name)).read():
print 'treasure found.'
Don't do a string comparison if your looking for a dictionary. Instead use the json module. Like this.
import json
import os
filesFound = []
def searchDir(dirName):
for name in os.listdir(dirName):
# If it is a file.
if os.isfile(dirName+name):
try:
fileCon = json.load(dirName+name)
except:
print("None json file.")
if "KeySearchedFor" in fileCon:
filesFound.append(dirName+name)
# If it is a directory.
else:
searchDir(dirName+name+'/')
# Change this to the directory your looking in.
searchDir("~/Desktop")
open("~/Desktop/OutFile.txt",'w').write(filesFound)
This should write the output to a csv file
import csv
import os
with open('target.txt') as infile: source = infile.read()
with open("output.csv", 'w') as fout:
outfile = csv.writer(fout)
outfile.writerow("Directory FileName FilePath".split())
for root, dirnames, fnames in os.walk("/Users/a1003584/desktop", topdown=True):
for fname in fnames:
with open(os.path.join(root, fname)) as infile:
if source not in infile.read(): continue
outfile.writerow(root, fname, os.path.join(root, fname))

Recursively rename file extensions

I am having a difficult time creating a python script that will rename file extensions in a folder and continue to do so in sub directories. Here is the script I have thus far; it can only rename files in the top directory:
#!/usr/bin/python
# Usage: python rename_file_extensions.py
import os
import sys
for filename in os.listdir ("C:\\Users\\username\\Desktop\\test\\"): # parse through file list in the folder "test"
if filename.find(".jpg") > 0: # if an .jpg is found
newfilename = filename.replace(".jpg","jpeg") # convert .jpg to jpeg
os.rename(filename, newfilename) # rename the file
import os
import sys
directory = os.path.dirname(os.path.realpath(sys.argv[0])) #get the directory of your script
for subdir, dirs, files in os.walk(directory):
for filename in files:
if filename.find('.jpg') > 0:
subdirectoryPath = os.path.relpath(subdir, directory) #get the path to your subdirectory
filePath = os.path.join(subdirectoryPath, filename) #get the path to your file
newFilePath = filePath.replace(".jpg",".jpeg") #create the new name
os.rename(filePath, newFilePath) #rename your file
I modified Jaron's answer with the path to the file and the complete example of renaming the file
I modified the answer of Hector Rodriguez Jr. a little bit because it would replace ANY occurance of ".jpg" in the path, e.g. /path/to/my.jpg.files/001.jpg would become /path/to/my.jpeg.files/001.jpeg, which is not what you wanted, right?
Although it is generally not a good idea to use dots "." in a folder name, it can happen...
import os
import sys
directory = os.path.dirname(os.path.realpath(sys.argv[0])) # directory of your script
for subdir, dirs, files in os.walk(directory):
for filename in files:
if filename.find('.jpg') > 0:
newFilename = filename.replace(".jpg", ".jpeg") # replace only in filename
subdirectoryPath = os.path.relpath(subdir, directory) # path to subdirectory
filePath = os.path.join(subdirectoryPath, filename) # path to file
newFilePath = os.path.join(subdirectoryPath, newFilename) # new path
os.rename(filePath, newFilePath) # rename
You can process the directory like this:
import os
def process_directory(root):
for item in os.listdir(root):
if os.path.isdir(item):
print("is directory", item)
process_directory(item)
else:
print(item)
#Do stuff
process_directory(os.getcwd())
Although, this isn't really necessary. Simply use os.walk which will iterate through all toplevel and further directories / files
Do it like this:
for subdir, dirs, files in os.walk(root):
for f in files:
if f.find('.jpg') > 0:
#The rest of your stuff
That should do exactly what you want.

Categories