How could I grab file from many sub-folder via main folder? - python

If I have a main folder and many sub-folders, and my targeted files are located at these sub-folders. How could I set up my path properly then the program could grab these targeted files via my main folder directly?
For example,
Main_folder
>sub_1
>>sub_1_v1
>>>targeted_file.txt # file I need
>>sub_2_v2
>>>targeted_file.txt # file I need
>sub_2
>>sub_1_v1
>>>targeted_file.txt # file I need
>>sub_2_v2
>>>targeted_file.txt # file I need
Here is a program created by Julien Spronck (Grabbing data from certain files)
def get_all_files(path):
## get a generator with all file names
import os
import glob
return glob.iglob(os.path.join(path,'*.txt'))
def get_all_data(files):
## get a generator with all the data from all the files
for fil in files:
with open(fil, 'r') as the_file:
for line in the_file:
yield line
def write_lines_to_file(lines, outfile):
with open(outfile, 'w') as the_file:
for line in lines:
the_file.write(line+'\n')
path = 'blah blah' # path should be given here!
outfile = 'blah.csv'
files = get_all_files(path)
lines = get_all_data(files)
write_lines_to_file(lines, outfile)
My question is, how could I give the path properly (from main folder) then I can grab all targeted files at one time?
Thanks.

To iterate over folders, then files, use:
import os
def list_files(dir):
r = []
subdirs = [x[0] for x in os.walk(dir)]
for subdir in subdirs:
files = os.walk(subdir).next()[2]
if (len(files) > 0):
for file in files:
r.append(subdir + "/" + file)
return r
As seen here:
Python: Iterate through folders, then subfolders and print filenames with path to text file

Related

Read in multiple folder and combine multiple text files contents to one file per folder - Python

I'm new to Python. I have 100's of multiple folders in the same Directory inside each folder i have multiple text files each. i want to combine all text files contents to one per folder.
Folder1
text1.txt
text2.txt
text3.txt
.
.
Folder2
text1.txt
text2.txt
text3.txt
.
.
i need output as copy all text files content in to one text1.txt + text2.txt + text3.txt ---> Folder1.txt
Folder1
text1.txt
text2.txt
text3.txt
Folder1.txt
Folder2
text1.txt
text2.txt
text3.txt
Folder2.txt
i have below code which just list out the text files.
for path,subdirs, files in os.walk('./data')
for filename in files:
if filename.endswith('.txt'):
please help me how to proceed on the task. Thank you.
Breaking down the problem we need the solution to:
Find all files in a directory
Merge contents of all the files into one file - with the same name as the name of the directory.
And then apply this solution to every sub directory in the base directory. Tested the code below.
Assumption: the subfolders have only text files and no directories
import os
# Function to merge all files in a folder
def merge_files(folder_path):
# get all files in the folder,
# assumption: folder has no directories and all text files
files = os.listdir(folder_path)
# form the file name for the new file to create
new_file_name = os.path.basename(folder_path) + '.txt'
new_file_path = os.path.join(folder_path, new_file_name)
# open new file in write mode
with open(new_file_path, 'w') as nf:
# open files to merge in read mode
for file in files:
file = os.path.join(folder_path, file)
with open(file, 'r') as f:
# read all lines of a file and write into new file
lines_in_file = f.readlines()
nf.writelines(lines_in_file)
# insert a newline after reading each file
nf.write("\n")
# Call function from the main folder with the subfolders
folders = os.listdir("./test")
for folder in folders:
if os.path.isdir(os.path.join('test', folder)):
merge_files(os.path.join('test', folder))
First you will need to get all folder names, which can be done with os.listdir(path_to_dir). Then you iterate over all of them, and for each you will need to iterate over all of its children using the same function, while concatenating contents using this: https://stackoverflow.com/a/13613375/13300960
Try writing it by yourself and update the answer with your code if you will need more help.
Edit: os.walk might not be the best solution since you know your folder structure and just two listdirs will do the job.
import os
basepath = '/path/to/directory' # maybe just '.'
for dir_name in os.listdir(basepath):
dir_path = os.path.join(basepath, dir_name)
if not os.path.isdir(dir_path):
continue
with open(os.path.join(dir_path, dir_name+'.txt') , 'w') as outfile:
for file_name in os.listdir(dir_path):
if not file_name.endswith('.txt'):
continue
file_path = os.path.join(dir_path, file_name)
with open(file_path) as infile:
for line in infile:
outfile.write(line)
This is not the best code, but it should get the job done and it is the shortest.

How to read all .txt files in a folder and append its contents into one .txt file, using Python?

I have a folder with multiple .txt files.
for every .txt file in the folder, I want to take one line of the content and append them in a new .txt file. How to do this in Python?
I'm new to this, also new to publicly asking questions.this is all I got.
import os
Folder = os.listdir('E:\\Project\\tests')
f = open('outputFile.txt', 'a')
for file in Folder:
file.read()
for i in file:
f.write(i[1] + '\n')
f.close()
The problem in your code that you don't open the files to read.
Try this one:
from os import listdir
from os.path import isfile, join
folder_path = 'E:\\Project\\tests'
# get the full names of all the txt files in your folder
files = [join(folder_path, f) for f in listdir(folder_path) if isfile(join(folder_path, f)) and f.endswith(".txt")]
f = open('outputFile.txt', 'a')
for file in files:
line = open(file,"r").readlines()[1] # line will be equal to the second line of the file
f.write(line + '\n')
f.close()

Write in all the *.txt files within a directory using Python

I want to write in all the files within a directory based on their extensions. I can write to a specific file but my goal is code that can write to all the *.txt files that are in a specific directory. With the following code I can list all text files and search for a file but as a beginner in Python I don't know how to write a sentence in all the *.txt files.
import glob
import os
directory=os.listdir(r'C:\Users\Lenovo\Desktop\z')
myfiles=glob.glob('*.txt')
print(myfiles)
def find_files(filename, search_path):
result= []
for root, dir, files in os.walk(search_path):
if filename in files:
result.append(os.path.join(root, filename))
return result
print(find_files("zineb.txt",r"C:\Users\Lenovo\Desktop\z"))
In your example you should be able to just do the following:
textfiles = find_files("zineb.txt",r"C:\Users\Lenovo\Desktop\z")
for textfile in textfiles: # go over each file that it found
with open(textfile, "a") as f: # open the textfile in mode append (hence the a) and have it be assigned to f
f.write("a") # then write "a" to the file.
To do all of them:
for textfile in os.listdir():
if textfile.endswith(".txt"):
with open(textfile, "a") as f:
f.write("a")

Filter Directory using Regex and output filtered files to another directory

I am simply trying to create a python 3 program that runs through all .sql files in a specific directory and then apply my regex that adds ; after a certain instance and write the changes made to the file to a separate directory with their respective file names as the same.
So, if I had file1.sql and file2.sql in "/home/files" directory, after I run the program, the output should write those two files to "/home/new_files" without changes the content of the original files.
Here is my code:
import glob
import re
folder_path = "/home/files/d_d"
file_pattern = "/*sql"
folder_contents = glob.glob(folder_path + file_pattern)
for file in folder_contents:
print("Checking", file)
for file in folder_contents:
read_file = open(file, 'rt',encoding='latin-1').read()
#words=read_file.split()
with open(read_file,"w") as output:
output.write(re.sub(r'(TBLPROPERTIES \(.*?\))', r'\1;', f, flags=re.DOTALL))
I receive an error of File name too long:"CREATE EXTERNAL TABLe" and also I am not too sure where I would put my output path (/home/files/new_dd)in my code.
Any ideas or suggestions?
With read_file = open(file, 'rt',encoding='latin-1').read() the whole content of the file was being used as the file descriptor. The code provided here iterate over the files names found with glob.glob pattern open to read, process data, and open to write (assuming that a folder newfile_sqls already exist,
if not, an error would rise FileNotFoundError: [Errno 2] No such file or directory).
import glob
import os
import re
folder_path = "original_sqls"
#original_sqls\file1.sql, original_sqls\file2.sql, original_sqls\file3.sql
file_pattern = "*sql"
# new/modified files folder
output_path = "newfile_sqls"
folder_contents = glob.glob(os.path.join(folder_path,file_pattern))
# iterate over file names
for file_ in [os.path.basename(f) for f in folder_contents]:
# open to read
with open(os.path.join(folder_path,file_), "r") as inputf:
read_file = inputf.read()
# use variable 'read_file' here
tmp = re.sub(r'(TBLPROPERTIES \(.*?\))', r'\1;', read_file, flags=re.DOTALL)
# open to write to (previouly created) new folder
with open(os.path.join(output_path,file_), "w") as output:
output.writelines(tmp)

opening and reading all the files in a directory in python - python beginner

I'd like to read the contents of every file in a folder/directory and then print them at the end (I eventually want to pick out bits and pieces from the individual files and put them in a separate document)
So far I have this code
import os
path = 'results/'
fileList = os.listdir(path)
for i in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines = file.readlines()
print(allLines)
at the end I dont get any errors but it only prints the contents of the last file in my folder in a series of strings and I want to make sure its reading every file so I can then access the data I want from each file. I've looked online and I cant find where I'm going wrong. Is there any way of making sure the loop is iterating over all my files and reading all of them?
also i get the same result when I use
file = open(os.path.join('results/',i), 'r')
in the 5th line
Please help I'm so lost
Thanks!!
Separate the different functions of the thing you want to do.
Use generators wherever possible. Especially if there are a lot of files or large files
Imports
from pathlib import Path
import sys
Deciding which files to process:
source_dir = Path('results/')
files = source_dir.iterdir()
[Optional] Filter files
For example, if you only need files with extension .ext
files = source_dir.glob('*.ext')
Process files
def process_files(files):
for file in files:
with file.open('r') as file_handle :
for line in file_handle:
# do your thing
yield line
Save the lines you want to keep
def save_lines(lines, output_file=sys.std_out):
for line in lines:
output_file.write(line)
you forgot indentation at this line allLines = file.readlines()
and maybe you can try that :
import os
allLines = []
path = 'results/'
fileList = os.listdir(path)
for file in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines.append(file.read())
print(allLines)
You forgot to indent this line allLines.append(file.read()).
Because it was outside the loop, it only appended the file variable to the list after the for loop was finished. So it only appended the last value of the file variable that remained after the loop. Also, you should not use readlines() in this way. Just use read() instead;
import os
allLines = []
path = 'results/'
fileList = os.listdir(path)
for file in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines.append(file.read())
print(allLines)
This also creates a file containing all the files you wanted to print.
rootdir= your folder, like 'C:\\Users\\you\\folder\\'
import os
f = open('final_file.txt', 'a')
for root, dirs, files in os.walk(rootdir):
for filename in files:
data = open(full_name).read()
f.write(data + "\n")
f.close()
This is a similar case, with more features: Copying selected lines from files in different directories to another file

Categories