Concatenate .txt files. Write content in one .txt file - python

I have some .txt files in a folder. I need to collect their content all in one .txt file. I'm working with Python and tried:
import os
rootdir = "\\path_to_folder\\"
for files in os.walk(rootdir):
with open ("out.txt", 'w') as outfile:
for fname in files:
with open(fname) as infile:
for line in infile:
outfile.write(line)
but it did not work. The 'out.txt' file is generated, but the code never finishes. Any advice? Thanks in advance.

os.walk returns tuples, not filenames:
# Resolve the output path once so it can be recognised during the walk.
out_path = os.path.abspath("out.txt")
# Open the output a single time, before walking, so it is not reopened
# (and truncated) for every directory.
with open(out_path, 'w') as outfile:
    # os.walk yields a (dirpath, dirnames, filenames) tuple per directory.
    for root, dirs, files in os.walk(rootdir):
        for fname in files:
            in_path = os.path.join(root, fname)
            # If rootdir covers the working directory, the walk also finds
            # out.txt; skip it so the output is not read while being written.
            if os.path.abspath(in_path) == out_path:
                continue
            with open(in_path) as infile:
                for line in infile:
                    outfile.write(line)
Also you should open outfile in the beginning, not in each loop.

This solved my problem. The generated 'out.txt' file is just 151KB.
file_list = os.listdir("\\path_to_folder\\")
with open('out.txt', 'a+') as outfile:
for fname in file_list:
with open(fname) as infile:
outfile.write(infile.read())
Thanks everybody.

Related

Python script to loop for files in subdirectories in order to change text and extension

I'm trying to loop through files in multiple subdirectories in order to :
1- Add some text inside the files (ending with .ext)
2- Change the extension of each file from .ext to .ext2
The script works fine when I have only one subdir in the main directory, but when I try to run the script on multiple subdirs it says:
line 8, in <module>
with open(name, "r") as f:
FileNotFoundError: [Errno 2] No such file or directory: "here the name of the subdir"
import os
directory = 'C:\\Users\\folder\\subfolders'
for dir, subdirs, files in os.walk(directory):
for name in files:
if name.endswith((".ext")):
with open(name, "r") as f:
XMLContent = f.readlines()
XMLContent.insert(6, '<XMLFormat>\n')
XMLContent.insert(40, '\n</XMLFormat>')
with open(name, "w") as f:
XMLContent = "".join(XMLContent)
f.write(XMLContent)
os.rename(os.path.join(dir, name), os.path.join(dir, name[:name.index('.ext')] +".ext1"))
Above is a screenshot of the sub dirs I have in the folder (1.Modified).
I've also created a new folder called all and put in it three folders and for each folder, I've created 2 files of .ext type.
So, I was able to write inside each file of them and change its name as well.
import os
for root, dirs, files in os.walk("/Users/ghaith/Desktop/test/all"):
for file in files:
if file.endswith('.ext'):
path = root + '/' + file
with open(path, "r") as f:
content = f.readlines()
content.insert(1, '<XMLFormat>\n')
content.insert(3, '\n</XMLFormat>')
with open(path, "w") as f:
content = "".join(content)
f.write(content)
os.rename(path, path+'2')
Output:
< XMLFormat >
< /XMLFormat >
you need to pass the directory to open the file
with open(os.path.join(directory, name), "r") as f:
But I think the best way is to use os.listdir() to loop over the directory:
for item in os.listdir(directory):
if item.endswith(".ext"):
with open(os.path.join(directory, item), "r") as r:

How to concatenate a large number of text files in python

I have 367 text files I want to concatenate into one long text file. I was thinking I could go about this by doing:
filenames = ["file1.txt","file2.txt","file3.txt"...]
with open("output_file.txt","w") as outfile:
for filename in filenames:
with open(filename) as infile:
contents = infile.read()
outfile.write(contents)
However, this would require me to write out every single text file name, which would be very tedious. Is there an easier way to concatenate a large number of text files using Python? Thank you!
Use glob.glob to build the list of files and shutil.copyfileobj for a more efficient copy.
import shutil
import glob
output_name = "output_file.txt"
# Two adjustments to the raw glob result:
#  - drop the output file itself: a leftover output_file.txt from an earlier
#    run also matches "*.txt" and would otherwise be copied into itself;
#  - sort, because glob does not guarantee any particular order. The sort is
#    lexicographic, so "file10.txt" comes before "file2.txt".
filenames = sorted(
    name for name in glob.glob("*.txt") if name != output_name
)  # or "file*.txt"
# Binary mode on both sides: the bytes are copied as-is, with no decoding.
with open(output_name, "wb") as outfile:
    for filename in filenames:
        with open(filename, "rb") as infile:
            shutil.copyfileobj(infile, outfile)
Assuming the file names all follow the pattern file{count}.txt with a counter from 1 to 367 (file1.txt … file367.txt), we can generate the 367 file names using a list comprehension.
filenames = [f'file{i}.txt' for i in range(1,368)]
with open("output_file.txt","w") as outfile:
for filename in filenames:
with open(filename) as infile:
contents = infile.read()
outfile.write(contents)
Another option you could use is os.listdir.
import os
for i in os.listdir('files'):
print(i)
output
text1.txt
text2.txt

Python - loop through subfolders and files in a directory without ignoring the subfolder

I have read all the Stack Exchange help files on looping through subfolders, as well as the os documentation, but I am still stuck. I am trying to loop over files in subfolders, open each file, extract the first number in the first line, copy the file to a different subfolder (with the same name but in the output directory) and rename the file copy with the number as a suffix.
import os
import re
outputpath = "C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/Wisconsin_Copies_With_PageNumbers"
inputpath = "C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/FRUS_Wisconsin"
suffix=".txt"
for root, dirs, files in os.walk(inputpath):
for file in files:
file_path = os.path.join(root, file)
foldername=os.path.split(os.path.dirname(file_path))[1]
filebname=os.path.splitext(file)[0]
filename=filebname + "_"
f=open(os.path.join(root,file),'r')
data=f.readlines()
if data is None:
f.close()
else:
with open(os.path.join(root,file),'r') as f:
for line in f:
s=re.search(r'\d+',line)
if s:
pagenum=(s.group())
break
with open(os.path.join(outputpath, foldername,filename+pagenum+suffix), 'w') as f1:
with open(os.path.join(root,file),'r') as f:
for line in f:
f1.write(line)
I expect the result to be copies of the files in the input directory placed in the corresponding subfolder in the output directory, renamed with a suffix, such as "005_2", where 005 is the original file name, and 2 is the number the python code extracted from it.
The error I get seems to indicate that I am not looping through files correctly. I know the code for extracting the first number and renaming the file works because I tested it on a single file. But using os.walk to loop through multiple subfolders is not working, and I can't figure out what I am doing wrong. Here is the error:
File "<ipython-input-1-614e2851f16a>", line 23, in <module>
with open(os.path.join(outputpath, foldername,filename+pagenum+suffix), 'w') as f1:
IOError: [Errno 2] No such file or directory: 'C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/Wisconsin_Copies_With_PageNumbers\\FRUS_Wisconsin\\.dropbox_1473986809.txt'
Well, this isn't elegant, but it worked:
from glob import glob
folderlist=glob("C:\\...FRUS_Wisconsin*\\")
outputpath = "C:\\..\Wisconsin_Copies_With_PageNumbers"
for folder in folderlist:
foldername = str(folder.split('\\')[7])
for root, dirs, files in os.walk(folder):
for file in files:
filebname=os.path.splitext(file)[0]
filename=filebname + "_"
if not filename.startswith('._'):
with open(os.path.join(root,file),'r') as f:
for line in f:
s=re.search(r'\d+',line)
if s:
pagenum=(s.group())
break
with open(os.path.join(outputpath, foldername,filename+pagenum+suffix), 'w') as f1:
with open(os.path.join(root,file),'r') as f:
for line in f:
f1.write(line)

generalize python script to run on all files in a directory

I have the following python script:
with open('ein.csv', 'r') as istr:
with open('aus.csv', 'w') as ostr:
for line in istr:
line = line.rstrip('\n') + ',1'
print(line, file=ostr)
How could this be generalized to run on all the files in a directory and output a separate file for each one?
maybe have a function like this:
for phyle in list_files(.):
with open(phyle, 'r') as istr:
with open('ausput_xyz.csv', 'w') as ostr:
for line in istr:
line = line.rstrip('\n') + ',1'
print(line, file=ostr)
def list_files(path):
    """Return the names of the files directly inside the folder *path*.

    Each name includes its extension but not the directory part. Entries for
    which os.path.isfile() is false (e.g. subdirectories) are left out.
    """
    return [
        name
        for name in os.listdir(path)
        # listdir() gives bare names, so join with path before testing.
        if os.path.isfile(os.path.join(path, name))
    ]
Just put the code in a function and call it:
def process(infilename):
    """Write a copy of *infilename* with ",1" appended to every line.

    The copy is created next to the input file; its name is the input name
    with the extension replaced by "-out.csv" ("abc.csv" -> "abc-out.csv").
    """
    outfilename = os.path.splitext(infilename)[0] + "-out.csv"
    with open(infilename, 'r') as istr, open(outfilename, 'w') as ostr:
        for line in istr:
            # Strip the newline, add the extra column; print() re-adds the
            # newline.
            print(line.rstrip('\n') + ',1', file=ostr)


def process_files(path):
    """Run process() on every file directly inside the directory *path*."""
    # os.listdir() is evaluated once, before the loop starts, so the
    # "-out.csv" files created during this run are not processed again.
    for name in os.listdir(path):
        # listdir() returns bare names; join them with path so the files are
        # found even when path is not the current working directory.
        full_name = os.path.join(path, name)
        if os.path.isfile(full_name):
            process(full_name)
In a directory with input files "abc.csv", "xyz.csv" this code will create output files named "abc-out.csv" and "xyz-out.csv".
Note that os.listdir(path) is called just once during the execution, so the list of files to process will not include the newly created output files.
First off, a more Pythonic way of dealing with CSV files is to use the csv module, and to open the files with a with statement, which closes the file object automatically at the end of the block. Use the os.walk() function to iterate over the files and directories under a specific path:
import csv
import os
for path_name, dirs, files in os.walk('relative_path'):
for file_name in files:
with open(file_name) as inp,open('{}.csv'.format(file_name),'wb') as out:
spamwriter = csv.writer(out, delimiter=',')
for line in inp:
spamwriter.writerow(line) # or line.split() with a specific delimiter
Note that if your script is not in the same directory as your files, you need to prepend the directory path to the file name when opening it:
with open(path_name+'/'+file_name),...

Python Error with processing multiple files

I am new to python. I am trying to read multiple files one by one from subfolders, do some processing and output. Below is my code:
import os
rootdir = 'dir'
for subdir, dirs, files in os.walk(rootdir):
for fname in files:
print os.path.join(subdir, fname)
f = open(fname, 'r')
lines = f.readlines()
f.close()
f = open(fname, 'w')
for line in lines:
f['X1'] = f['X1'].astype(str)
But I am having the following error:
IOError: [Errno2] No such file or directory : 'test.txt'
Your problem is that you are doing this:
os.path.join(subdir, fname)
But you are not assigning it to a variable, so when you do this:
f=open(fname,'r')
You are still only using the filename.
You should do this:
file_path = os.path.join(subdir, fname)
So now you actually have the file path. Then this:
f=open(file_path,'r')

Categories