I am using os.walk in python 2.7 to open multiple files, then, add all lines of interest of those files to a list. Later I'd want to edit those lines with fileinput and close it. How can I achieve this? Using the code below is how I'm opening the files:
import os
import fnmatch
import fileinput
lines = []
def openFiles():
for root, dirs, files in os.walk('/home/test1/'):
for lists in fnmatch.filter(files, "*.txt"):
filepath = os.path.join(root, lists)
print filepath
with open(filepath, "r") as sources:#opens 8 files and read their lines
#edit = fileinput.input(filepath, inplace=1)
for line in sources:
if line.startswith('xe') :
lines.append(line)
Then later, for each lines that start with xe, I'd like to add a # in front of it then close that file. I'd like to do that in a different function.
Here's the I way I do it, adding to your code:
import os
import fnmatch
import fileinput
def openFiles(dir):
filePaths = []
for root, dirs, files in os.walk(dir):
for textFile in fnmatch.filter(files, "*.txt"):
filepath = os.path.join(root, textFile)
filePaths.append(filepath)
return filePaths
def prefixLines(filepaths, chartoPrefix, prefixWith):
res = ''
for filepath in filepaths:
# Read file
with open(filepath, 'r') as f:
for line in f:
if line.startswith(chartoPrefix):
res += prefixWith + line
else:
res += line
# Write to file
with open(filepath, 'w') as f:
f.write(res)
res = '' # Rest res
prefixLines(openFiles(r'/home/test1/'), 'xe', '#')
prefixLines suffers from many shortcomings:
Because we read all the lines of files and store them in res, we
may ran out of memory for large files.
If somehow the programmer forgot to indent res = '' in the
right block or if res was completely omitted and the code ran on
actual files that the user needs, you'll end up writing the contents
of the previous read file to the next file and the last
file will have the contents of all the read files. That's why you
have use this code in a testing environment or use it cautiously.
This code only serves to demonstrate how you could achieve your desired effects, prefixing file lines that starts with a string with another string. Therefore, a slight improvement of this code is recommended. For example, instead of reading all the contents of the file and storing them at res you could simply save the line number that needs to be prefixed and thus eliminating the need to load all the data into memory. enumerate could also be helpful to return the file number, it returns an iterable in 2.7. By obviating res not only do we save memory, but also eliminate the shortcoming in bullet 2.
I ended up doing it this way. But I'm using classes in my main code so It's split into 2 functions instead of one. In my main code, I used a list to hold all the file paths and use fileinput to open each filepaths from the list this way for line in fileinput.FileInput(pathlist, inplace=1): do something. I do thank #direprobs for her answer, as she shed some light on how I'm supposed to do this.
import fnmatch
import fileinput
import os
import sys
def openFiles():
for dirpath, dirs, files in os.walk('/home/test1/'):
for filename in fnmatch.filter(files, "*.txt"):
filepaths = os.path.join(dirpath, filename)
for line in fileinput.FileInput(filepaths, inplace=1):
if line.startswith("xe"):
add = "# {}".format(line)
line = line.replace(line, add)
sys.stdout.write(line)
fileinput.close()
openFiles()
Related
I have below list of text files , I wanted to combine group of files like below
Inv030001.txt - should have all data of files starting with Inv030001
Inv030002.txt - should have all data of files starting with Inv030002
I tried below code but it's not working
filenames = glob(textfile_dir+'*.txt')
for fname in filenames:
filename = fname.split('\\')[-1]
current_invoice_number = (filename.split('_')[0]).split('.')[0]
prev_invoice_number = current_invoice_number
with open(textfile_dir + current_invoice_number+'.txt', 'w') as outfile:
for eachfile in fnmatch.filter(os.listdir(textfile_dir), '*[!'+current_invoice_number+'].txt'):
current_invoice_number = (eachfile.split('_')[0]).split('.')[0]
if(current_invoice_number == prev_invoice_number):
with open(textfile_dir+eachfile) as infile:
for line in infile:
outfile.write(line)
prev_invoice_number = current_invoice_number
else:
with open(textfile_dir+eachfile) as infile:
for line in infile:
outfile.write(line)
prev_invoice_number = current_invoice_number
#break;
Does this answer your question? My version will append the data from "like" invoice numbers to a .txt file named with just the invoice number. In other words, anything that starts with "Inv030001" will have it's contents appended to "Inv030001.txt". The idea being that you likely don't want to overwrite files and possibly destroy them if your write logic had a mistake.
I actually recreated your files to test this. I did exactly what I suggested you do. I just treated every part as a separate task and built it up to this, and in doing that the script became far less verbose and convoluted. I labeled all of my comments with task to pound it in that this is just a series of very simple things.
I also renamed your vars to what they actually are. For instance, filenames aren't filenames, at all. They are entire paths.
import os
from glob import glob
#you'll have to change this path to yours
root = os.path.join(os.getcwd(), 'texts/')
#sorting this may be redundant
paths = sorted(glob(root+'*.txt'))
for path in paths:
#task: get filename
filename = path.split('\\')[-1]
#task: get invoice number
invnum = filename.split('_')[0]
#task: open in and out files
with open(f'{root}{invnum}.txt', 'a') as out_, open(path, 'r') as in_:
#task: append in contents to out
out_.write(in_.read())
Your code may have had a little too much complications in it. And so, the idea is that for every file in the directory, just add it's contents (that is, append) to the invoice file.
from glob import glob, fnmatch
import os
textfile_dir="invs" + os.sep # # I changed this to os.sep since I'm on a MAC - hopefully works in windows, too
filenames = glob(textfile_dir+'*.txt')
for fname in filenames:
filename = fname.split(os.sep)[-1]
current_invoice_number = (filename.split('_')[0]).split('.')[0]
with open(textfile_dir + current_invoice_number+'.txt', 'a') as outfile:
with open(fname) as infile:
for line in infile:
outfile.write(line)
Some room for improvement:
If you created your accumulation files in a different directory, there would be less of a chance of you picking them up when you run this program again (we are using append 'a' when we open the files for writing.
The order of the files is not preserved with glob (AFAIK). This may not be great for having deterministic results.
Below is the working code, if someone is looking for same solution
filenames = glob(textfile_dir+'*.txt')
dd = defaultdict(list)
for filename in filenames:
name, ext = os.path.splitext(filename)
name = name.split('\\')[-1].split('_')[0]
dd[name].append(filename)
for key, fnames in dd.items():
with open(textfile_dir+key+'.txt', "w") as newfile:
for line in fileinput.FileInput(fnames):
newfile.write(line)
I have a folder which has a text files in it. I want to be able to put in a path to this file and have python go through the folder, open each file and append its content to a list.
import os
folderpath = "/Users/myname/Downloads/files/"
inputlst = [os.listdir(folderpath)]
filenamelist = []
for filename in os.listdir(folderpath):
if filename.endswith(".txt"):
filenamelist.append(filename)
print(filename list)
So far this outputs:
['test1.txt', 'test2.txt', 'test3.txt', 'test4.txt', 'test5.txt', 'test6.txt', 'test7.txt', 'test8.txt', 'test9.txt', 'test10.txt']
I want to have the code take each of these files, open them and put all of its content into a single huge list not just print the file name. Is there any way to do this?
You should use file open for this.
Read here a documentation about its advanced options
Anyway, here is one way how you can do it:
import os
folderpath = r"yourfolderpath"
inputlst = [os.listdir(folderpath)]
filenamecontent = []
for filename in os.listdir(folderpath):
if filename.endswith(".txt"):
f = open(os.path.join(folderpath,filename), 'r')
filenamecontent.append(f.read())
print(filenamecontent)
If you are using Python3, you can use :
for filename in filename_list :
with open(filename,"r") as file_handler :
data = file_handler.read()
Please do mind that you will need the full (either relative or absolute) path to your file in filename
This way, your file handler will be automatically closed when you get out of the with scope.
More information around here : https://docs.python.org/fr/3/library/functions.html#open
On a side note, in order to list files, you might want to have a look to glob and use :
filename_list = glob.glob("/path/to/files/*.txt")
You can use fileinput
Code:
import fileinput
folderpath = "your_path_to_directory_where_files_are_stored"
file_list = [a for a in os.listdir(folderpath) if a.endswith(".txt")]
# This will return all the files which are in .txt format
get_all_files = fileinput.input(file_list)
with open("alldata.txt", 'ab+') as writefile:
for line in get_all_files:
writefile.write(line+'\n')
The above code will read all the data from .txt from a specified directory(folderpath) and store it in alldata.txt So, you wanted to have that long list, that list is now stored in .txt file if you want, else you can remove the write process.
Links:
https://docs.python.org/3/library/fileinput.html
https://docs.python.org/3/library/functions.html#open
I'd like to read the contents of every file in a folder/directory and then print them at the end (I eventually want to pick out bits and pieces from the individual files and put them in a separate document)
So far I have this code
import os
path = 'results/'
fileList = os.listdir(path)
for i in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines = file.readlines()
print(allLines)
at the end I dont get any errors but it only prints the contents of the last file in my folder in a series of strings and I want to make sure its reading every file so I can then access the data I want from each file. I've looked online and I cant find where I'm going wrong. Is there any way of making sure the loop is iterating over all my files and reading all of them?
also i get the same result when I use
file = open(os.path.join('results/',i), 'r')
in the 5th line
Please help I'm so lost
Thanks!!
Separate the different functions of the thing you want to do.
Use generators wherever possible. Especially if there are a lot of files or large files
Imports
from pathlib import Path
import sys
Deciding which files to process:
source_dir = Path('results/')
files = source_dir.iterdir()
[Optional] Filter files
For example, if you only need files with extension .ext
files = source_dir.glob('*.ext')
Process files
def process_files(files):
for file in files:
with file.open('r') as file_handle :
for line in file_handle:
# do your thing
yield line
Save the lines you want to keep
def save_lines(lines, output_file=sys.std_out):
for line in lines:
output_file.write(line)
you forgot indentation at this line allLines = file.readlines()
and maybe you can try that :
import os
allLines = []
path = 'results/'
fileList = os.listdir(path)
for file in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines.append(file.read())
print(allLines)
You forgot to indent this line allLines.append(file.read()).
Because it was outside the loop, it only appended the file variable to the list after the for loop was finished. So it only appended the last value of the file variable that remained after the loop. Also, you should not use readlines() in this way. Just use read() instead;
import os
allLines = []
path = 'results/'
fileList = os.listdir(path)
for file in fileList:
file = open(os.path.join('results/'+ i), 'r')
allLines.append(file.read())
print(allLines)
This also creates a file containing all the files you wanted to print.
rootdir= your folder, like 'C:\\Users\\you\\folder\\'
import os
f = open('final_file.txt', 'a')
for root, dirs, files in os.walk(rootdir):
for filename in files:
data = open(full_name).read()
f.write(data + "\n")
f.close()
This is a similar case, with more features: Copying selected lines from files in different directories to another file
Brand new to Python, first time poster, be easy on me please!
I would like to insert a line of text into all files of a specific extension (in the example, .mod) within the current folder. It can point to a specific folder if that is easier.
Below is something that I copied and modified, it is doing exactly what I need for one specific file, the part about replacing sit with SIT is completely unnecessary, but if I remove it the program doesn't work. I have no idea why that is, but I can live with that.
import sys, fileinput
for i, line in enumerate(fileinput.input('filename.mod', inplace=1)):
sys.stdout.write(line.replace('sit', 'SIT'))
if i == 30: sys.stdout.write('TextToInsertIntoLine32' '\n') #adds new line and text to line 32
My question is, how do I run this for all files in a directory? I have tried replacing the filename with sys.argv[1] and calling the script from the command line with '*.mod' which did not work for me. Any help would be appreciated.
You can do like this:
import os
folder = '...' # your directory
files = [f for f in os.listdir(folder) if f.endswith('.mod')]
Then you can get a list of files with the extension '.mod', you can run your function for all files.
You could use glob.glob to list all the files in the current working directory whose filename ends with .mod:
import fileinput
import glob
import sys
for line in fileinput.input(glob.glob('*.mod'), inplace=True):
sys.stdout.write(line.replace('sit', 'SIT'))
if fileinput.filelineno() == 32:
#adds new line and text to line 32
sys.stdout.write('TextToInsertIntoLine32' '\n')
You can use os.walk function:
for root, dirs, files in os.walk("/mydir"):
for file in files:
if file.endswith(".mod"):
filepath = os.path.join(root, file)
with open(filepath, 'r', encoding='utf-8') as f:
text = f.read()
print('%s read' % filepath)
with open(filepath, 'w', encoding='utf-8') as f:
f.write(text.replace('sit', 'SIT'))
print('%s updated' % filepath)
Easy: you don't have to specify a filename. fileinput will take the filenams from sys.argv by default. You don't even have to use enumerate, as fileinput numbers the lines in each file:
import sys, fileinput
for line in fileinput.input(inplace=True):
sys.stdout.write(line.replace('sit', 'SIT'))
if fileinput.filelineno() == 30:
sys.stdout.write('TextToInsertIntoLine32' '\n')
I want to write a program for this: In a folder I have n number of files; first read one file and perform some operation then store result in a separate file. Then read 2nd file, perform operation again and save result in new 2nd file. Do the same procedure for n number of files. The program reads all files one by one and stores results of each file separately. Please give examples how I can do it.
I think what you miss is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob
list_of_files = glob.glob('./*.txt') # create the list of file
for file_name in list_of_files:
FI = open(file_name, 'r')
FO = open(file_name.replace('txt', 'out'), 'w')
for line in FI:
FO.write(line)
FI.close()
FO.close()
import sys
# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
print(n) #print out the filename we are currently processing
input = open(n, "r")
output = open(n + ".out", "w")
# do some processing
input.close()
output.close()
Then call it like:
./foo.py bar.txt baz.txt
You may find the fileinput module useful. It is designed for exactly this problem.
I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os
OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
for file in files:
read_f = open(os.join(path,file),'r')
write_f = open(os.path.join(OUTPUT_DIR,file))
# Do stuff
Combined answer incorporating directory or specific list of filenames arguments:
import sys
import os.path
import glob
def processFile(filename):
fileHandle = open(filename, "r")
for line in fileHandle:
# do some processing
pass
fileHandle.close()
def outputResults(filename):
output_filemask = "out"
fileHandle = open("%s.%s" % (filename, output_filemask), "w")
# do some processing
fileHandle.write('processed\n')
fileHandle.close()
def processFiles(args):
input_filemask = "log"
directory = args[1]
if os.path.isdir(directory):
print "processing a directory"
list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
else:
print "processing a list of files"
list_of_files = sys.argv[1:]
for file_name in list_of_files:
print file_name
processFile(file_name)
outputResults(file_name)
if __name__ == '__main__':
if (len(sys.argv) > 1):
processFiles(sys.argv)
else:
print 'usage message'
from pylab import *
import csv
import os
import glob
import re
x=[]
y=[]
f=open("one.txt",'w')
for infile in glob.glob(('*.csv')):
# print "" +infile
csv23=csv2rec(""+infile,'rb',delimiter=',')
for line in csv23:
x.append(line[1])
# print len(x)
for i in range(3000,8000):
y.append(x[i])
print ""+infile,"\t",mean(y)
print >>f,""+infile,"\t\t",mean(y)
del y[:len(y)]
del x[:len(x)]
I know I saw this double with open() somewhere but couldn't remember where. So I built a small example in case someone needs.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
""" This function will search a directory text files to be edited. """
## define some regex for our search and replace. We are looking for <, > and &
## To replaced with &ls;, > and &. We might want to replace proper whitespace
## chars to as well? (r'\t', ' ') and (f'\n', '<br>')
search_ = ((r'(<)', '<'), (r'(>)', '>'), (r'(&)', '&'))
## Read and loop our file location. Our location is the same one that our python file is in.
for loc in listdir(abspath(dirname(__file__))):
## Here we split our filename into it's parts ('fileName', '.txt')
name = splitext(loc)
if name[1] == '.txt':
## we found our .txt file so we can start file operations.
with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:
## read our first file
retFile = file_1.read()
## find and replace some text.
for find_ in search_:
retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)
## finally we can write to our newly created text file.
file_2.write(retFile)
This thing also works for reading multiple files, my file name is fedaralist_1.txt and federalist_2.txt and like this, I have 84 files till fedaralist_84.txt
And I'm reading the files as f.
for file in filename:
with open(f'federalist_{file}.txt','r') as f:
f.read()