I want to write a program for this: in a folder I have n files; read the first file, perform some operation, and store the result in a separate file. Then read the second file, perform the operation again, and save the result in a new second file. Do the same for all n files. The program should read the files one by one and store the result of each file separately. Please give examples of how I can do it.
I think what you're missing is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all files with the extension *.txt to files with the extension *.out:
import glob

list_of_files = glob.glob('./*.txt')  # create the list of files
for file_name in list_of_files:
    FI = open(file_name, 'r')
    FO = open(file_name.replace('txt', 'out'), 'w')
    for line in FI:
        FO.write(line)
    FI.close()
    FO.close()
import sys

# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
    print(n)  # print out the filename we are currently processing
    input = open(n, "r")
    output = open(n + ".out", "w")
    # do some processing
    input.close()
    output.close()
Then call it like:

./foo.py bar.txt baz.txt

(or ./foo.py *.txt and let the shell expand the pattern).
You may find the fileinput module useful. It is designed for exactly this problem.
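For instance, a minimal sketch (it just echoes each line with its source filename; substitute your own processing):

import fileinput

# Every filename passed on the command line is read as one
# continuous stream of lines.
for line in fileinput.input():
    # fileinput.filename() names the file the current line came from
    print(fileinput.filename(), line.rstrip('\n'))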
I've recently learned of the os.walk() function, and it may help you here.
It allows you to walk down a directory tree structure.
import os

OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
    for file in files:
        read_f = open(os.path.join(path, file), 'r')
        write_f = open(os.path.join(OUTPUT_DIR, file), 'w')
        # Do stuff
        read_f.close()
        write_f.close()
Combined answer, accepting either a directory or a specific list of filenames as arguments:
import sys
import os.path
import glob

def processFile(filename):
    fileHandle = open(filename, "r")
    for line in fileHandle:
        # do some processing
        pass
    fileHandle.close()

def outputResults(filename):
    output_filemask = "out"
    fileHandle = open("%s.%s" % (filename, output_filemask), "w")
    # do some processing
    fileHandle.write('processed\n')
    fileHandle.close()

def processFiles(args):
    input_filemask = "log"
    directory = args[1]
    if os.path.isdir(directory):
        print("processing a directory")
        list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
    else:
        print("processing a list of files")
        list_of_files = sys.argv[1:]
    for file_name in list_of_files:
        print(file_name)
        processFile(file_name)
        outputResults(file_name)

if __name__ == '__main__':
    if len(sys.argv) > 1:
        processFiles(sys.argv)
    else:
        print('usage message')
from pylab import *  # provides csv2rec and mean (older matplotlib versions)
import glob

x = []
y = []
f = open("one.txt", 'w')
for infile in glob.glob('*.csv'):
    csv23 = csv2rec(infile, delimiter=',')
    for line in csv23:
        x.append(line[1])
    for i in range(3000, 8000):
        y.append(x[i])
    print(infile, "\t", mean(y))
    print(infile, "\t\t", mean(y), file=f)
    del y[:]
    del x[:]
f.close()
I know I saw this double with open() somewhere but couldn't remember where. So I built a small example in case someone needs it.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
""" This function will search a directory text files to be edited. """
## define some regex for our search and replace. We are looking for <, > and &
## To replaced with &ls;, > and &. We might want to replace proper whitespace
## chars to as well? (r'\t', ' ') and (f'\n', '<br>')
search_ = ((r'(<)', '<'), (r'(>)', '>'), (r'(&)', '&'))
## Read and loop our file location. Our location is the same one that our python file is in.
for loc in listdir(abspath(dirname(__file__))):
## Here we split our filename into it's parts ('fileName', '.txt')
name = splitext(loc)
if name[1] == '.txt':
## we found our .txt file so we can start file operations.
with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:
## read our first file
retFile = file_1.read()
## find and replace some text.
for find_ in search_:
retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)
## finally we can write to our newly created text file.
file_2.write(retFile)
This also works for reading multiple files. My files are named federalist_1.txt, federalist_2.txt, and so on up to federalist_84.txt (84 files in all), and I read each one as f:

for i in range(1, 85):
    with open(f'federalist_{i}.txt', 'r') as f:
        text = f.read()
A related question:
I am using os.walk in Python 2.7 to open multiple files and add all lines of interest from those files to a list. Later I'd like to edit those lines with fileinput and close the file. How can I achieve this? The code below is how I'm opening the files:
import os
import fnmatch
import fileinput

lines = []

def openFiles():
    for root, dirs, files in os.walk('/home/test1/'):
        for lists in fnmatch.filter(files, "*.txt"):
            filepath = os.path.join(root, lists)
            print filepath
            with open(filepath, "r") as sources:  # opens 8 files and reads their lines
                #edit = fileinput.input(filepath, inplace=1)
                for line in sources:
                    if line.startswith('xe'):
                        lines.append(line)
Then later, for each line that starts with xe, I'd like to add a # in front of it and then close the file. I'd like to do that in a different function.
Here's the way I do it, adding to your code:
import os
import fnmatch

def openFiles(dir):
    filePaths = []
    for root, dirs, files in os.walk(dir):
        for textFile in fnmatch.filter(files, "*.txt"):
            filepath = os.path.join(root, textFile)
            filePaths.append(filepath)
    return filePaths

def prefixLines(filepaths, chartoPrefix, prefixWith):
    res = ''
    for filepath in filepaths:
        # Read file
        with open(filepath, 'r') as f:
            for line in f:
                if line.startswith(chartoPrefix):
                    res += prefixWith + line
                else:
                    res += line
        # Write to file
        with open(filepath, 'w') as f:
            f.write(res)
            res = ''  # Reset res

prefixLines(openFiles(r'/home/test1/'), 'xe', '#')
prefixLines suffers from some shortcomings:

- Because we read all the lines of the files and store them in res, we may run out of memory for large files.
- If the programmer forgot to indent res = '' in the right block, or if res was omitted entirely and the code ran on actual files that the user needs, you would end up writing the contents of the previously read file into the next file, and the last file would contain the contents of all the files read before it. That's why you should run this code in a testing environment, or use it cautiously.
This code only demonstrates how you could achieve the desired effect: prefixing file lines that start with one string with another string. A slight improvement is therefore recommended: instead of reading the whole file into res, you could record just the line numbers that need to be prefixed, eliminating the need to load all the data into memory. enumerate can help here; it returns an iterable of index/item pairs (in 2.7 as well). By obviating res we not only save memory but also eliminate the shortcoming in the second bullet.
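A minimal sketch of that improvement (the function name is illustrative, not from the original code):

def linesToPrefix(filepath, startsWith):
    # Record only the numbers of the matching lines, instead of
    # holding the whole file's contents in memory.
    matches = []
    with open(filepath, 'r') as f:
        for lineno, line in enumerate(f, 1):
            if line.startswith(startsWith):
                matches.append(lineno)
    return matches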
I ended up doing it this way. I'm using classes in my main code, so it's split into two functions instead of one. In my main code I used a list to hold all the file paths, and used fileinput to open each path from the list with for line in fileinput.FileInput(pathlist, inplace=1): do something. Thanks to @direprobs for her answer, as she shed some light on how I'm supposed to do this.
import fnmatch
import fileinput
import os
import sys

def openFiles():
    for dirpath, dirs, files in os.walk('/home/test1/'):
        for filename in fnmatch.filter(files, "*.txt"):
            filepath = os.path.join(dirpath, filename)
            fi = fileinput.FileInput(filepath, inplace=1)
            for line in fi:
                if line.startswith("xe"):
                    line = "# {}".format(line)
                sys.stdout.write(line)
            fi.close()

openFiles()
I have a directory /directory/some_directory/ and in that directory I have a set of files. Those files are named in the following format: <letter>-<number>_<date>-<time>_<dataidentifier>.log, for example:
ABC1-123_20162005-171738_somestring.log
DE-456_20162005-171738_somestring.log
ABC1-123_20162005-153416_somestring.log
FG-1098_20162005-171738_somestring.log
ABC1-123_20162005-031738_somestring.log
DE-456_20162005-171738_somestring.log
I would like to read a subset of those files (for example, only files named ABC1-123*.log) and export all their contents to a single csv file (for example, output.csv), that is, a CSV file that will hold all the data from the individual files collectively.
The code that I have written so far:
#!/usr/bin/env python
import os

file_directory = os.getcwd()
m_class = "ABC1"
m_id = "123"
device = m_class + "-" + m_id

for data_file in sorted(os.listdir(file_directory)):
    if str(device) + "*" in os.listdir(file_directory):
        print(data_file)
I don't know how to read only the subset of filtered files, nor how to export them to a common csv file.
How can I achieve this?
Just use the re lib to match the file name pattern, and the csv lib to export.
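A minimal sketch of that suggestion (the pattern and the one-row-per-log-line CSV layout are assumptions, not from the question):

import csv
import os
import re

pattern = re.compile(r'^ABC1-123_.*\.log$')  # match only the wanted subset

with open('output.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    for name in sorted(os.listdir('.')):
        if pattern.match(name):
            with open(name, 'r') as log_file:
                for line in log_file:
                    # one row per log line, tagged with its source file
                    writer.writerow([name, line.rstrip('\n')])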
Only a few adjustments are needed; you were close:

filesFromDir = os.listdir(os.getcwd())
fileList = [file for file in filesFromDir if file.startswith(device)]

f = open("LogOutput.csv", "a")
for file in fileList:
    #print("Processing", file)
    with open(file, "r") as log_file:
        txt = log_file.read()
        f.write(txt)
        f.write("\n")
f.close()
Your question could be better stated. Based on your current code snippet, I'll assume that you want to:
1. Filter files in a directory based on a glob pattern.
2. Concatenate their contents to a file named output.csv.
In Python you can achieve (1.) by using glob to list filenames:

import glob

for filename in glob.glob('foo*bar'):
    print(filename)

That would print all files in the current directory whose names start with foo and end with bar.
For (2.) you just read each file and write its contents to your desired output, using Python's open() builtin function:

open('filename', 'r')

(Using 'r' as the mode you are asking Python to open the file for "reading"; with 'w' you are asking Python to open the file for "writing".)
The final code would look like the following:

import glob

device = 'ABC1-123'
with open('output.csv', 'w') as output:
    for filename in glob.glob(device + '*'):
        with open(filename, 'r') as input_file:
            output.write(input_file.read())
You can use the os module to list the files:

import os

files = os.listdir(os.getcwd())
m_class = "ABC1"
m_id = "123"
device = m_class + "-" + m_id
file_extension = ".log"

# filter the files by their extension and the starting name
files = [x for x in files if x.startswith(device) and x.endswith(file_extension)]

f = open("output.csv", "a")
for file in files:
    with open(file, "r") as data_file:
        f.write(data_file.read())
        f.write(",\n")
f.close()
New to Python...
I'm trying to have Python take a text file of file names (a new name on each row) and store them as strings...
i.e.

import os, shutil

files_to_find = []
with open('C:\\pathtofile\\lostfiles.txt') as fh:
    for row in fh:
        files_to_find.append(row.strip)
...in order to search for these files in directories and then copy any found files somewhere else...

for root, dirs, files in os.walk('D:\\'):
    for _file in files:
        if _file in files_to_find:
            print("Found file in: " + str(root))
            shutil.copy(os.path.abspath(root + '/' + _file), 'C:\\destination')

print("process completed")
Despite knowing these files exist, the script runs without any errors but without finding any files.
I added...

print(files_to_find)

...after the first block of code to see if it was finding anything, and saw screeds of <built-in method strip of str object at 0x00000000037FC730>.
Does this tell me it's not successfully creating strings to compare file names against? Where am I going wrong?
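That output is the giveaway: files_to_find.append(row.strip) appends the strip method object itself rather than the stripped string, so no filename ever matches. Call the method instead:

files_to_find.append(row.strip())  # note the (): call strip, don't store the method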
Use a list to collect the file names.
import glob
import os

def file_names(filepattern, directory):
    os.chdir(directory)
    file_list = []
    for line in sorted(glob.glob(filepattern)):
        file_list.append(os.path.basename(line))
    return file_list
Then loop over the list to compare.
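For example (a sketch; the directory is a placeholder, files_to_find is the list of wanted names from the question, and file_names is the function above):

found = file_names('*.txt', 'D:\\somefolder')
for wanted in files_to_find:
    if wanted in found:
        print('Found:', wanted)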
Brand new to Python, first-time poster, be easy on me please!
I would like to insert a line of text into all files with a specific extension (in the example, .mod) within the current folder. It can point to a specific folder if that is easier.
Below is something I copied and modified. It does exactly what I need for one specific file. The part about replacing sit with SIT is completely unnecessary, but if I remove it the program doesn't work. I have no idea why that is, but I can live with it.
import sys, fileinput

for i, line in enumerate(fileinput.input('filename.mod', inplace=1)):
    sys.stdout.write(line.replace('sit', 'SIT'))
    if i == 30:
        sys.stdout.write('TextToInsertIntoLine32' '\n')  # adds new line and text to line 32
My question is: how do I run this for all files in a directory? I have tried replacing the filename with sys.argv[1] and calling the script from the command line with '*.mod', which did not work for me. Any help would be appreciated.
You can do it like this:

import os

folder = '...'  # your directory
files = [f for f in os.listdir(folder) if f.endswith('.mod')]

This gives you a list of the files with the '.mod' extension; you can then run your function on each of them, as sketched below.
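For example, you could feed that list into a fileinput loop like the one from the question (a sketch; folder and files come from the snippet above):

import fileinput
import os
import sys

paths = [os.path.join(folder, name) for name in files]
for line in fileinput.input(paths, inplace=True):
    sys.stdout.write(line.replace('sit', 'SIT'))
    if fileinput.filelineno() == 30:
        sys.stdout.write('TextToInsertIntoLine32' '\n')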
You could use glob.glob to list all the files in the current working directory whose filenames end with .mod:

import fileinput
import glob
import sys

for line in fileinput.input(glob.glob('*.mod'), inplace=True):
    sys.stdout.write(line.replace('sit', 'SIT'))
    if fileinput.filelineno() == 32:
        # adds new line and text to line 32
        sys.stdout.write('TextToInsertIntoLine32' '\n')
You can use the os.walk function:

import os

for root, dirs, files in os.walk("/mydir"):
    for file in files:
        if file.endswith(".mod"):
            filepath = os.path.join(root, file)
            with open(filepath, 'r', encoding='utf-8') as f:
                text = f.read()
            print('%s read' % filepath)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(text.replace('sit', 'SIT'))
            print('%s updated' % filepath)
Easy: you don't have to specify a filename. fileinput will take the filenames from sys.argv by default. You don't even have to use enumerate, as fileinput numbers the lines in each file:

import sys, fileinput

for line in fileinput.input(inplace=True):
    sys.stdout.write(line.replace('sit', 'SIT'))
    if fileinput.filelineno() == 30:
        sys.stdout.write('TextToInsertIntoLine32' '\n')
I have a Python script, parse.py, which opens a file, say file1, and then does something, maybe prints out the total number of characters:

filename = 'file1'
f = open(filename, 'r')
content = f.read()
print(filename, len(content))
Right now, I am using stdout to direct the result to my output file output:

python parse.py >> output

However, I don't want to do this file by file manually. Is there a way to take care of every single file automatically? Something like:

ls | awk '{print}' | python parse.py >> output

Then the problem is how I could read the file name from standard input. Or are there already some built-in functions that make the ls kind of work easy?
Thanks!
Os
You can list all files in the current directory using os.listdir:

import os

for filename in os.listdir(os.getcwd()):
    with open(os.path.join(os.getcwd(), filename), 'r') as f:  # open in readonly mode
        # do your stuff
        pass
Glob
Or you can list only some files, depending on the file pattern, using the glob module:

import os, glob

for filename in glob.glob('*.txt'):
    with open(os.path.join(os.getcwd(), filename), 'r') as f:  # open in readonly mode
        # do your stuff
        pass
It doesn't have to be the current directory; you can list files in any path you want:

import os, glob

path = '/some/path/to/file'
for filename in glob.glob(os.path.join(path, '*.txt')):
    with open(filename, 'r') as f:  # open in readonly mode
        # do your stuff
        pass
Pipe
Or you can even use the pipe as you specified, using fileinput:

import fileinput

for line in fileinput.input():
    # with no arguments, fileinput reads from stdin, so each
    # 'line' here is a filename piped in; open it to do your stuff
    pass

And you can then use it with piping:

ls -1 | python parse.py
You should try using os.walk:

import os

yourpath = 'path'
for root, dirs, files in os.walk(yourpath, topdown=False):
    for name in files:
        print(os.path.join(root, name))
        # do stuff with the file
    for name in dirs:
        print(os.path.join(root, name))
        # do stuff with the directory
I was looking for this answer:

import os, glob

folder_path = '/some/path/to/file'
for filename in glob.glob(os.path.join(folder_path, '*.htm')):
    with open(filename, 'r') as f:
        text = f.read()
        print(filename)
        print(len(text))

You can use '*.txt' or another ending for your filenames as well.
You can actually just use the os module to do both:

- list all files in a folder
- sort files by file type, file name, etc.

Here's a simple example:

import os  # os module imported here

location = os.getcwd()  # get present working directory location here
counter = 0  # keep a count of all files found
csvfiles = []  # list to store all csv files found at location
filebeginwithhello = []  # list to keep all files that begin with 'hello'
otherfiles = []  # list to keep any other file that does not match the criteria

for file in os.listdir(location):
    if file.startswith("hello") and file.endswith(".csv"):
        # checked first, because some files may start with hello and also be csv files
        print("csv file found:\t", file)
        csvfiles.append(str(file))
        counter = counter + 1
    elif file.endswith(".csv"):
        print("csv file found:\t", file)
        csvfiles.append(str(file))
        counter = counter + 1
    elif file.startswith("hello"):
        print("hello files found: \t", file)
        filebeginwithhello.append(file)
        counter = counter + 1
    else:
        otherfiles.append(file)
        counter = counter + 1

if counter == 0:
    print("No files found here!")
print("Total files found:\t", counter)
Now you have not only listed all the files in a folder but also have them (optionally) sorted by starting name, file type, and so on. Just iterate over each list and do your stuff.
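For instance, a minimal follow-up loop over one of those lists (printing each csv file's first line is just a stand-in for real processing):

for csv_name in csvfiles:
    with open(os.path.join(location, csv_name), 'r') as f:
        print(csv_name, '->', f.readline().strip())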
This GUI-automation approach collects every file under a target directory, then opens and prints each one through Adobe Acrobat:

import keyboard
import os
import pyperclip
import time

os.chdir("target directory")

# get the current directory
cwd = os.getcwd()

# collect the absolute path of every file under cwd
files = []
for i in os.walk(cwd):
    for j in i[2]:
        files.append(os.path.join(i[0], j))

os.startfile(r"C:\Program Files (x86)\Adobe\Acrobat 11.0\Acrobat\Acrobat.exe")
time.sleep(1)
for i in files:
    print(i)
    pyperclip.copy(i)  # put the path on the clipboard
    # Ctrl+O, paste the path, Enter: open the file in Acrobat
    keyboard.press('ctrl')
    keyboard.press_and_release('o')
    keyboard.release('ctrl')
    time.sleep(1)
    keyboard.press('ctrl')
    keyboard.press_and_release('v')
    keyboard.release('ctrl')
    time.sleep(1)
    keyboard.press_and_release('enter')
    # Ctrl+P, Enter: print the file
    keyboard.press('ctrl')
    keyboard.press_and_release('p')
    keyboard.release('ctrl')
    keyboard.press_and_release('enter')
    time.sleep(3)
    # Ctrl+W: close the document
    keyboard.press('ctrl')
    keyboard.press_and_release('w')
    keyboard.release('ctrl')
pyperclip.copy('')
The code below reads any text files in the directory that contains the script being run. It opens every text file and stores the words of each line in a list. After storing the words, we print each word line by line:
import os, fnmatch

listOfFiles = os.listdir('.')
pattern = "*.txt"
store = []
for entry in listOfFiles:
    if fnmatch.fnmatch(entry, pattern):
        with open(entry, "r") as file_handle:
            content = file_handle.read()
        contentList = content.split(" ")
        for i in contentList:
            if i != '\n' and i != "\r\n":
                store.append(i)

for i in store:
    print(i)
If you would like to open files in a directory and append their contents to a list, do this:

import os

mylist = []
for filename in os.listdir('path/here/'):
    with open(os.path.join('path/here/', filename), 'r') as f:
        mylist.append(f.read())
You may try another approach, using os.walk and os.path.join, which is a little different from the options above:

import os

for root, dirs, files in os.walk(EnterYourPath):
    for name in files:
        with open(os.path.join(root, name)) as f:
            text = f.read()

The text variable holds the contents of each file in turn as the loop runs.