I wrote a small Python program which processes all the files in a directory. I want to restrict it to include only the JSON files in that directory. For example, the line for fname in fileList: in the code snippet below should only enumerate files with the extension *.json.
import os
import json

# Set the directory you want to start from
rootDir = '/home/jas_parts'
for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    for fname in fileList:
        print('\t%s' % fname)
        fname = 'jas_parts/' + fname
        with open(fname, 'r+') as f:
            json_data = json.load(f)
            event = json_data['Total']
            print(event)
Since your file name is a string, you can use the str.endswith method to check whether it is a JSON file.
if fname.endswith('.json'):
    # do_something()
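For example, folding that check into your loop could look like this (a minimal sketch; it builds the path with os.path.join(dirName, fname) instead of the hard-coded 'jas_parts/' prefix):

import os
import json

rootDir = '/home/jas_parts'
for dirName, subdirList, fileList in os.walk(rootDir):
    for fname in fileList:
        if not fname.endswith('.json'):
            continue  # skip anything that is not a *.json file
        path = os.path.join(dirName, fname)
        with open(path, 'r') as f:
            json_data = json.load(f)
        print(json_data['Total'])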
Just filter the names that you are interested in.
if fname[-5:] == '.json':
(of course, you can also use os.path.splitext, or re; it doesn't really matter how you get to the extension)
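For instance, a minimal os.path.splitext sketch (fname being the bare file name from fileList):

import os

name, ext = os.path.splitext(fname)
if ext == '.json':
    pass  # process the file here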
Here's a general solution to the question: "How do I do X to all files with names matching some pattern under directory Y?"
#!python
from __future__ import print_function
import fnmatch, os, os.path

def files_under(directory, pattern):
    '''Yield all files matching pattern under some directory
    '''
    for p, dnames, fnames in os.walk(directory):
        for match in fnmatch.filter(fnames, pattern):
            yield os.path.join(p, match)

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 3:
        print('Must supply path and (quoted) pattern', file=sys.stderr)
        sys.exit(1)
    try:
        for each in files_under(sys.argv[1], sys.argv[2]):
            print(each)
    except EnvironmentError as e:
        print('Error trying to walk tree: %s' % e, file=sys.stderr)
        sys.exit(2)
The function is files_under() and the rest is just a very simplistic wrapper around it to print the matching results.
It's also easy to extend this to handle multiple patterns and even, with a little extra work, to ensure that files with names matching multiple patterns are only yielded once each. But I'll leave these enhancements as an exercise to the student.
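If you want a starting point anyway, one possible sketch of the multi-pattern version (assuming the same imports as above; the set makes sure a file matching several patterns is only yielded once):

def files_under_any(directory, patterns):
    '''Yield each file matching at least one of the patterns, once only.'''
    seen = set()
    for p, dnames, fnames in os.walk(directory):
        for pattern in patterns:
            for match in fnmatch.filter(fnames, pattern):
                full = os.path.join(p, match)
                if full not in seen:
                    seen.add(full)
                    yield full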
Related
Using the code below,
I want it to search for all files named sh, like sh.c, sh.txt, sh.cpp etc. But it does not find anything unless I write lookfor=sh.txt or lookfor=sh.pdf instead of lookfor=sh.
In other words, by writing lookfor=sh it should find every file whose name is sh, regardless of extension. Please help.
import os
from os.path import join

lookfor = "sh"
for root, dirs, files in os.walk('C:\\'):
    if lookfor in files:
        print "found: %s" % join(root, lookfor)
        break
Try glob:
import glob
print glob.glob('sh.*') #this will give you all sh.* files in the current dir
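If you need to search the whole tree rather than just the current directory, glob can also do that on Python 3.5+ (a sketch, assuming you start from C:\):

import glob

# ** matches any number of intermediate directories when recursive=True (Python 3.5+)
print(glob.glob('C:\\**\\sh.*', recursive=True))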
Replace:
if lookfor in files:
With:
for filename in files:
    if filename.rsplit('.', 1)[0] == lookfor:
What filename.rsplit('.', 1)[0] does is remove the rightmost part of the file name found after a dot (i.e. the extension). If the file name has several dots in it, the rest is kept.
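A quick interactive illustration of that behaviour (the file names are just examples):

>>> "sh.txt".rsplit('.', 1)[0]
'sh'
>>> "archive.tar.gz".rsplit('.', 1)[0]
'archive.tar'
>>> "sh".rsplit('.', 1)[0]   # no dot at all: the name comes back unchanged
'sh'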
Presumably you want to search for files with sh as their basename. (The part of the name excluding the path and extension.) You can do this with the filter function from the fnmatch module.
import os
from os.path import join
import fnmatch

lookfor = "sh.*"
for root, dirs, files in os.walk('C:\\'):
    for found in fnmatch.filter(files, lookfor):
        print "found: %s" % join(root, found)
The line
if lookfor in files:
says that the following code should be executed if the list files contains the string given in lookfor.
However, the test you want is that the found file name starts with the given string and continues with a dot.
Besides, you want the real filename to be determined.
So your code should be
import os
from os.path import join, splitext

lookfor = "sh"
found = None
for root, dirs, files in os.walk('C:\\'):
    for file in files:  # test them one after the other
        if splitext(file)[0] == lookfor:
            found = join(root, file)
            print "found: %s" % found
            break
    if found: break
This can even be improved, as I don't like the way I break out of the outer for loop.
Maybe you want to have it as a function:
def look(lookfor, where):
    import os
    from os.path import join, splitext
    for root, dirs, files in os.walk(where):
        for file in files:  # test them one after the other
            if splitext(file)[0] == lookfor:
                found = join(root, file)
                return found

found = look("sh", "C:\\")
if found is not None:
    print "found: %s" % found
import os
from os.path import join

lookfor = "sh."
for root, dirs, files in os.walk('C:\\'):
    for filename in files:
        if filename.startswith(lookfor):
            print "found: %s" % join(root, filename)
You may want to read the doc for fnmatch and glob too.
I want to move a lot of files. The paths to these files are stored in a list. I want to keep the whole directory structure but move the files to a different folder.
So for example the files are
D:\test\test1\test1.txt
D:\test\test1\test2.txt
I want to move them to C:\ from D:\ and keep the directory structure. How should I go about doing it?
This is the code I have; it is not working:
import os, fnmatch
import shutil

f = open('test_logs.txt', 'r')  # logs where filenames are stored with filenames as first entry
for line in f:
    filename = line.split()
    output_file = "C:" + filename[0].lstrip("D:")
    shutil.move(filename[0], output_file)
I read the file name fine and I can generate the destination filename fine but when I run it, it gives me an error saying "No such file or directory" (and gives the path of the output filename).
I think you want something like this:
import sys
import os
import shutil

# terminology:
# path = full path to a file, i.e. directory + file name
# directory = directory, possibly starting with a drive
# file name = the last component of the path

sourcedrive = 'D:'
destdrive = 'C:'

log_list_file = open('test_logs.txt', 'r')
for line in log_list_file:
    sourcepath = line.split()[0]  # XXX is this correct?
    if sourcepath.startswith(sourcedrive):
        destpath = sourcepath.replace(sourcedrive, destdrive, 1)
    else:
        print >>sys.stderr, 'Skipping %s: Not on %s' % (sourcepath, sourcedrive)
        continue
    destdir = os.path.dirname(destpath)
    if not os.path.isdir(destdir):
        try:
            os.makedirs(destdir)
        except (OSError, IOError) as e:
            print >>sys.stderr, 'Error making %s: %s' % (destdir, e)
            continue
    try:
        shutil.move(sourcepath, destpath)
    except (OSError, IOError, shutil.Error) as e:
        print >>sys.stderr, 'Error moving %s to %s: %s' % (sourcepath, destpath, e)
Do you also want to remove the source directory if it's empty?
Update: Ah, ok, I see the problem -- shutil.move won't copy to a nonexistent directory. To do what you're trying to do, you have to create the new directory tree first. Since it's a bit safer to use a built-in move function than to roll your own copy-and-delete procedure, you could do this:
with open('test_logs.txt', 'r') as f:
    files_to_copy = [line.split()[0] for line in f]

paths_to_copy = set(os.path.split(filename)[0] for filename in files_to_copy)

def ignore_files(path, names, ptc=paths_to_copy):
    return [name for name in names if os.path.join(path, name) not in ptc]

shutil.copytree(src, dst, ignore=ignore_files)

for filename in files_to_copy:
    output_file = "C:" + filename.lstrip("D:")
    shutil.move(filename, output_file)
Let me know if that doesn't work
Original Post: If you want to move only some of the files, your best bet is to use shutil.copytree's ignore keyword. Assuming your list of files includes full paths and directories (i.e. ['D:\test\test1\test1.txt', 'D:\test\test1\test2.txt', 'D:\test\test1']), create an ignore_files function and use it like this:
files_to_copy = [r'D:\test\test1\test1.txt', r'D:\test\test1\test2.txt', r'D:\test\test1']

def ignore_files(path, names, ftc=files_to_copy):
    return [name for name in names if os.path.join(path, name) not in ftc]

shutil.copytree(src, dst, ignore=ignore_files)
Then you can just delete the files in files_to_copy:
for f in files_to_copy:
    try:
        os.remove(f)
    except OSError:  # can't remove() a directory, so pass
        pass
I tested this -- make certain that you include the paths you want to copy as well as the files in files_to_copy -- otherwise, this will delete files without copying them.
I would like to build a small Python script that basically does the reverse of grep.
I want to match the files in a directory/subdirectory that don't contain a "searched_string".
So far I've done this:
import os

filefilter = ['java', '.jsp']
path = "/home/patate/code/project"

for path, subdirs, files in os.walk(path):
    for name in files:
        if name[-4:] in filefilter:
            print os.path.join(path, name)
This small script lists every file with a "java" or "jsp" extension inside each subdirectory and outputs its full path.
I'm now wondering how to do the rest. For example, if I forgot a session management entry in one file (allowing anyone direct file access), I would like to be able to search for:
"if (!user.hasPermission" and list the files which do not contain this string.
Any help would be greatly appreciated !
Thanks
To check whether a file whose path is bound to variable f contains a string bound to name s, the simplest approach (and one acceptable for most reasonably-sized files) is something like
with open(f) as fp:
    if s in fp.read():
        print '%s has the string' % f
    else:
        print "%s doesn't have the string" % f
In your os.walk loop, you have the root path and filename separately, so
f = os.path.join(path, name)
(what you're unconditionally printing) is the path you want to open and check.
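Putting the walk and the check together, a minimal sketch of the reverse-grep (assuming the same root directory and extension filter as in your snippet):

import os

searched = "if (!user.hasPermission"
filefilter = ['java', '.jsp']

for path, subdirs, files in os.walk("/home/patate/code/project"):
    for name in files:
        if name[-4:] not in filefilter:
            continue
        f = os.path.join(path, name)
        with open(f) as fp:
            if searched not in fp.read():
                print("%s doesn't have the string" % f)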
Instead of printing the file name, call a function that checks whether the file content is missing the texts you want to have in your source files. In such cases I use a check_file() that looks like this:
import re

WARNING_RX = (
    (re.compile(r'if\s+\(!\s*user.hasPermission'), 'user.hasPermission'),
    (re.compile(r'other regexp you want to have'), 'very important'),
)

def check_file(fn):
    f = open(fn, 'r')
    content = f.read()
    f.close()
    for rx, rx_desc in WARNING_RX:
        if not rx.search(content):
            print('%s: not found: %s' % (fn, rx_desc))
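Hooked into the walk from your snippet, the call could look like this (a sketch reusing your extension filter):

import os

for path, subdirs, files in os.walk("/home/patate/code/project"):
    for name in files:
        if name[-4:] in ('java', '.jsp'):
            check_file(os.path.join(path, name))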
I'm trying to rename some files in a directory using Python.
Say I have a file called CHEESE_CHEESE_TYPE.*** and want to remove CHEESE_ so my resulting filename would be CHEESE_TYPE
I'm trying to use the os.path.split but it's not working properly. I have also considered using string manipulations, but have not been successful with that either.
Use os.rename(src, dst) to rename or move a file or a directory.
$ ls
cheese_cheese_type.bar  cheese_cheese_type.foo
$ python
>>> import os
>>> for filename in os.listdir("."):
...     if filename.startswith("cheese_"):
...         os.rename(filename, filename[7:])
...
>>>
$ ls
cheese_type.bar  cheese_type.foo
Here's a script based on your newest comment.
#!/usr/bin/env python
from os import rename, listdir

badprefix = "cheese_"
fnames = listdir('.')

for fname in fnames:
    if fname.startswith(badprefix*2):
        rename(fname, fname.replace(badprefix, '', 1))
The following code should work. It takes every filename in the current directory, if the filename contains the pattern CHEESE_CHEESE_ then it is renamed. If not nothing is done to the filename.
import os

for fileName in os.listdir("."):
    os.rename(fileName, fileName.replace("CHEESE_CHEESE_", "CHEESE_"))
Assuming you are already in the directory, and that the "first 8 characters" from your comment hold true always. (Although "CHEESE_" is 7 characters... ? If so, change the 8 below to 7)
from glob import glob
from os import rename

for fname in glob('*.prj'):
    rename(fname, fname[8:])
I have the same issue, where I want to replace the white space in any PDF file name with a dash -.
But the files were in multiple sub-directories. So, I had to use os.walk().
In your case for multiple sub-directories, it could be something like this:
import os

for dpath, dnames, fnames in os.walk('/path/to/directory'):
    for f in fnames:
        os.chdir(dpath)
        if f.startswith('cheese_'):
            os.rename(f, f.replace('cheese_', ''))
Try this:
import os
import shutil

for file in os.listdir(dirpath):
    newfile = os.path.join(dirpath, file.split("_", 1)[1])
    shutil.move(os.path.join(dirpath, file), newfile)
I'm assuming you don't want to remove the file extension, but you can just do the same split with periods.
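If you did also want to drop the extension, the same split idea with a period could look like this (a sketch; dirpath as above, and the file name in the comments is just the example from the question):

import os
import shutil

for file in os.listdir(dirpath):
    stem = file.split(".", 1)[0]      # "CHEESE_CHEESE_TYPE.foo" -> "CHEESE_CHEESE_TYPE"
    newname = stem.split("_", 1)[1]   # -> "CHEESE_TYPE"
    shutil.move(os.path.join(dirpath, file), os.path.join(dirpath, newname))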
This sort of stuff is perfectly fitted for IPython, which has shell integration.
In [1]: files = !ls
In [2]: for f in files:
   ...:     newname = process_filename(f)
   ...:     mv $f $newname
Note: to store this in a script, use the .ipy extension, and prefix all shell commands with !.
See also: http://ipython.org/ipython-doc/stable/interactive/shell.html
Here is a more general solution:
This code can be used to remove any particular character or set of characters recursively from all filenames within a directory and replace them with any other character, set of characters or no character.
import os

paths = (os.path.join(root, filename)
         for root, _, filenames in os.walk(r'C:\FolderName')
         for filename in filenames)

for path in paths:
    # the '#' in the example below will be replaced by the '-' in the filenames in the directory
    newname = path.replace('#', '-')
    if newname != path:
        os.rename(path, newname)
It seems that your problem is more in determining the new file name rather than the rename itself (for which you could use the os.rename method).
It is not clear from your question what the pattern is that you want to be renaming. There is nothing wrong with string manipulation. A regular expression may be what you need here.
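As an illustration, a regex-based sketch for the CHEESE_CHEESE_ case from the question (the pattern is just an example; adjust it to your actual naming scheme):

import os
import re

# a leading CHEESE_ that is immediately followed by another CHEESE_
pattern = re.compile(r'^CHEESE_(?=CHEESE_)')

for fname in os.listdir('.'):
    newname = pattern.sub('', fname)
    if newname != fname:
        os.rename(fname, newname)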
import os
import string

def rename_files():
    # List all files in the directory
    file_list = os.listdir("/Users/tedfuller/Desktop/prank/")
    print(file_list)

    # Change current working directory and print out its location
    working_location = os.chdir("/Users/tedfuller/Desktop/prank/")
    working_location = os.getcwd()
    print(working_location)

    # Rename all the files in that directory
    for file_name in file_list:
        os.rename(file_name, file_name.translate(str.maketrans("", "", string.digits)))

rename_files()
This command will remove the initial "CHEESE_" string from all the files in the current directory, using renamer:
$ renamer --find "/^CHEESE_/" *
I was originally looking for some GUI which would allow renaming using regular expressions and which had a preview of the result before applying changes.
On Linux I have successfully used krename, on Windows Total Commander does renaming with regexes, but I found no decent free equivalent for OSX, so I ended up writing a python script which works recursively and by default only prints the new file names without making any changes. Add the '-w' switch to actually modify the file names.
#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import fnmatch
import sys
import shutil
import re


def usage():
    print """
Usage:
%s <work_dir> <search_regex> <replace_regex> [-w|--write]

By default no changes are made, add '-w' or '--write' as last arg to actually rename files
after you have previewed the result.
""" % (os.path.basename(sys.argv[0]))


def rename_files(directory, search_pattern, replace_pattern, write_changes=False):
    pattern_old = re.compile(search_pattern)
    for path, dirs, files in os.walk(os.path.abspath(directory)):
        for filename in fnmatch.filter(files, "*.*"):
            if pattern_old.findall(filename):
                new_name = pattern_old.sub(replace_pattern, filename)
                filepath_old = os.path.join(path, filename)
                filepath_new = os.path.join(path, new_name)
                if not new_name:
                    print 'Replacement regex {} returns empty value! Skipping'.format(replace_pattern)
                    continue
                print new_name
                if write_changes:
                    shutil.move(filepath_old, filepath_new)
            else:
                print 'Name [{}] does not match search regex [{}]'.format(filename, search_pattern)


if __name__ == '__main__':
    if len(sys.argv) < 4:
        usage()
        sys.exit(-1)

    work_dir = sys.argv[1]
    search_regex = sys.argv[2]
    replace_regex = sys.argv[3]
    write_changes = (len(sys.argv) > 4) and sys.argv[4].lower() in ['--write', '-w']
    rename_files(work_dir, search_regex, replace_regex, write_changes)
Example use case
I want to flip parts of a file name in the following manner, i.e. move the bit m7-08 to the beginning of the file name:
# Before:
Summary-building-mobile-apps-ionic-framework-angularjs-m7-08.mp4
# After:
m7-08_Summary-building-mobile-apps-ionic-framework-angularjs.mp4
This will perform a dry run, and print the new file names without actually renaming any files:
rename_files_regex.py . "([^\.]+?)-(m\\d+-\\d+)" "\\2_\\1"
This will do the actual renaming (you can use either -w or --write):
rename_files_regex.py . "([^\.]+?)-(m\\d+-\\d+)" "\\2_\\1" --write
You can use os.system function for simplicity and to invoke bash to accomplish the task:
import os
os.system('mv old_filename new_filename')
This works for me.
import os

for afile in os.listdir('.'):
    filename, file_extension = os.path.splitext(afile)
    if not file_extension == '.xyz':
        os.rename(afile, filename + '.abc')
What about this:
import re
p = re.compile(r'_')
p.split(filename, 1) #where filename is CHEESE_CHEESE_TYPE.***
How can you replace a string match inside a file with the given replacement, recursively, inside a given directory and its subdirectories?
Pseudo-code:
import os
import re
from os.path import walk

for root, dirs, files in os.walk("/home/noa/Desktop/codes"):
    for name in dirs:
        re.search("dbname=noa user=noa", "dbname=masi user=masi")
        # I am trying to replace here a given match in a file
Put all this code into a file called mass_replace. Under Linux or Mac OS X, you can do chmod +x mass_replace and then just run this. Under Windows, you can run it with python mass_replace followed by the appropriate arguments.
#!/usr/bin/python

import os
import re
import sys

# list of extensions to replace
DEFAULT_REPLACE_EXTENSIONS = None

# example: uncomment next line to only replace *.c, *.h, and/or *.txt
# DEFAULT_REPLACE_EXTENSIONS = (".c", ".h", ".txt")


def try_to_replace(fname, replace_extensions=DEFAULT_REPLACE_EXTENSIONS):
    if replace_extensions:
        return fname.lower().endswith(replace_extensions)
    return True


def file_replace(fname, pat, s_after):
    # first, see if the pattern is even in the file.
    with open(fname) as f:
        if not any(re.search(pat, line) for line in f):
            return  # pattern does not occur in file so we are done.

    # pattern is in the file, so perform replace operation.
    with open(fname) as f:
        out_fname = fname + ".tmp"
        out = open(out_fname, "w")
        for line in f:
            out.write(re.sub(pat, s_after, line))
        out.close()
        os.rename(out_fname, fname)


def mass_replace(dir_name, s_before, s_after, replace_extensions=DEFAULT_REPLACE_EXTENSIONS):
    pat = re.compile(s_before)
    for dirpath, dirnames, filenames in os.walk(dir_name):
        for fname in filenames:
            if try_to_replace(fname, replace_extensions):
                fullname = os.path.join(dirpath, fname)
                file_replace(fullname, pat, s_after)


if len(sys.argv) != 4:
    u = "Usage: mass_replace <dir_name> <string_before> <string_after>\n"
    sys.stderr.write(u)
    sys.exit(1)

mass_replace(sys.argv[1], sys.argv[2], sys.argv[3])
EDIT: I have changed the above code from the original answer. There are several changes. First, mass_replace() now calls re.compile() to pre-compile the search pattern; second, to check what extension the file has, we now pass in a tuple of file extensions to .endswith() rather than calling .endswith() three times; third, it now uses the with statement available in recent versions of Python; and finally, file_replace() now checks to see if the pattern is found within the file, and doesn't rewrite the file if the pattern is not found. (The old version would rewrite every file, changing the timestamps even if the output file was identical to the input file; this was inelegant.)
EDIT: I changed this to default to replacing every file, but with one line you can edit to limit it to particular extensions. I think replacing every file is a more useful out-of-the-box default. This could be extended with a list of extensions or filenames not to touch, options to make it case insensitive, etc.
EDIT: In a comment, @asciimo pointed out a bug. I edited this to fix the bug. str.endswith() is documented to accept a tuple of strings to try, but not a list. Fixed. Also, I made a couple of the functions accept an optional argument to let you pass in a tuple of extensions; it should be pretty easy to modify this to accept a command-line argument to specify which extensions.
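To illustrate the tuple-versus-list point (a quick interactive check, not part of the script):

>>> "notes.txt".endswith((".c", ".h", ".txt"))   # a tuple works
True
>>> "notes.txt".endswith([".c", ".h", ".txt"])   # a list raises TypeError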
Do you really need regular expressions?
import os

def recursive_replace(root, pattern, replace):
    for dir, subdirs, names in os.walk(root):
        for name in names:
            path = os.path.join(dir, name)
            text = open(path).read()
            if pattern in text:
                open(path, 'w').write(text.replace(pattern, replace))
Of course, if you just want to get it done without coding it up, use find and xargs:
find /home/noa/Desktop/codes -type f -print0 | \
    xargs -0 sed --in-place "s/dbname=noa user=noa/dbname=masi user=masi/"
(And you could likely do this with find's -exec or something as well, but I prefer xargs.)
This is how I would find and replace strings in files using Python. It is a simple little function that recursively searches a directory for a string and replaces it with another string. You can also limit it to files with a certain extension, as in the example below.
import os, fnmatch

def findReplace(directory, find, replace, filePattern):
    for path, dirs, files in os.walk(os.path.abspath(directory)):
        for filename in fnmatch.filter(files, filePattern):
            filepath = os.path.join(path, filename)
            with open(filepath) as f:
                s = f.read()
            s = s.replace(find, replace)
            with open(filepath, "w") as f:
                f.write(s)
This allows you to do something like:
findReplace("some_dir", "find this", "replace with this", "*.txt")
This should work:
import re, os
import fnmatch

for path, dirs, files in os.walk(os.path.abspath(directory)):
    for filename in fnmatch.filter(files, filePattern):
        filepath = os.path.join(path, filename)
        with open(filepath, 'r') as readf:
            lines = [re.sub(r"dbname=noa user=noa", "dbname=masi user=masi", line)
                     for line in readf]
        with open(filepath, 'w') as out:
            out.writelines(lines)