I am currently trying to search for a list of missing attachments that are scattered throughout our file share server. I have a txt file with the filenames I would like to find, and ideally I would like to copy each file to another location once it is found. At the moment it doesn't seem to be working: it returns no results, even though I have verified that the files exist.
import os
import shutil

source = open("Test.txt", "r")
dest = '//somewhere/somefolder'
path = '//somewhere/anotherfolder'

for line in source:
    print('Searching . . .')
    print(line)
    for root, dirs, files in os.walk(path):
        for name in files:
            if name in line:
                # shutil.copy(os.path.join(root, name), dest)
                print('Found!')
                print(path, name)
print('Done')
Question/Comment: is there a way to ignore the path and only search for the filename?
For instance:
import os
fpath = "//somewhere/anotherfolder/filename.pdf"
fname = os.path.basename(fpath)
print('fname=%s' % fname)
Output:
fname=filename.pdf
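Putting the two together, a minimal sketch of the search-and-copy loop (assuming Test.txt holds one bare filename per line; the share paths are the placeholders from the question):

import os
import shutil

path = '//somewhere/anotherfolder'   # tree to search
dest = '//somewhere/somefolder'      # where found files get copied

# Read the wanted names once, stripping the trailing newline from each line.
with open('Test.txt') as source:
    wanted = {line.strip() for line in source}

for root, dirs, files in os.walk(path):
    for name in files:
        if name in wanted:
            print('Found!')
            print(os.path.join(root, name))
            shutil.copy(os.path.join(root, name), dest)

Loading the names into a set means the tree is walked once rather than once per line of the txt file, and stripping each line removes the trailing newline that otherwise makes exact comparisons fail silently.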
Related
I have a path that contains many directories. I need to go through each directory, get a specific file "file.log.gz" from it, read that file, and do some processing.
This is my current attempt:
import os
import sys
import gzip

infile = sys.argv[1]
directory = ("%s/NEW_FOLDER" % infile)

for root, dirs, files in os.walk(directory):
    for file in files:
        if "file.log.gz" in file:
            with gzip.open(os.path.join(root, file)) as fin:
                new = False
                for line in fin:
                    if "CODE" in line.decode('utf-8'):
                        print("string is present")
                        found = True
                        exit()
                    else:
                        print("string is not present")
What I need is to go through each directory inside NEW_FOLDER, get file.log.gz from it, and run the same process on file.log.gz in each directory.
With the current code I find file.log.gz inside each directory, but I'm not able to do the rest of the process, that is, opening file.log.gz in each directory and running the check on it.
Expected Output:
/NEW_FOLDER/dir1/file.log.gz
string is present
/NEW_FOLDER/dir2/file.log.gz
string is present
/NEW_FOLDER/dir3/file.log.gz
string is not present
That is because of how you are using os.walk(): you need to join the root directory with the filename. You will notice it if you print(file) and look at the values you are getting.
Try printing this out. You are supposed to pass the entire path to open(), not just the file name.
for file in files:
    print(os.path.join(root, file))
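Separately, the reason only the first match is reported is the exit() call: it stops the whole script, not just the current file. A sketch of the loop restructured so file.log.gz in every directory gets checked (names taken from the question; break only stops reading the current file):

import os
import sys
import gzip

directory = os.path.join(sys.argv[1], "NEW_FOLDER")

for root, dirs, files in os.walk(directory):
    for file in files:
        if file == "file.log.gz":
            path = os.path.join(root, file)
            found = False
            with gzip.open(path) as fin:
                for line in fin:
                    if "CODE" in line.decode('utf-8'):
                        found = True
                        break  # stop reading this file, keep walking the rest
            print(path)
            print("string is present" if found else "string is not present")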
I have a file structure something like this:
/a.zip
    /not_a_zip/
        contents
    /b.zip
        contents
and I want to create a directory a, extract a.zip into it, and extract all the nested zip files in place, so I get something like this:
/a/
    /not_a_zip/
        contents
    /b/
        contents
I tried this solution, but I was getting errors because inside my main directory I have subdirectories, as well as zip files.
I want to be able to extract the main zip file into a directory of the same name, then be able to extract all nested files within, no matter how deeply nested they are.
EDIT: my current code is this:
archive = zipfile.ZipFile(zipped, 'r')
for file in archive.namelist():
    archive.extract(file, resultDirectory)

for f in [filename for filename in archive.NameToInfo if filename.endswith(".zip")]:
    # get file name and path to extract
    fileToExtract = resultDirectory + '/' + f
    # get directory to extract new file to
    directoryToExtractTo = fileToExtract.rsplit('/', 1)
    directoryToExtractTo = directoryToExtractTo[0] + '/'
    # extract nested file
    nestedArchive = zipfile.ZipFile(fileToExtract, 'r')
    for file in nestedArchive.namelist():
        nestedArchive.extract(fileToExtract, directoryToExtractTo)
but I'm getting this error:
KeyError: "There is no item named 'nestedFileToExtract.zip' in the archive"
even though the file exists in the file system.
Based on these other solutions: this and this.
import os
import io
import sys
import zipfile

def extract_with_structure(input_file, output):
    with zipfile.ZipFile(input_file) as zip_file:
        print(f"namelist: {zip_file.namelist()}")
        for obj in zip_file.namelist():
            filename = os.path.basename(obj)
            if not filename:
                # Skip folders
                continue
            if 'zip' == filename.split('.')[-1]:
                # extract a zip: read it by its full archive name, not the
                # basename, or ZipFile.read() raises KeyError for nested paths
                content = io.BytesIO(zip_file.read(obj))
                f = zipfile.ZipFile(content)
                dirname = os.path.splitext(os.path.join(output, obj))[0]
                for i in f.namelist():
                    f.extract(i, dirname)
            else:
                # extract a file
                zip_file.extract(obj, os.path.join(output))

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("No zipfile specified or output folder.")
        exit(1)
    extract_with_structure(sys.argv[1], sys.argv[2])
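The function above only unpacks one level of nesting. For the "no matter how deeply nested they are" requirement, here is a recursive sketch (my own illustration, not from the linked solutions): extract, then rescan the output directory and expand any .zip found into a directory of the same name, recursing until none remain.

import os
import zipfile

def extract_nested(zip_path, dest):
    # Extract zip_path into dest, then recursively expand any nested zips.
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(dest)
    for root, dirs, files in os.walk(dest):
        for name in files:
            if name.endswith('.zip'):
                inner = os.path.join(root, name)
                # expand next to where the nested zip sits, into a folder named after it
                extract_nested(inner, os.path.splitext(inner)[0])
                os.remove(inner)  # remove the archive so the outer walk doesn't re-extract it

# e.g. extract_nested('a.zip', 'a')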
I've written a function to strip double spaces out of my raw data files:
import os
import re

def fixDat(file):
    '''
    Removes extra spaces in the data files. Replaces original file with new
    and renames original to "...._original.dat".
    '''
    with open(file+'.dat', 'r') as infile:
        with open(file+'_fixed.dat', 'w') as outfile:
            for line in infile:
                # collapse runs of two or more whitespace characters into one space
                fixed = re.sub(r"\s\s+", " ", line)
                outfile.write(fixed)
    os.rename(file+'.dat', file+'_original.dat')
    os.rename(file+'_fixed.dat', file+'.dat')
I have 19 files in a folder that I need to process with this function, but I'm not sure how to parse the filenames and pass them to the function. Something like
for filename in folder:
    fixDat(filename)
but how do I code filename and folder in Python?
If I understand correctly, you are asking about the os module's walk() function. An example would look like this:
import os

for root, dirs, files in os.walk(".", topdown=False):
    # "." uses the current folder; change it to a path if you want to
    # process files somewhere other than where your script is located
    for name in files:
        print(os.path.join(root, name))
This prints filename outputs that can be fed to your fixDat() function, such as:
./tmp/test.py
./amrood.tar.gz
./httpd.conf
./www.tar.gz
./mysql.tar.gz
./test.py
Note that these are all strings, so you could change the script to:
import os

for root, dirs, files in os.walk(".", topdown=False):
    for name in files:
        if name.endswith('.dat'):  # or some other extension
            print(os.path.join(root, name))
            # fixDat() appends '.dat' itself, so hand it the path without the extension
            fixDat(os.path.splitext(os.path.join(root, name))[0])
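For comparison, a sketch of the same loop with pathlib, if you prefer it to os.walk() (rglob('*.dat') recurses through subfolders; sorted() materializes the list up front so the renames fixDat() performs don't feed back into the iteration):

from pathlib import Path

for datfile in sorted(Path('.').rglob('*.dat')):
    # fixDat() appends '.dat' itself, so drop the suffix before passing the path
    fixDat(str(datfile.with_suffix('')))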
I am trying to find a string that is contained in files under a directory, then store the matching file names and directories in a new text file or something like that.
I got as far as going through a directory, finding the string, and printing a result, but I'm not sure of the next step.
Please help; I'm completely new to coding and Python.
import glob, os

# Open a source as a file and assign it as source
source = open('target.txt').read()
filedirectories = []

# locating the source file and printing the directories.
os.chdir("/Users/a1003584/desktop")
for root, dirs, files in os.walk(".", topdown=True):
    for name in files:
        print(os.path.join(root, name))
        if source in open(os.path.join(root, name)).read():
            print('treasure found.')
Don't do a string comparison if you're looking for a dictionary. Instead, use the json module, like this.
import json
import os

filesFound = []

def searchDir(dirName):
    for name in os.listdir(dirName):
        path = os.path.join(dirName, name)
        # If it is a file.
        if os.path.isfile(path):
            try:
                with open(path) as f:
                    fileCon = json.load(f)
            except (ValueError, UnicodeDecodeError):
                print("Not a JSON file.")
                continue
            if "KeySearchedFor" in fileCon:
                filesFound.append(path)
        # If it is a directory.
        else:
            searchDir(path)

# Change this to the directory you're looking in.
searchDir(os.path.expanduser("~/Desktop"))
open(os.path.expanduser("~/Desktop/OutFile.txt"), 'w').write('\n'.join(filesFound))
This should write the output to a CSV file:
import csv
import os

with open('target.txt') as infile:
    source = infile.read()

with open("output.csv", 'w', newline='') as fout:
    outfile = csv.writer(fout)
    outfile.writerow("Directory FileName FilePath".split())
    for root, dirnames, fnames in os.walk("/Users/a1003584/desktop", topdown=True):
        for fname in fnames:
            with open(os.path.join(root, fname)) as infile:
                if source not in infile.read():
                    continue
            # writerow() takes one sequence, not three separate arguments
            outfile.writerow([root, fname, os.path.join(root, fname)])
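One caveat when walking a whole desktop: the bare open(...).read() will raise on binary or oddly encoded files. A guarded variant of the inner check (contains() is my own helper name, not part of the answer above):

def contains(path, needle):
    # Return True if the text of path contains needle; treat unreadable files as misses.
    try:
        with open(path, errors='ignore') as f:
            return needle in f.read()
    except OSError:
        return False  # permissions, broken symlinks, and the like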
I am trying to list directories and files (recursively) in a directory with Python:
./rootdir
    ./file1.html
    ./subdir1
        ./file2.html
        ./file3.html
    ./subdir2
        ./file4.html
Now I can list the directories and files just fine (borrowed it from here), but I would like to list it in the following format and ORDER (which is very important for what I am doing):
/rootdir/
/rootdir/file1.html
/rootdir/subdir1/
/rootdir/subdir1/file2.html
/rootdir/subdir1/file3.html
/rootdir/subdir2/
/rootdir/subdir2/file4.html
I don't care how it gets done, whether I walk the directory and then organize the list, or get everything in order as I go. Either way, thanks in advance!
EDIT: Added code below.
# list books
import os
import sys

lstFiles = []
rootdir = "/srv/http/example/www/static/dev/library/books"

# Append the directories and files to a list
for path, dirs, files in os.walk(rootdir):
    #lstFiles.append(path + "/")
    lstFiles.append(path)
    for file in files:
        lstFiles.append(os.path.join(path, file))

# Open the file for writing
f = open("sidebar.html", "w")
f.write("<ul>")
for item in lstFiles:
    splitfile = os.path.split(item)
    webpyPath = splitfile[0].replace("/srv/http/example/www", "")
    itemName = splitfile[1]
    if item.endswith("/"):
        f.write('<li>' + itemName + '</li>\n')
    else:
        f.write('<li>' + itemName + '</li>\n')
f.write("</ul>")
f.close()
Try the following:
import os

for path, dirs, files in os.walk("."):
    print(path)
    for file in files:
        print(os.path.join(path, file))
You do not need to print entries from dirs because each directory will be visited as you walk the path, so you will print it later with print(path).
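One caution: os.walk() itself makes no ordering guarantee, and the question says order matters. Sorting dirs in place steers the walk (in a top-down walk, os.walk re-reads the list you mutate to decide where to descend), sorting files settles each level, and the trailing slash matches the desired output:

import os

def listing(rootdir):
    for path, dirs, files in os.walk(rootdir):
        dirs.sort()  # in-place sort controls which subdirectory is visited next
        print(path + "/")
        for name in sorted(files):
            print(os.path.join(path, name))

# e.g. listing("/rootdir")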