reading a file and getting the MD5 hash in python - python

The loop is working but once I put the if statements in it only prints I am a dir
If the if statements are not there I am able to print the dirpath, dirname, filename to the console
I am trying to list all the file names in a directory and get the MD5 sum.
from os import walk
import hashlib
import os
path = "/home/Desktop/myfile"
for (dirpath, dirname, filename) in walk(path):
if os.path.isdir(dirpath):
print("I am a dir")
if os.path.isfile(dirpath):
print(filename, hashlib.md5(open(filename, 'rb').read()).digest())

You're only checking dirpath. What you have as dirname and filename are actually collections of directory names and files under dirpath. Taken from the python docs, and modified slightly, as their example removes the files:
import os
for root, dirs, files in os.walk(top):
for name in files:
print(os.path.join(root, name))
for name in dirs:
print(os.path.join(root, name))
This will print the list of directories and files under top, then recurse down the directories under top and print the files and directories there.

From the Python documentation about os.walk:
https://docs.python.org/2/library/os.html
dirpath is a string, the path to the directory. dirnames is a list of
the names of the subdirectories in dirpath (excluding '.' and '..').
filenames is a list of the names of the non-directory files in
dirpath.
With os.path.isfile(dirpath) you are checking whether dirpath is a file, which is never the case. Try changing the code to:
# os.walk yields (dirpath, dirnames, filenames); the last two are lists,
# so every file name has to be joined to dirpath individually.
for dirpath, dirnames, filenames in os.walk(path):
    for filename in filenames:
        full_filename = os.path.join(dirpath, filename)
        if os.path.isfile(full_filename):
            # The with-block closes the file promptly; hexdigest() gives the
            # usual printable form of the MD5 sum instead of raw bytes.
            with open(full_filename, 'rb') as f:
                print(full_filename, hashlib.md5(f.read()).hexdigest())

Related

Folder containing subfolders, that contain multiple files (.xlsm, .pdf, .txt). How to rename .pdf files to subfolders' name?

This could be done with python, but I think I am missing a way to loop for all directories. Here is the code I am using:
import os
def renameInDir(directory):
for filename in os.listdir(directory):
if filename.endswith(".pdf"):
path = os.path.realpath(filename)
parents = path.split('/') //make an array of all the dirs in the path. 0 will be the original basefilename
newFilename=os.path.dirname(filename)+directory +parents[-1:][0] //reorganize data into format you want
os.rename(filename, newFilename)//rename the file
You should go with os.walk(). It will map the directory tree by the given directory param, and generate the file names.
Using os.walk() you can accomplish the desired result this way:
import os
from os.path import join
for dirpath, dirnames, filenames in os.walk('/path/to/directory'):
for name in filenames:
new_name = name[:-3] + 'new_file_extension'
os.rename(join(dirpath, name), join(dirpath, new_name))

Find the sub-paths of files with specific extension in python

I am trying to locate all files with the extension mp4 in a folder (and its subfolders) and copy them to another directory. I managed to find all files with the extension mp4; however, I didn't manage to keep the directory of those files. My code is the following:
import os
from shutil import copyfile
path = "videos/"
for root, dirs, files in os.walk(path):
for name in files:
if name.endswith((".mp4", ".mp4")):
print(name)
# copyfile(src, dst)
I want to find the path of the name (corresponding to my vids). How can I do so?
Use os.path.join()
import os
from shutil import copyfile
path = "videos/"
for root, dirs, files in os.walk(path):
for name in files:
if name.endswith((".mp4", ".mp4")):
print(os.path.join(root, name))
# copyfile(src, dst)
Although it is considered better to use absolute paths, you can use os.path.relpath if you want a relative path. From the os.path.relpath documentation:
os.path.relpath(path[, start])
Return a relative filepath to path either from the current directory or from an optional start directory. This is a path computation: the filesystem is not accessed to confirm the existence or nature of path or start.
start defaults to os.curdir.
Availability: Windows, Unix.
New in version 2.6.
Why not just use glob:
import glob
import os
import shutil

# glob yields the full source path, so only its base name is appended to the
# destination directory; formatting the whole path into '/bar/...' would
# produce a non-existent '/bar//foo/...' target.
for src in glob.iglob('/foo/*.mp4'):
    shutil.copy2(src, os.path.join('/bar', os.path.basename(src)))
From the documentation on os.walk:
dirpath is a string, the path to the directory. dirnames is a list of the names of the subdirectories in dirpath (excluding '.' and '..'). filenames is a list of the names of the non-directory files in dirpath. Note that the names in the lists contain no path components. To get a full path (which begins with top) to a file or directory in dirpath, do os.path.join(dirpath, name).
So your code should look like this:
import os
from shutil import copyfile

path = "videos/"
dst_root = "copied_videos/"  # destination directory; adjust as needed

for root, dirs, files in os.walk(path):
    for name in files:
        if name.endswith(".mp4"):
            print(name)
            src = os.path.join(root, name)
            # Rebuild the file's sub-path below the destination root so the
            # directory layout found under `path` is preserved.
            dst = os.path.join(dst_root, os.path.relpath(src, path))
            dst_dir = os.path.dirname(dst)
            # copyfile does not create missing directories itself.
            if not os.path.isdir(dst_dir):
                os.makedirs(dst_dir)
            copyfile(src, dst)

Python: Remove empty folders recursively

I'm having troubles finding and deleting empty folders with my Python script.
I have some directories with files more or less like this:
A/
--B/
----a.txt
----b.pdf
--C/
----d.pdf
I'm trying to delete all files which aren't PDFs and after that delete all empty folders. I can delete the files that I want to, but then I can't remove the empty directories. What am I doing wrong?
os.chdir(path+"/"+name+"/Test Data/Checklists")
pprint("Current path: "+ os.getcwd())
for root, dirs, files in os.walk(path+"/"+name+"/Test Data/Checklists"):
for name in files:
if not(name.endswith(".pdf")):
os.remove(os.path.join(root, name))
pprint("Deletting empty folders..")
pprint("Current path: "+ os.getcwd())
for root, dirs, files in os.walk(path+"/"+name+"/Test Data/Checklists", topdown=False):
if not dirs and not files:
os.rmdir(root)
Use this function instead:
os.removedirs(path)
this will remove directories until the parent directory is not empty.
Ideally, you should remove the directories immediately after deleting the files, rather than doing two passes with os.walk
import sys
import os
for dir, subdirs, files in os.walk(sys.argv[1], topdown=False):
for name in files:
if not(name.endswith(".pdf")):
os.remove(os.path.join(dir, name))
# check whether the directory is now empty after deletions, and if so, remove it
if len(os.listdir(dir)) == 0:
os.rmdir(dir)
To delete empty folders you can use this snippet.
It can be combined with deleting files, but it should be run as the last step, as is.
import os
def drop_empty_folders(directory):
    """Remove every empty folder under ``directory``, deepest first.

    The tree is walked bottom-up, so a folder whose only contents were empty
    sub-folders is itself removed once those children are gone. ``directory``
    itself is removed as well if it ends up empty.
    """
    for dirpath, dirnames, filenames in os.walk(directory, topdown=False):
        # Ask the filesystem instead of trusting `dirnames`: os.walk builds
        # that list before descending, so it still names children that were
        # already removed earlier in this loop.
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
remove all empty folders
import os
folders = './A/' # directory
for folder in list(os.walk(folders)) :
if not os.listdir(folder[0]):
os.removedirs(folder[0])

Read file in unknown directory

I need to read and edit several files; the issue is I know roughly where these files are but not entirely.
So all the files are called QqTest.txt in various different directories.
I know that the parent directories are called:
mdcArray = ['MDC0021','MDC0022','MDC0036','MDC0055','MDC0057'
'MDC0059','MDC0061','MDC0062','MDC0063','MDC0065'
'MDC0066','MDC0086','MDC0095','MDC0098','MDC0106'
'MDC0110','MDC0113','MDC0114','MDC0115','MDC0121'
'MDC0126','MDC0128','MDC0135','MDC0141','MDC0143'
'MDC0153','MDC0155','MDC0158']
but after that there is another unknown subdirectory that contains QqTest.txt
so I need to read the QqTest.txt from /MDC[number]/unknownDir/QqTest.txt
So how do I read the file with a wildcard in Python, similar to how I would in bash,
i.e
/MDC0022/*/QqTest.txt
You can use a Python module called glob to do this. It enables Unix style pathname pattern expansions.
import glob
glob.glob("/MDC0022/*/QqTest.txt")
If you want to do it for all items in the list you can try this.
for item in mdcArray:
required_files = glob.glob("{0}/*/QqTest.txt".format(item))
# process files here
Glob documentation
You could search your root folders as follows:
import os

# Parent directories to search. Every element needs its own comma, including
# the last one on each row: adjacent string literals are concatenated, which
# would silently merge e.g. 'MDC0057' and 'MDC0059' into one bogus name.
mdcArray = [
    'MDC0021', 'MDC0022', 'MDC0036', 'MDC0055', 'MDC0057',
    'MDC0059', 'MDC0061', 'MDC0062', 'MDC0063', 'MDC0065',
    'MDC0066', 'MDC0086', 'MDC0095', 'MDC0098', 'MDC0106',
    'MDC0110', 'MDC0113', 'MDC0114', 'MDC0115', 'MDC0121',
    'MDC0126', 'MDC0128', 'MDC0135', 'MDC0141', 'MDC0143',
    'MDC0153', 'MDC0155', 'MDC0158',
]

for root in mdcArray:
    for dirpath, dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename == 'QqTest.txt':
                found = os.path.join(dirpath, filename)
                # Parenthesised single-argument print works on Python 2 and 3.
                print("Found - {}".format(found))
This would display something like the following:
Found - MDC0022\test\QqTest.txt
The os.walk function can be used to traverse your folder structure.
To search all folders for MDC<number> in the path, you could use the following approach:
import os
import re

# Compiled once, outside the loop; search() accepts the MDC<number> substring
# anywhere in the directory path.
mdc_dir = re.compile(r'MDC\d+')

for dirpath, dirnames, filenames in os.walk('.'):
    # A directory holds at most one entry with a given name, so a membership
    # test replaces the inner loop over filenames.
    if mdc_dir.search(dirpath) and 'QqTest.txt' in filenames:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Found - {}".format(os.path.join(dirpath, 'QqTest.txt')))
You might use os.walk. Not exactly what you wanted but will do the job.
rootDir = '.'
for dirName, subdirList, fileList in os.walk(rootDir):
print('Found directory: %s' % dirName)

How to copy files of a certain type in python

I have a folder (with other subfolders) from which i would like to copy only the .js files to another existing folder (this also has subfolders with the same folder structure as the first one, except this one has only the folders, so no file)
How can i do that with python? I tried shutil.copytree but it fails because some folders already exists.
use os.path.splitext or glob.iglob
glob.iglob(pathname)
Return an iterator which yields the same values as glob() without
actually storing them all simultaneously.
I propose a solution with os.path.splitext, walking with os.walk. I make use of os.path.relpath to find relative path in the duplicate tree.
source_dir is your uppermost source folder, dest_dir your uppermost destination folder.
import glob
import os
import shutil

# Raw strings keep the Windows backslashes literal instead of relying on
# "\C" and "\P" happening not to be recognised escape sequences.
source_dir = r"F:\CS\PyA"
dest_dir = r"F:\CS\PyB"

for root, dirnames, filenames in os.walk(source_dir):
    for filename in filenames:
        extension = os.path.splitext(filename)[1]
        # The question asks for the .js files only.
        if extension == ".js":
            src = os.path.join(root, filename)
            # Same relative location below dest_dir as below source_dir; the
            # destination folders are expected to exist already.
            dst = os.path.join(dest_dir, os.path.relpath(src, source_dir))
            shutil.copy2(src, dst)
from glob import glob
from shutil import copy
import os
def copyJS(src, dst):
    """Recursively copy the ``.js`` files under ``src`` into ``dst``.

    The sub-folder layout of ``src`` is mirrored below ``dst``. A destination
    folder is created only when a file is actually copied into it, so
    existing folders are reused and folders without ``.js`` files are not
    created.
    """
    for entry in glob(os.path.join(src, '*')):
        # basename/join work with whatever separator the platform uses,
        # unlike splitting the path on a hard-coded '/'.
        entry_name = os.path.basename(entry)
        if os.path.isdir(entry):
            copyJS(entry, os.path.join(dst, entry_name))
        elif entry_name.endswith('.js'):
            # Without the folder in place, copy() would write a regular file
            # named like the folder instead of copying into it.
            if not os.path.isdir(dst):
                os.makedirs(dst)
            copy(entry, dst)

Categories