shutil.copy only works once - python

As a Python beginner, I'm having real issues moving files around. Below is a script I've finally(!) made work which simply moves select files from a directory of choice to a new folder.
For some reason which I cannot fathom, it only worked once, then the destination folder created was really bizarre. At one point it created a 'directory' that was an unknown application with the correct name and at other times it creates a text file using seemingly random files to generate the content - again the file it creates is correctly named.
Here is the relevant script:
#!/usr/bin/python
import os, shutil
def file_input(file_name):
# Copies the .txt/.doc entries of the directory `file_name` towards <file_name>/target_files.
# NOTE(review): indentation was lost when this snippet was extracted, so the nesting is not visible here.
newlist = [] #create new list
for names in os.listdir(file_name): #loops through directory
if names.endswith(".txt") or names.endswith(".doc"): #returns only extensions required
full_file_name = os.path.join(file_name, names) #creates full file path name - required for further file modification
newlist.append(full_file_name) #adds item to list
# join() receives a single argument here, so the path is built by plain string concatenation.
dst = os.path.join(file_name + "/target_files")
full_file_name = os.path.join(file_name, names)
if (os.path.isfile(full_file_name)):
print "Success!"
# Nothing in this function creates dst before the copy is attempted.
shutil.copy(full_file_name, dst)
def find_file():
# Prompts for a directory path and passes a path on to file_input().
file_name = raw_input("\nPlease carefully input full directory pathway.\nUse capitalisation as necessary.\nFile path: ")
# The value typed above is overwritten by the hard-coded test path on the next line.
file_name = "/root/my-documents" #permanent input for testing!
return file_input(file_name)
# The triple-quoted string below is disabled code kept as a string literal.
'''try:
os.path.exists(file_name)
file_input(file_name)
except (IOError, OSError):
print "-" * 15
print "No file found.\nPlease try again."
print "-" * 15
return find_file()'''
# Script entry point.
find_file()
Can someone please tell me why this script is not reproducible when I delete the folder created and try to run it again and what I can do to make that happen?
I know it's a bit messy, but this is going to be part of a larger script and I'm still in first draft stages!!
Many thanks

This works:
import os, shutil
def file_input(file_name):
    """Copy every ``.txt``/``.doc`` file in a directory into its ``target_files`` folder.

    Args:
        file_name: Path of the directory to scan (not scanned recursively).

    Raises:
        OSError: If the directory cannot be listed, or if ``target_files``
            cannot be created (for example because a regular file of that
            name is left over from an earlier run).
    """
    # Let os.path.join insert the separator instead of concatenating by hand.
    dst = os.path.join(file_name, "target_files")
    for names in os.listdir(file_name):
        if not names.endswith((".txt", ".doc")):
            continue
        full_file_name = os.path.join(file_name, names)
        # isfile() rather than exists(): a directory called "x.txt" must not
        # be handed to shutil.copy.
        if not os.path.isfile(full_file_name):
            continue
        # Create the destination only once there is something to copy, so
        # shutil.copy sees a directory and does not create a *file* named dst.
        if not os.path.isdir(dst):
            os.makedirs(dst)
        print("Success!")
        shutil.copy(full_file_name, dst)
def find_file():
    """Ask for a directory and run file_input() on the (test) directory."""
    prompt = "\nPlease carefully input full directory pathway.\nUse capitalisation as necessary.\nFile path: "
    file_name = raw_input(prompt)
    # The typed path is deliberately discarded while testing.
    file_name = "/home/praveen/programming/trash/documents" #permanent input for testing!
    return file_input(file_name)


find_file()
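You need to check whether your copy destination directory actually exists and, if it does not, create it. When dst is not an existing directory, shutil.copy treats it as the name of the destination file, which is why you ended up with a file called target_files; once the directory exists, shutil.copy copies each file into it.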

Related

Python - Find duplicate files and move to another folder

Working on a small script that allows the user to select a folder to search for duplicate files, whether that be images, text etc. It should then move those duplicate files into another folder of the user's choice.
This is the code i have so far:
from tkinter import Tk
from tkinter.filedialog import askdirectory
import os
import shutil
import hashlib
# Walk a user-selected folder and report every file whose MD5 digest was already seen.
# NOTE(review): indentation was lost when this snippet was extracted.
Tk().withdraw()
source = askdirectory(title="Select the source folder")
walker = os.walk(source)
# Maps digest -> value; only membership of the digest is tested below.
uniqueFiles = dict()
total = 0
for folder, sub_folder, files in walker:
for file in files:
filepath = os.path.join(folder, file)
# Reads the whole file into memory; the handle returned by open() is never closed explicitly.
filehash = hashlib.md5((open(filepath, "rb").read())).hexdigest()
if filehash in uniqueFiles:
print(f"{filepath} is a duplicate")
total += 1
else:
# Stores the root folder (source) as the value, not the path of the file that was hashed.
uniqueFiles[filehash] = source
print("\n# of duplicate files found: {} ".format(total))
# destination = askdirectory(title="Select the target folder")
# shutil.move(filepath, destination, copy_function=shutil.copytree)
It works perfectly fine for now, finding all the duplicate files in a folder/sub folders and printing them out. The part I'm stuck on is how to move them. The commented code at the bottom seems to work, but it prompts the user for a folder for every duplicate found. I just want it to list out all the duplicates and then move them at once.
Any ideas on how i could format my code?
Thanks!
So you have two options here (as described by the comments to your question):
Prompt for the target directory beforehand
Prompt for the target directory afterward
The first option is probably the simplest, most efficient, and requires the smallest amount of refactoring. It does however require the user to input a target directory whether or not there are any duplicate files or an error occurs when searching, so it might be worse from a user's perspective:
# Prompt for the target directory beforehand, so the user is asked only once.
destination = askdirectory(title="Select the target folder")
for folder, sub_folder, files in walker:
    for file in files:
        filepath = os.path.join(folder, file)
        # Close each file as soon as it has been hashed.
        with open(filepath, "rb") as handle:
            filehash = hashlib.md5(handle.read()).hexdigest()
        if filehash in uniqueFiles:
            # Keep shutil.move's default copy function: copy_function is
            # applied to individual files, which shutil.copytree cannot copy.
            shutil.move(filepath, destination)
        else:
            # Remember where the first copy of this content lives.
            uniqueFiles[filehash] = filepath
The second option would allow you to perform all the necessary checks and error handling, but is more complex and requires more refactoring:
# Dictionary mapping each content hash to every file that has that content.
hashes = {}
for folder, sub_folder, files in walker:
    for file in files:
        filepath = os.path.join(folder, file)
        # Close each file as soon as it has been hashed.
        with open(filepath, "rb") as handle:
            filehash = hashlib.md5(handle.read()).hexdigest()
        hashes.setdefault(filehash, []).append(filepath)
# Prompt for the directory afterward, once the whole scan has finished.
destination = askdirectory(title="Select the target folder")
for duplicates in hashes.values():
    # Leave the first file of each group where it is and move only the
    # extra copies; a group of one yields an empty slice and moves nothing.
    for duplicate in duplicates[1:]:
        shutil.move(duplicate, destination)
As a side note, I am not familiar with hashlib but I suspect that you will want to be closing the files you are hashing especially if checking a large file tree:
# Read inside a context manager so the handle is released straight away.
with open(filepath, "rb") as stream:
    contents = stream.read()
filehash = hashlib.md5(contents).hexdigest()

User input only works for the last file extension in a folder

I am struggling to fix a bug in my code. The variable (fext) is only true for the last file in a folder. So if by chance the last file is 'jpg' then my code will continue as planned. But if by chance the last file is a 'gpx' or a 'csv' then the Else error will activate even though there is a 'jpg' file in the folder.
Can somebody please help me refine my code so that this work if all file types are in the folder? I am still quite new to Python and stuck on how to proceed.
Here is my code below:
import os, string
from os.path import isfile, join
# Ask for a folder, collect the paths of the files under it, then ask for a file extension.
# NOTE(review): indentation was lost when this snippet was extracted.
file_path = input("Enter the folder link: ")
print("")
TF = False
# Generator expression yielding the full path of every file found by os.walk(file_path).
path_it = (os.path.join(root, filename)
for root, _, filenames in os.walk(file_path)
for filename in filenames)
for path in path_it:
# fext and fname are single strings that are reassigned for every path.
fext = os.path.splitext(os.path.basename(path))[1]
fname = os.path.splitext(os.path.basename(path))[0]
# NOTE(review): if this loop runs after the for loop, as the question describes,
# fext holds only the extension of the last path at this point.
while True:
file_type = input("Enter file extention (e.g. txt, wav, jpg, gpx, pdf): ")
print(file_type)
# Substring test against one string, e.g. 'jpg' in '.jpg'.
if file_type in fext:
TF = True
break
else:
print("\n*** There is no '" + file_type + "' file extension in this folder, please try again.\n")
Other code...
Thanks
A list comprehension is likely your best solution to get your desired result. This will store all the filetypes in the directory passed in a list.
# path_it is a generator and can be consumed only once: materialise it so
# that both comprehensions see every path.
paths = list(path_it)
# splitext() returns (name, extension): index 1 is the extension, index 0 the name.
fext = [os.path.splitext(os.path.basename(path))[1] for path in paths]
fname = [os.path.splitext(os.path.basename(path))[0] for path in paths]
But, you also need to make sure that the input file type matches the format. The above will give you (for example) ['.csv', '.pdf', '.gpx'], so you need to make sure that the format of the input is the same, in other words, not just 'csv' but '.csv' otherwise there will be no match.
The while loop can also be changed to while not TF, and once TF changes to True, the loop will be broken, instead of breaking the loop using break.
The fext and fname variables should return an iterable if you are to check against all extensions contained within the folder. Try the following list comprehensions.
# path_it is a generator and can be consumed only once: materialise it so
# that both comprehensions see every path.
paths = list(path_it)
# splitext() returns (name, extension): index 1 is the extension, index 0 the name.
fext = [os.path.splitext(os.path.basename(path))[1] for path in paths]
fname = [os.path.splitext(os.path.basename(path))[0] for path in paths]

Is there a simpler function or one liner to check if folder exists if not create it and paste a specific file into it?

I am aiming to create a function that does the following:
Declare a path with a file, not just a folder. e.g. 'C:/Users/Lampard/Desktop/Folder1/File.py'
Create a folder in same folder as the declared file path - Calling it 'Archive'
Cut the file and paste it into the new folder just created.
If the folder 'Archive' already exists - then simply cut and paste the file into there
I have spent approx. 15-20min going through these:
https://www.programiz.com/python-programming/directory
Join all except last x in list
https://docs.python.org/3/library/pathlib.html#operators
And here is what I got to:
import os
from pathlib import Path, PurePath
from shutil import copy
# Copy a file into an 'Archive' folder located next to it.
# NOTE(review): indentation was lost when this snippet was extracted.
#This path will change every time - just trying to get function right first
path = 'C:/Users/Lampard/Desktop/Folder1/File.py'
#Used to allow suffix function
p = PurePath(path)
#Check if directory is a file not a folder
if not p.suffix:
print("Not an extension")
#If it is a file
else:
#Create new folder before last file
#Change working directory
split = path.split('/')
new_directory = '/'.join(split[:-1])
# os.chdir() returns None, so apply_new_directory is always None.
apply_new_directory = os.chdir(new_directory)
#If folder does not exist create it
try:
os.mkdir('Archive')#Create new folder
#If not, continue process to copy file and paste it into Archive
# The copy is the only statement in the except branch, so it runs only when
# 'Archive' already existed; copy() also leaves the original file in place.
except FileExistsError:
copy(path, new_directory + '/Archive/' + split[-1])
Is this code okay? - does anyone know a simpler method?
Locate folder/file in path
# List the sub-directories of the current working directory. The call form of
# print works on both Python 2 and Python 3; the statement form is Python 2 only.
print([name for name in os.listdir(".") if os.path.isdir(name)])
Create path
import os
# Directory this snippet tries to create.
path = "/tmp/year"
# Attempt the creation first and only remember whether it worked...
created = True
try:
    os.mkdir(path)
except OSError:
    created = False
# ...then report the outcome in one place.
if created:
    print ("Successfully created the directory %s " % path)
else:
    print ("Creation of the directory %s failed" % path)
To move and cut files you can use this library
As you're already using pathlib, there's no need to use shutil:
from pathlib import Path
path = 'C:/Users/Lampard/Desktop/Folder1/File.py' # or whatever
p = Path(path)
# Sibling 'Archive' folder: replace the filename with 'Archive'.
target = p.with_name('Archive')
# exist_ok=True reuses the folder when it is already there instead of
# raising FileExistsError on the second and later runs.
target.mkdir(exist_ok=True)
# Move the file into the target directory.
p.rename(target.joinpath(p.name))
Feel free to add appropriate try…except statements to handle any errors.
Update: you might find this version more readable:
# 'Archive' folder next to the file.
target = p.parent / 'Archive'
# exist_ok=True: do not fail when 'Archive' already exists.
target.mkdir(exist_ok=True)
# Move the file into it, keeping its name.
p.rename(target / p.name)
This is an example of overloading / operator.

Move pairs of files (.txt & .xml) into their corresponding folder using Python

I have been working this challenge for about a day or so. I've looked at multiple questions and answers asked on SO and tried to 'MacGyver' the code used for my purpose, but still having issues.
I have a directory (lets call it "src\") with hundreds of files (.txt and .xml). Each .txt file has an associated .xml file (let's call it a pair). Example:
src\text-001.txt
src\text-001.xml
src\text-002.txt
src\text-002.xml
src\text-003.txt
src\text-003.xml
Here's an example of how I would like it to turn out so each pair of files are placed into a single unique folder:
src\text-001\text-001.txt
src\text-001\text-001.xml
src\text-002\text-002.txt
src\text-002\text-002.xml
src\text-003\text-003.txt
src\text-003\text-003.xml
What I'd like to do is create an associated folder for each pair and then move each pair of files into its respective folder using Python. I've already tried working from code I found (thanks to a post from Nov '12 by Sethdd), but am having trouble figuring out how to use the move function to grab pairs of files. Here's where I'm at:
import os
import shutil
# Create one destination folder per file-name prefix and move the matching files into it.
# NOTE(review): indentation was lost when this snippet was extracted.
srcpath = "PATH_TO_SOURCE"
srcfiles = os.listdir(srcpath)
destpath = "PATH_TO_DEST"
# grabs the name of the file before extension and uses as the dest folder name
# [0:9] is a fixed nine-character prefix; for a name such as text-001.txt it is 'text-001.' (dot included).
destdirs = list(set([filename[0:9] for filename in srcfiles]))
# Creates destpath/dirname with os.mkdir and returns the joined path.
def create(dirname, destpath):
full_path = os.path.join(destpath, dirname)
os.mkdir(full_path)
return full_path
# Moves srcpath/filename to dirpath with shutil.move.
def move(filename, dirpath):
shutil.move(os.path.join(srcpath, filename)
,dirpath)
# create destination directories and store their names along with full paths
targets = [
(folder, create(folder, destpath)) for folder in destdirs
]
for dirname, full_path in targets:
# NOTE(review): `srcfile` is not defined anywhere in this snippet; the list built above is `srcfiles`.
for filename in srcfile:
if dirname == filename[0:9]:
move(filename, full_path)
I feel like it should be easy, but Python isn't something I work with everyday and it's been a while since my scripting days... Any help would be greatly appreciated!
Thanks,
WK2EcoD
Use the glob module to iterate over all of the 'txt' files. From that you can parse and create the folders and copy the files.
The process should be as simple as it appears to you as a human.
for file_name in os.listdir(srcpath):
dir = file_name[:9]
# if dir doesn't exist, create it
# move file_name to dir
You're doing a lot of intermediate work that seems to be confusing you.
Also, insert some simple print statements to track data flow and execution flow. It appears that you have no tracing output so far.
You can do it with os module. For every file in directory check if associated folder exists, create if needed and then move the file. See the code below:
import os
SRC = 'path-to-src'
for fname in os.listdir(SRC):
    filename, file_extension = os.path.splitext(fname)
    # os.path.splitext() keeps the leading dot on the extension, so the
    # comparison values must include it.
    if file_extension not in ('.xml', '.txt'):
        continue
    # One folder per base name, shared by the .txt/.xml pair.
    folder_path = os.path.join(SRC, filename)
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
    os.rename(
        os.path.join(SRC, fname),
        os.path.join(folder_path, fname)
    )
My approach would be:
Find the pairs that I want to move (do nothing with files without a pair)
Create a directory for every pair
Move the pair to the directory
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os, shutil
import re
def getPairs(files):
    """Return the base names that have both a ``.txt`` and a ``.xml`` file.

    Args:
        files: Iterable of file names (for example the result of
            ``os.listdir()``).

    Returns:
        A list of names without extension, in the order their ``.txt`` file
        appears in ``files``. Files without a partner are ignored.
    """
    # Materialise once so a one-shot iterable can be both looped over and
    # searched, and search a set so each lookup is O(1) instead of O(n).
    names = list(files)
    available = set(names)
    # Greedy first group: the name is everything before the last dot.
    file_re = re.compile(r'^(.*)\.(.*)$')
    pairs = []
    for f in names:
        match = file_re.match(f)
        if not match:
            continue
        name, ext = match.groups()
        if ext == 'txt' and name + '.xml' in available:
            pairs.append(name)
    return pairs
def movePairsToDir(pairs):
    """Create a folder per pair name and move its .txt and .xml files into it.

    All paths are relative to the current working directory.
    """
    for stem in pairs:
        os.mkdir(stem)
        # Same order as before: the text file first, then the XML file.
        for suffix in ('.txt', '.xml'):
            shutil.move(stem + suffix, stem)
# Driver: os.listdir() without an argument lists the current working directory.
files = os.listdir()
# Names that have both a .txt and a .xml file.
pairs = getPairs(files)
movePairsToDir(pairs)
NOTE: This script works when called inside the directory with the pairs.

Moving files and creating directories if certain file type in python

This is probably a simple question, but I'm brand new to python and programming in general.
I'm working on a simple program to copy/move .mp3 files from one location to another while mirroring the directory structure of the source location. What I have so far works, however it also creates new folders in the destination location even if the source folder contained no mp3 files. I only want to create the new directories if the source contains .mp3s, otherwise it could lead to a bunch of empty folders in the destination.
Here is what I have so far:
import os
import shutil #Used for copying files
# Walk source_dir and move every file ending in `ext` to the matching folder under destPath.
# NOTE(review): indentation was lost when this snippet was extracted.
##CONFIG
source_dir = "C:\Users\username\Desktop\iTunes\\" #set the root folder that you want to scan and move files from. This script will scan recursively.
destPath = "C:\Users\username\Desktop\converted From iTunes" #set the destination root that you want to move files to. Any non-existing sub directories will be created.
ext = ".mp3" #set the type of file you want to search for.
count = 0 #initialize counter variable to count number of files moved
##
##FIND FILES
for dirName, subdirList, fileList in os.walk(source_dir):
#set the path for the destination folder(s)
dest = destPath + dirName.replace(source_dir, '\\')
#if the source directory doesn't exist in the destination folder
#then create a new folder
# The folder is created before any name in fileList has been examined.
if not os.path.isdir(dest):
os.mkdir(dest)
print('Directory created at: ' + dest)
for fname in fileList:
if fname.endswith(ext) :
#determine source & new file locations
oldLoc = dirName + '\\' + fname
newLoc = dest + '\\' + fname
if os.path.isfile(newLoc): # check to see if the file already exists. If it does print out a message saying so.
# newLoc already ends with fname, so fname appears twice in this message.
print ('file "' + newLoc + fname + '" already exists')
if not os.path.isfile(newLoc): #if the file doesnt exist then copy it and print out confirmation that is was copied/moved
try:
# shutil.move relocates the file, although the messages below say "copied".
shutil.move(oldLoc, newLoc)
print('File ' + fname + ' copied.')
count = count + 1
except IOError:
print('There was an error copying the file: "' + fname + '"')
print 'error'
print "\n"
print str(count) + " files were moved."
print "\n"
so if the folder structure is something like:
root->
band 1->
album name->
song.m4a,
song2.m4a
right now it will create all those folders in the destination directory, even though there are no .mp3s to copy.....
Any help is appreciated!
I think I would create my own wrapper around copy for this sort of thing:
def fcopy(src, dest):
    """Copy the file ``src`` to ``dest``, creating missing directories.

    Args:
        src: Path of the file to copy.
        dest: Destination file path, absolute or relative. Every missing
            directory in front of the file name is created first.

    Raises:
        OSError: If the destination directory cannot be created (for
            example for lack of permissions) or the copy itself fails.
    """
    dest_dir = os.path.dirname(dest)
    # An empty dirname means "current directory": there is nothing to create.
    if dest_dir and not os.path.isdir(dest_dir):
        try:
            os.makedirs(dest_dir)
        except OSError:
            # Ignore the error only when the directory exists after all
            # (e.g. it was created concurrently); report real failures.
            if not os.path.isdir(dest_dir):
                raise
    shutil.copy(src, dest)
Now you can just walk the tree calling this function on any .mp3 file.
The simplest thing to do I can think of for your existing code would be to just make it skip over any folders that don't have any .mp3 files in them. This can easily be done by adding the following items and if statement to the top of your loop. The itertools.ifilter() and fnmatch.fnmatch() functions can be used together to simplify checking for files with the proper extension.
from itertools import ifilter
from fnmatch import fnmatch
ext = '.mp3'
fnPattern = '*'+ext


def is_wanted(fname):
    # True for file names that match the wanted extension pattern.
    return fnmatch(fname, fnPattern)


for dirName, subdirList, fileList in os.walk(source_dir):
    # Lazily filter the directory's files; any() stops at the first match.
    wanted = ifilter(is_wanted, fileList)
    if not any(wanted):
        print(' skipping "{}"'.format(dirName))
        continue
    ...
You will also have to change the os.mkdir(dest) to os.makedirs(dest) in the code further down to ensure that any subdirectories skipped by earlier iterations get created when there's a need to copy files to a corresponding subbranch of the destination directory.
You could optimize things a bit by creating and saving a possibly empty collection of matching files that have the extension, and then use it again later to determine what files to copy:
from itertools import ifilter
from fnmatch import fnmatch
ext = '.mp3'
fnPattern = '*'+ext
for dirName, subdirList, fileList in os.walk(source_dir):
# generate list of files in directory with desired extension
matches = ifilter(lambda fname: fnmatch(fname, fnPattern), fileList)
# skip subdirectory if it does not contain any files of interest
if not matches:
continue
...
... create destination directory with os.makedirs()
...
# copy each file to destination directory
for fname in matches:
... copy file
Would shutil.copytree not do what you want in fewer lines?

Categories