How do I convert all files in directory one by one with the code below?
This code takes all the files in a folder and converts them together, but uses up too much memory. I need to do it in the loop for each file separately.
i.e. Find file. Convert. Move. Repeat.
import os
import shutil
import glob
# ImageMagick invocation. The "*.pdf" wildcard makes a single `convert` call
# process every PDF in the current directory at once.
command = ('convert -compress LZW -alpha off -density 320 -depth 4 '
           '-contrast-stretch 700x0 -gamma .45455 *.pdf '
           '-set filename:base "%[basename]" +adjoin "%[filename:base].tiff"')
newpath = r'...'
new_dir = 'tiff'

if not os.path.exists(newpath):
    try:
        os.mkdir(new_dir)
        os.system(command)
    except OSError:
        # os.mkdir raises OSError when the directory cannot be created,
        # e.g. because it already exists; the conversion is then skipped.
        print("The folder is already exist")

# Move every TIFF found in the current directory into the output folder.
for file in glob.glob("*.tiff"):
    try:
        shutil.move(file, new_dir)
        # Report only after the move has actually succeeded.
        print('"' + file + '"' + ' has just moved to ' + '"' + new_dir + '"' + ' folder')
    except (OSError, shutil.Error):
        print("Error")
using rename?
import os
# Create the target folder; exist_ok avoids a FileExistsError on a second run.
os.makedirs("new_folder", exist_ok=True)
for file in ['file1.txt', 'file2.txt']:
    # os.rename moves the file when source and target are on the same filesystem.
    os.rename(file, os.path.join("new_folder", file))
Related
I was advised to ask a new question linked to Python: Unicode characters in file or folder names.
The former was about reading the path from the list, this one is about walking through folders and subfolders.
Below is the extract from the code. It walks through the folders and prints some information about the files into the output, which can be later imported into Excel.
import os
from PyPDF2 import PdfFileReader
import magic
import traceback
rootdir = "F:"

# Walk rootdir and write one ">>>>"-separated record per file:
# full path, file name, name without extension, extension, size, detected type.
# The `with` block guarantees the report is flushed and closed even if the
# walk is interrupted.
with open('F:/filepath.txt', 'w', encoding='utf-8') as filepath:
    for subdir, dirs, files in os.walk(rootdir):
        if files:
            # Folder (if not empty) and number of files in it.
            print(subdir + os.sep + '>>>' + str(len(files)))
        for file in files:
            try:
                ff = subdir + os.sep + file
                ff = os.path.abspath(ff)  # workaround MAX_PATH
                ff = u"\\\\?\\" + ff  # workaround MAX_PATH
                file_size = os.path.getsize(ff) / 1000000
                file_no_ext, file_ext = os.path.splitext(file)
                file_type = magic.from_file(ff)
                record = [ff, file, file_no_ext, file_ext, str(file_size), file_type]
                filepath.write(">>>>".join(record) + '\n')
            except Exception:
                # Best effort: report the failure and carry on with the next file.
                traceback.print_exc()
On files with extended character set in their name magic throws an exception like: "cannot open `\?\F:\Csom\303\263s\23-Dec-2020.pdf' (No such file or directory)". At the same time file path is written out as "\?\F:\Csomós\23-Dec-2020.pdf".
Q1: How to ensure magic can open the file and return its type?
Q2: How to output the file/folder name exactly as it appears?
I believe MAX_PATH has been redefined in all environments, but I am not bothered about the leading "\\?\" prefix.
I am on Windows 10 and my file system is NTFS.
I want to extract all files that have the same filetype from a zip file.
I have this code:
from zipfile import ZipFile
# Extract every archive member whose name ends in '.MXF' into 'Greenscreen'.
# ZipFile.extract keeps each member's stored path, so sub-folders from the
# archive are recreated under the target directory.
counter = 0
with ZipFile('Video.zip', 'r') as zipObject:
    listOfFileNames = zipObject.namelist()
    for fileName in listOfFileNames:
        if not fileName.endswith('.MXF'):
            continue
        zipObject.extract(fileName, 'Greenscreen')
        print('File {} extracted'.format(counter))
        counter += 1
print('All {} files extraced'.format(counter))
The problem is that the zip file also has multiple sub-folders with the required .MXF files in them.
Thus after running the script my Greenscreen folder also shows all sub-folders like this:
But i just need the files of the same file-type. So it should look like this:
I'm a beginner in Python and am trying to make a script that does the following:
Check number of files, if they exist in the destFile
If they all exist, exit the script (don't do anything)
If some files are missing, copy only the missing files from the srcFile to the destFile
The script that I have made works, but I would like your help making it copy only the missing file(s). At the moment, whenever a file is missing, it copies every file starting from file 1 (test1.txt). For example, if test4.txt and test5.txt are missing in destFile, my script copies test1.txt through test5.txt instead of copying only the two missing files, test4.txt and test5.txt.
import os, shutil
from datetime import datetime
# Running number of copy operations, written into each log line.
count = 0
error = "ERROR! file is missing! (files have been copied)"
# Timestamp prefix for log lines; computed once, when the script starts.
sttime = datetime.now().strftime('%d/%m/%Y - %H:%M:%S - ')
# log.txt is opened relative to this working directory.
os.chdir("C:\log")
log = "log.txt"
# Source files, listed in the same order as the destination files below.
srcFile = [r"C:\srcFile\test1.txt",
r"C:\srcFile\test2.txt",
r"C:\srcFile\test3.txt",
r"C:\srcFile\test4.txt",
r"C:\srcFile\test5.txt"]
# Expected destination files.
destFile = [r"C:\destFile\test1.txt",
r"C:\destFile\test2.txt",
r"C:\destFile\test3.txt",
r"C:\destFile\test4.txt",
r"C:\destFile\test5.txt"]
# NOTE(review): indentation was lost in this listing; the nesting is assumed
# to be for -> if -> for -> if -> copy/count/log.
# For each missing destination file, the inner loop walks ALL source files and
# copies each one until the missing file appears, which is why files before
# the missing one are copied as well.
for file in destFile:
if not os.path.exists(file):
for file_sr in srcFile:
if not os.path.exists(file):
shutil.copy(file_sr, 'C:\destFile')
count +=1
with open(log, 'a') as logfile:
logfile.write(sttime + error + " " + str(count) + " => " + file + '\n')
The problem is that you're iterating over all of the source files whenever you detect a missing destination file: for file_sr in srcFile:. Instead, you can copy just the missing file by keeping track of the position (in the array) of the missing destination file:
# srcFile and destFile are parallel lists, so the index of a missing
# destination file is also the index of the source file to copy.
for position, file in enumerate(destFile):
    if not os.path.exists(file):
        file_sr = srcFile[position]
        # Copy straight to the missing path; no second existence check needed.
        shutil.copy(file_sr, file)
Using your code, you can do:
import os, shutil
from datetime import datetime
# Running number of copy operations, written into each log line.
count = 0
error = "ERROR! file is missing! (files have been copied)"
# Timestamp prefix for log lines; computed once, when the script starts.
sttime = datetime.now().strftime('%d/%m/%Y - %H:%M:%S - ')
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
os.chdir(r"C:\log")
log = "log.txt"
srcFile = [r"C:\srcFile\test1.txt",
           r"C:\srcFile\test2.txt",
           r"C:\srcFile\test3.txt",
           r"C:\srcFile\test4.txt",
           r"C:\srcFile\test5.txt"]
destFile = [r"C:\destFile\test1.txt",
            r"C:\destFile\test2.txt",
            r"C:\destFile\test3.txt",
            r"C:\destFile\test4.txt",
            r"C:\destFile\test5.txt"]

for file in destFile:
    if not os.path.exists(file):
        # Derive the source path from the destination path itself. The
        # replace must be applied to the current path string `file`;
        # the list `destFile` has no .replace method.
        src_file = file.replace("destFile", "srcFile")
        shutil.copy(src_file, file)
        count += 1
        with open(log, 'a') as logfile:
            logfile.write(sttime + error + " " + str(count) + " => " + file + '\n')
Thank you for your help guys. Exactly my problem was that I was iterating over all of the source files whenever I detected a missing destination file. The following logic from mackorone is doing what I was looking for.
# Copy only the destination files that are missing; srcFile[position] is the
# source counterpart of destFile[position].
for position, file in enumerate(destFile):
    if not os.path.exists(file):
        file_sr = srcFile[position]
        # Raw string: "\d" is not a valid escape sequence in a normal literal.
        shutil.copy(file_sr, r'C:\destFile')
I have updated the script, so now this script compares two folders, source folder and destination folder. If destination folder is missing files from the source folder, it will be copied. The script is working fine.
import os
import shutil
from datetime import datetime
# Timestamp prefix for log lines; computed once, when the script starts.
sttime = datetime.now().strftime('%d/%m/%Y - %H:%M:%S - ')
error = "ERROR! file is missing! (files have been copied)"
# Raw strings: "\d", "\s" and "\l" are not valid escape sequences, and a
# folder name starting with e.g. "t" or "n" would silently change the path.
des_path = r'C:\des_folder'
sr_path = r'C:\sr_folder'
# Directory listings (entry names only, no paths) captured at start-up.
des_folder = os.listdir(des_path)
sr_folder = os.listdir(sr_path)
count = 0
os.chdir(r"C:\log")
log = "log.txt"
def compare_folder(folder1, folder2):
    """Return the set of names that are in *folder1* but not in *folder2*.

    Both arguments are iterables of entry names (for example the result of
    ``os.listdir``). The function works on the arguments it is given and
    does not read module-level listings.
    """
    return set(folder1) - set(folder2)
# Names present in the source folder but absent from the destination folder.
files_missing = compare_folder(sr_folder, des_folder)
if files_missing:
    # Open the log once for the whole batch, not once per copied file.
    with open(log, 'a') as logfile:
        # Sorted so that copy order and log numbering are reproducible.
        for file in sorted(files_missing):
            full_path_files = os.path.join(sr_path, file)
            shutil.copy(full_path_files, des_path)
            count += 1
            logfile.write(sttime + error + " " + str(count) + " => " + file + '\n')
# Nothing to do when no file is missing; the script simply ends here.
I have the following code:
Basically, it computes the MD5 of each file. The problem is with files that have spaces in their names: what would be the solution so that the program takes those files into account instead of skipping them?
# Log an "MD5 (<name>) = <hex digest>" line for every entry of the directory
# `archivo`. The log file name comes from `salida`, which is defined outside
# this snippet.
# NOTE(review): indentation was lost in this listing.
# NOTE(review): open() receives the bare name returned by os.listdir, not a
# path joined with `archivo`, so it is resolved against the current working
# directory.
def onepath(archivo):
logging.basicConfig(filename=salida,filemode="w", format='%(message)s', level=logging.DEBUG)
for filename in (file for file in os.listdir(archivo)):
with open(filename) as checkfile:
logging.info("MD5 " + "(%s) = " % filename + hashlib.md5(checkfile.read()).hexdigest())
I was reading about the shlex module, but I am not sure how to use it here.
Can you help me?
I think that files with spaces are being listed. I wrote a short snippet, and now I am facing a problem: I cannot control how Linux interprets the spaces in the file names when I do the following:
# List the entries of the directory given as the first command-line argument
# and try to copy each one to /tmp/ with `cp -v`.
# NOTE(review): indentation was lost in this listing; `print i` is Python 2 syntax.
# NOTE(review): `i` is a bare entry name, not joined with the directory from
# sys.argv[1], so cp looks for it in the current working directory.
files_destino = [f for f in os.listdir(os.path.join(sys.argv[1].strip()))]
for i in files_destino:
print i
subprocess.call(["cp","-v", "%s" % i,"/tmp/"])
In the shell shows:
bash-3.2$ ./comodin.py ./espacio/
Boxx view.pdf
cp: Boxx view.pdf: No such file or directory
hola.txt
hola.txt -> /tmp/hola.txt
bash-3.2$
def onepath(archivo):
    """Log the MD5 digest of every regular file directly inside *archivo*.

    One line of the form ``MD5 (<name>) = <hex digest>`` is logged per file.
    The log destination is the module-level name ``salida``, opened in
    overwrite mode.

    Args:
        archivo: Path of the directory whose files are hashed.
    """
    logging.basicConfig(filename=salida, filemode="w", format='%(message)s', level=logging.DEBUG)
    for filename in os.listdir(archivo):
        # Join with the directory so that the file is found regardless of the
        # current working directory (and regardless of spaces in its name).
        filepath = os.path.join(archivo, filename)
        if not os.path.isfile(filepath):
            # Sub-directories and other non-files cannot be opened for hashing.
            continue
        digest = hashlib.md5()
        # Binary mode: MD5 is defined over bytes, and text mode would decode
        # (and possibly fail on) arbitrary file contents.
        with open(filepath, "rb") as checkfile:
            # Hash in chunks so large files are never loaded whole into memory.
            for chunk in iter(lambda: checkfile.read(65536), b""):
                digest.update(chunk)
        logging.info("MD5 (%s) = %s", filename, digest.hexdigest())
I've already read this thread but when I implement it into my code it only works for a few iterations.
I'm using python to iterate through a directory (lets call it move directory) to copy mainly pdf files (matching a unique ID) to another directory (base directory) to the matching folder (with the corresponding unique ID). I started using shutil.copy but if there are duplicates it overwrites the existing file.
I'd like to be able to search the corresponding folder to see if the file already exists, and iteratively name it if more than one occurs.
e.g.
copy file 1234.pdf to folder in base directory 1234.
If 1234.pdf already exists there, name the copy 1234_1.pdf;
if another pdf is copied as 1234.pdf then it would be 1234_2.pdf.
Here is my code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
print path
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir,ARN)
# if the folder already exists in new directory
if os.path.exists(link):
#this is the file location in the new directory
file = os.path.join(basedir, ARN, ARN)
linkfn = os.path.join(basedir, ARN, filename)
if os.path.exists(linkfn):
i = 0
#if this file already exists in the folder
print "Path exists already"
while os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
else:
shutil.copy(path, link)
print ARN + " " + "Copied"
else:
print ARN + " " + "Not Found"
Sometimes it is just easier to start over... I apologize if there is any typo, I haven't had the time to test it thoroughly.
movdir = r"C:\Scans"
basedir = r"C:\Links"

# Walk through all files in the directory that contains the files to copy.
# Each file <base><ext> goes into the existing folder basedir/<base>; if a
# file of that name is already there, the copy is stored as <base>_1<ext>,
# <base>_2<ext>, ... using the first number that is still free.
for root, dirs, files in os.walk(movdir):
    for filename in files:
        # Absolute path, in case several directories are moved.
        old_name = os.path.join(os.path.abspath(root), filename)
        # Separate base from extension.
        base, extension = os.path.splitext(filename)
        target_dir = os.path.join(basedir, base)

        # Target folders are never created here; files without one are skipped.
        if not os.path.exists(target_dir):
            print("%s not found" % target_dir)
            continue

        new_name = os.path.join(target_dir, filename)
        if not os.path.exists(new_name):
            # Folder exists, file does not: plain copy.
            shutil.copy(old_name, new_name)
            continue

        # Folder exists and so does the file: find the first free suffix.
        ii = 1
        while True:
            new_name = os.path.join(target_dir, base + "_" + str(ii) + extension)
            if not os.path.exists(new_name):
                shutil.copy(old_name, new_name)
                # Single-argument print() behaves the same on Python 2 and 3.
                print("Copied %s as %s" % (old_name, new_name))
                break
            ii += 1
I always add a timestamp to the file name, so the target file cannot already exist:
import os
import shutil
import datetime
# Second-resolution timestamp with "_" instead of ":" in the time part.
now = datetime.datetime.now().strftime("%Y-%m-%d %H_%M_%S")
src_dir = "C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand.xlsx"
# The copy gets the timestamp appended to its name.
dst_dir = "".join(["C:\\Users\\Asus\\Desktop\\Versand Verwaltung\\Versand_", now, ".xlsx"])
shutil.copy(src_dir, dst_dir)
For me shutil.copy is the best:
import shutil
# Make a working copy of the invoice: the file at `src` is copied to the
# path `dst`. Both names are relative to the current working directory.
src="invoice.pdf"
dst="copied_invoice.pdf"
shutil.copy(src,dst)
You can change the path of the files as you want.
I would say you have an indentation problem, at least as you wrote it here:
# NOTE(review): indentation was lost in this listing, so which statements sat
# inside the while loop (the point this answer is making) cannot be recovered
# from the text shown here.
while not os.path.exists(file + "_" + str(i) + extension):
i+=1
print "Already 2x exists..."
print "Renaming"
shutil.copy(path, file + "_" + str(i) + extension)
should be:
# Advance i until "<file>_<i><extension>" is a name that does not exist yet.
while os.path.exists(file + "_" + str(i) + extension):
    i += 1
    print("Already 2x exists...")
# The copy happens once, after the loop has found a free name.
print("Renaming")
shutil.copy(path, file + "_" + str(i) + extension)
Check this out, please!
import os
import shutil
import glob
src = r"C:\Source"
dest = r"C:\Destination"
par = "*"


def next_free_name(name, taken):
    """Return *name*, or a numbered variant of it, that is not in *taken*.

    If *name* is already taken, ``_1``, ``_2``, ... is inserted before the
    extension (as split by ``os.path.splitext``) until a free name is found.

    Args:
        name: File name (no directory part).
        taken: Container of names that already exist in the destination.

    Returns:
        The first candidate that is not in *taken*.
    """
    if name not in taken:
        return name
    stem, ext = os.path.splitext(name)
    i = 1
    while True:
        candidate = "{}_{}{}".format(stem, i, ext)
        if candidate not in taken:
            return candidate
        i += 1


# Copy every file matching `par` from src to dest without overwriting:
# a name that already exists in dest gets a numeric suffix instead.
for file in glob.glob(os.path.join(src, par)):
    if not os.path.isfile(file):
        # The "*" pattern also matches directories, which shutil.copy cannot copy.
        continue
    f = os.path.basename(file)
    # Re-list dest on every iteration so that files copied earlier in this run count.
    d = {os.path.basename(n) for n in glob.glob(os.path.join(dest, par))}
    f1 = next_free_name(f, d)
    if f1 == f:
        print("copied", f, " to ", dest)
        shutil.copy(file, dest)
    else:
        print("{} already exists in {}".format(f, dest))
        shutil.copy(file, os.path.join(dest, f1))
        print("renamed and copied ", f1, "to", dest)