Code always writes filename in the output - python

I am trying to find which files have not had a relevant file with a similar filename (almost) so that I can generate them. But this code writes all file names basically whereas I want it to go through the first directory, go through the files and check if they have their equivilent _details.txt in the other folder, if not write the name.
I have in folder 1 those two 11.avi and 22.avi and in folder two only 11_details.txt , so am sure i should get one filename as a result
import os,fnmatch
a = open("missing_detailss5.txt", "w")
for root, dirs, files in os.walk("1/"):
for file1 in files:
if file1.endswith(".dat"):
for root, dirs, files in os.walk("2/"):
print(str(os.path.splitext(file1)[0]) + "_details.txt")
print(files)
if not (os.path.splitext(file1)[0] + "_details.txt") in files:
print(str(os.path.splitext(file1)[0]) + "_details.txt is missing")
a.write(str(os.path.splitext(file1)[0]) + "_details.txt" + os.linesep)
a.close()
here is my debug >>>
11_details.txt
['22_details.txt']
11_details.txt is missing
22_details.txt
['22_details.txt']
22_details.txt is missing

I just corrected your code directly without writing new code, you just missed a txt extension on the comparaison if.
import os
a = open("missing_detailss4.txt", "w")
for root, dirs, files in os.walk("1/"):
for file in files:
if file.endswith(".avi"):
for root, dirs, files in os.walk("2/"):
if not (str(os.path.splitext(file)[0]) + "_details.txt") in files:
a.write(str(os.path.splitext(file)[0]) + "_details.txt" + os.linesep)
a.close()

If I read your question correctly, the files ending in "_details.txt" are supposed to be in the same (relative) directory. That is, "1/some/path/file.avi" should have a corresponding file "2/some/path/file_details.txt". If that's the case, you need not iterate twice:
import os
with open("missing_detailss5.txt", "w") as outfile:
path1 = '1/'
path2 = '2/'
allowed_extensions = ['.dat', '.avi']
for root, dirs, files in os.walk(path1):
for file1 in files:
file1, ext = os.path.splitext(file)
if ext not in allowed_extensions: continue
path2 = os.path.join(path2, os.path.relpath(os.path.join(root, file1 + '_details.txt'), path1))
if not os.path.exists(path2):
print(os.path.basename(path2) + ' is missing.')
outfile.write(os.path.basename(path2) + os.linesep)
If you don't care about which extensions to check for in the first folder, then delete allowed_extensions = ['.dat', '.avi'] and if ext not in allowed_extensions: continue lines, and change file1, ext = os.path.splitext(file) to file1 = os.path.splitext(file)[0].

Related

How to use the renaming function

This is the error which I get:
The system cannot find the file specified: '1.jpg' -> '0.jpg'
even through i have a file named 1.jpg in the directory.
I'm making file renaming script that renames all files in the directory given with a number that increases +1 with every file.
import os
def moving_script():
directory = input("Give the directory")
xlist = os.listdir(directory)
counter = 0
for files in xlist:
os.rename(files, str(counter)+".jpg")
counter = counter + 1
moving_script()
It should be renaming all files, to "0.jpg", "1.jpg" etc
Code:
import os
def moving_script():
directory = input("Give the directory")
xlist = os.listdir(directory)
counter = 0
for files in xlist:
os.rename(os.path.join(directory, files),
os.path.join(directory, str(counter)+".jpg"))
counter = counter + 1
if __name__ == '__main__':
moving_script()
Results:
~/Documents$ touch file0 file1 file2 file3 file4
ls ~/Documents/
file0 file1 file2 file3 file4
$ python renamer.py
Give the directory'/home/suser/Documents'
$ ls ~/Documents/
0.jpg 1.jpg 2.jpg 3.jpg 4.jpg
os.listdir() will return filenames, but will not include path. Thus when you pass files to os.rename() it's looking for it in the current working directory, not the one where they are (i.e. supplied by the user).
import os
def moving_script():
directory = input("Give the directory")
counter = -1
for file_name in os.listdir(directory):
old_name = os.path.join(directory, file_name)
ext = os.path.splitext(file_name)[-1] # get the file extension
while True:
counter += 1
new_name = os.path.join(directory, '{}{}'.format(counter, ext))
if not os.path.exists(new_name):
os.rename(old_name, new_name)
break
moving_script()
note that this code detects what the file extension is. In your code you may rename a non-jpg file with .jpg extension. To avoid this you may change os.listdir(directory) to glob.glob(os.path.join(directory, *.jpg')) and it will iterate only over '*.jpg' files. Don't forget you need to import glob and also on Linux it's case-sensitive, so '*.jpg' will not return '*.JPG' files
EDIT: code updated to check if new file name already exists.

How to search through both zipped and unzipped folders for a specific line

I'm trying to implement a Python script that takes a folder from the user (can be zipped or unzipped), and search through all the files in the folder to output the specific lines that my regular expression matches. My code below works for regular unzipped folders, but I can't figure out how to do the same with zipped folders that are inputted to function. Below are my code, thanks in advance!
def myFunction(folder_name):
path = folder_name
for (path, subdirs, files) in os.walk(path):
files = [f for f in os.listdir(path) if f.endswith('.txt') or f.endswith('.log') or f.endswith('-release') or f.endswith('.out') or f.endswith('messages') or f.endswith('.zip')] # Specify here the format of files you hope to search from (ex: ".txt" or ".log")
files.sort() # file is sorted list
files = [os.path.join(path, name) for name in files] # Joins the path and the name, so the files can be opened and scanned by the open() function
# The following for loop searches all files with the selected format
for filename in files:
#print('start parsing... ' + str(datetime.datetime.now()))
matched_line = []
try:
with open(filename, 'r', encoding = 'utf-8') as f:
f = f.readlines()
except:
with open(filename, 'r') as f:
f = f.readlines()
# print('Finished parsing... ' + str(datetime.datetime.now()))
for line in f:
#0strip out \x00 from read content, in case it's encoded differently
line = line.replace('\x00', '')
RE2 = r'^Version: \d.+\d.+\d.\w\d.+'
RE3 = r'^.+version.(\d+.\d+.\d+.\d+)'
pattern2 = re.compile('('+RE2+'|'+RE3+')', re.IGNORECASE)
for match2 in pattern2.finditer(line):
matched_line.append(line)
print(line)
#Calling the function to use it
myFunction(r"SampleZippedFolder.zip")
The try and except block of my code was my attempt to open the zipped folder and read it. I'm still not very clear with how to open the zipped folder or how it works. Please let me know how I can modify my code to make it work, much appreciated!
One possibility is first determine what object type folder_name is using zipfile and os.isdir() and whichever one succeeds, get the list of files and proceed. Maybe something like this:
import zipfile, os, re
def myFunction(folder_name):
files = None # nothing yet
path = folder_name
if zipfile.is_zipfile(path):
print('ZipFile: {}'.format(path))
f = zipfile.ZipFile(path)
files = f.namelist()
# for name in f.namelist(): # debugging
# print('file: {}'.format(name))
elif os.path.isdir(path):
print('Folder: {}'.format(path))
files = os.listdir(path)
# for name in os.listdir(path): # debugging
# print('file: {}'.format(name))
# should now have a list of files
# proceed processing the files
for filename in files:
...

How do I create a python script that sorts document after amount of characters?

I have tried to create a script that counts a number of characters in all the files in a folder, and move the files with less than 50 characters to another folder.
import os
newPath = "E:\\Sorteret 3"
for root, dirs, files in os.walk("."):
for fileName in files:
if(fileName == 'sort.py'): continue
words=line.split()
if len(words) < 50 in open(os.path.join(root, fileName), 'r', encoding="Latin-1").read():
os.rename(os.path.join(root, fileName), os.path.join(newPath, fileName))`
I get the following error:
"Name line is not defined".
You are not reading anything into line.
I would read all the contents of the file and then do all checks:
import os
newPath = "<your path>"
oldPath = "<your path>"
for root, dirs, files in os.walk(oldPath):
for fileName in files:
if(fileName == 'sort.py'):
continue
fpath = os.path.join(root, fileName)
len = os.stat(fpath).st_size
if len < 50:
os.rename(fpath, os.path.join(newPath, fileName))
Hopes this helps

Going into subfolders (python)

I've written something to remove special characters in Filenames. But it just includes the one folder and not it's subfolders. How can I do this also in subfolders and subsubfolders and so on?
import os
import re
def dir_list2(directory, *args):
fileList = []
content = os.listdir(directory)
for file in content :
dirfile = os.path.join(directory, file)
if os.path.isfile(dirfile):
if len(args) == 0:
fileList.append(dirfile)
else:
if os.path.splitext(dirfile)[1][1:] in args:
fileList.append(dirfile)
print "##################################################"
print "Old filename:", file
filename = file
remove = re.compile("[^.a-zA-z0-9_]")
output = remove.sub('_', filename)
newfile = directory + "/" + output
os.rename(dirfile, newfile)
print "Corrected filename:", output
#Removes Special Characters
return fileList
if __name__ == '__main__':
fileList = dir_list2('/path/')
Try using os.walk instead of os.listdir, it allows you to walk through a folder and its files and subfolders and so on.
Edit your code to be like:
content = os.walk(directory)
for dirpath, dirnames, filenames in content:
for file in filenames:
dirfile = os.path.join(dirpath, file)
# The rest of your code

Recursively append files to zip archive in python

In Python 2.7.4 on Windows, if I have a directory structure that follows:
test/foo/a.bak
test/foo/b.bak
test/foo/bar/c.bak
test/d.bak
And I use the following to add them to an existing archive such that 'd.bak' is at the root of the archive:
import zipfile
import os.path
import fnmatch
def find_files(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
if fnmatch.fnmatch(basename, pattern):
filename = os.path.join(root, basename)
yield filename
if __name__=='__main__':
z = zipfile.ZipFile("testarch.zip", "a", zipfile.ZIP_DEFLATED)
for filename in find_files('test', '*.*'):
print 'Found file:', filename
z.write(filename, os.path.basename(filename), zipfile.ZIP_DEFLATED)
z.close()
The directory of the zip file is flat. It creates the foo/ directory only if a sub-directory exists in it (If I exclude test/foo/bar/c.bak, it does not create the directory. If it is included, foo/ is created but not foo/bar/ if that makes sense), but no sub-directories or files:
foo/
a.bak
b.bak
c.bak
d.bak
Am I missing something?
The problem is that you're explicitly asking it to flatten all the paths:
z.write(filename, os.path.basename(filename), zipfile.ZIP_DEFLATED)
If you look at the docs, the default arcname is:
the same as filename, but without a drive letter and with leading path separators removed
But you're overriding that with os.path.basename(filename). (If you don't know what basename does, it returns "the last pathname component". If you don't want just the last pathname component, don't call basename.)
If you just do z.write('test/foo/bar/c.bak'), it will create a zip entry named test/foo/bar/c.bak, but if you do z.write('test/foo/bar/c.bak', 'c.bak'), it will create a zip entry named c.bak. Since you do that for all of the entries, the whole thing ends up flattened.
I figured it out. As abarnet pointed out, I had misread the docs on zipfiles. Using the following function, I can create the correct archive name for the zip file:
def createArchName(path):
line = path
if "\\" in line:
''' windows '''
discard, val = line.split("\\", 1)
return val
else:
''' unix '''
discard, val = line.split("/", 1)
return val
For those interested, the full code is as follows:
import urllib2
import zipfile
import os.path
import fnmatch
def find_files(directory, pattern):
for root, dirs, files in os.walk(directory):
for basename in files:
if fnmatch.fnmatch(basename, pattern):
filename = os.path.join(root, basename)
yield filename
def createArchName(path):
line = path
if "\\" in line:
''' windows '''
discard, val = line.split("\\", 1)
return val
else:
''' unix '''
discard, val = line.split("/", 1)
return val
if __name__=='__main__':
if not os.path.exists("test"):
os.mkdir("test")
z = zipfile.ZipFile("testarch.zip", "a", zipfile.ZIP_DEFLATED)
for filename in find_files('test', '*.*'):
archname = createArchName(filename)
print 'Found file:', archname
z.write(filename, archname, zipfile.ZIP_DEFLATED)
z.close()

Categories