I am working on a code that will encode my files according to the filename. If the file is named DOG, CAT, the file should be renamed to DEL/DBD plus date and number (001,002). The problem is that if I have several CAT files, I get an error as the code does not increment the number. Also, the numeration should be separate for each file type (so CAT 001, 002 and DOG 001, 002).
I've tried to increase the counter by one every time I get the FileNameError exception, but it does not seem to work. Could you please help me out?
import os
from datetime import date
def rename_files():
path = 'U:\get_filename'
cur_day = str(date.today().strftime("%Y%m%d"))
counter =+ 1
stamp_2 = "{} 00{}.pdf".format(cur_day,str(counter))
del01 = 'DEL ' + stamp_2
dbd02 = 'DBD ' + stamp_2
cgf01 = 'CGF ' + stamp_2
cle01 = 'CLE ' + stamp_2
dmm01 = 'DMM ' + stamp_2
dqt01 = 'DQT ' + stamp_2
sri01 = 'SRI ' + stamp_2
cal01 = 'CAL ' + stamp_2
for r,d,f in os.walk(path):
for file in f:
counter +=1
try:
if 'DOG' in file:
os.rename(file, del01)
elif 'CAT' in file:
os.rename(file, dbd02)
elif 'BIRD' in file:
os.rename(file, cgf01)
elif 'FISH' in file:
os.rename(file, cle01)
elif 'INSECT' in file:
os.rename(file, dmm01)
elif 'CAR' in file:
os.rename(file, dqt01)
elif 'BIKE' in file:
os.rename(file, sri01)
elif 'SCOOTER' in file:
os.rename(file, cal01)
except:
counter +=1
rename_files()
import os
from datetime import date
names = {
"DEL": 1,
"DBD": 1,
"CGF": 1,
"CLE": 1,
"DMM": 1,
"DQT": 1,
"SRI": 1,
"CAL": 1,
}
cur_day = str(date.today().strftime("%Y%m%d"))
def rename(source, name):
prefix = "{} {0:03d}.pdf".format(cur_day, names[name])
try:
os.rename(source, "{} {}".format(name, prefix))
names[name] += 1
except:
pass
def rename_files():
path = "U:\get_filename"
for r, d, f in os.walk(path):
for file in f:
if "DOG" in file:
rename(file, "DEL")
elif "CAT" in file:
rename(file, "DBD")
elif "BIRD" in file:
rename(file, "CGF")
elif "FISH" in file:
rename(file, "CLE")
elif "INSECT" in file:
rename(file, "DMM")
elif "CAR" in file:
rename(file, "DQT")
elif "BIKE" in file:
rename(file, "SRI")
elif "SCOOTER" in file:
rename(file, "CAL")
rename_files()
Great, thanks for help. Though it's not quite perfect yet, when I run the rename function, I get a - TypeError: unhashable type: 'dict'.
import os
from datetime import date
import pandas as pd
cur_day = str(date.today().strftime("%Y%m%d"))
def main():
print("Current Working Directory " , os.getcwd())
os.chdir(r'U:\\get_filename')
i = 1
j = 1
k = 1
l = 1
m = 1
n = 1
o = 1
p = 1
q = 1
r = 1
path = r'U:\get_filename'
for filename in os.listdir(r'U:\get_filename'):
my_dest = 'CLE ' + cur_day + ' 00' + str(i) + ".pdf"
my_dest2 = 'DBD ' + cur_day + ' 00' + str(j) + ".pdf"
my_dest3 = 'CGF ' + cur_day + ' 00' + str(k) + ".pdf"
my_dest4 = 'CLE ' + cur_day + ' 00' + str(l) + ".pdf"
my_dest5 = 'DMM ' + cur_day + ' 00' + str(m) + ".pdf"
my_dest6 = 'DQT ' + cur_day + ' 00' + str(n) + ".pdf"
my_dest7 = 'SRI ' + cur_day + ' 00' + str(o) + ".pdf"
my_dest8 = 'CAL ' + cur_day + ' 00' + str(p) + ".pdf"
my_dest9 = 'BIL ' + cur_day + ' 00' + str(q) + ".pdf"
my_dest10 = 'DEL ' + cur_day + ' 00' + str(r) + ".pdf"
my_source = path + filename
if 'Jerry' in filename:
os.rename(filename, my_dest)
i +=1
elif 'Bob' in filename:
os.rename(filename, my_dest2)
j +=1
elif 'Sara' in filename:
os.rename(filename, my_dest3)
k +=1
elif 'Greg' in filename:
os.rename(filename, my_dest4)
l +=1
elif 'Annie' in filename:
os.rename(filename, my_dest5)
m +=1
elif 'Beth' in filename:
os.rename(filename, my_dest6)
n +=1
elif 'Claire' in filename:
os.rename(filename, my_dest7)
o +=1
elif 'Johnny' in filename:
os.rename(filename, my_dest8)
p +=1
elif 'Bob' in filename:
os.rename(filename, my_dest9)
q +=1
elif 'Jimbo' in filename:
os.rename(filename, my_dest10)
r +=1
def exp_names():
list_files = os.listdir(r'U:\\get_filename')
list_2 = [x.strip('.pdf') for x in list_files]
list_2 = pd.DataFrame(list_2)
list_3 = list_2[list_2[0] != 'iles.xlsx']
list_3.to_excel(r'U:\get_filename\\files.xlsx', header = False, index = False)
if __name__ == "__main__":
main()
exp_names()
Related
This is my code to create a hashtag file. The issue is it does not put the # for the first hashtag and at he end it puts a double hashtag like below.
passiveincome, #onlinemarketing, #wahmlife, #cash, #entrepreneurlifestyle, #makemoneyonline, #makemoneyfast, #entrepreneurlifestyle, #mlm, #mlm
How do I get the code to remove the double output and put the # at the beginning?
import random, os, sys
basepath = os.path.dirname(sys.argv[0]) + "/"
outputpath = "C:/Users/matth/OneDrive/Desktop/Create hashtags/"
paragraphsmin = 9
paragraphsmax = 9
sentencemin = 1
sentencemax = 1
keywords = []
for line in open(basepath + "/base.txt", "r"):
keywords.append(line.replace("\n",""))
keywordlist = []
keyword = open(basepath + "/text-original.txt", "r")
for line in keyword:
keywordlist.append(line.replace("\n", "\n"))
def type(name):
value = name[random.randint(0,len(name)-1)]
return value
"""
def xyz(num):
s1 = '' + type(keywordlist).strip()
return eval('s' + str(num))
"""
def s1():
return '' + type(keywordlist).strip()
def randomSentence():
sent = eval("s" + str(random.randint(1,1)) + "()")
return sent
for keyword in keywords:
outputfile = open(outputpath + keyword.replace(" ", " ") + ".txt", "w")
outputfile.write('')
for p in range(1,random.randint(paragraphsmin,paragraphsmax) + 1):
outputfile.write('')
for s in range(1,random.randint(sentencemin,sentencemax) + 1):
sentence = randomSentence()
if str(sentence)[0] == "\"":
outputfile.write("" + str(sentence)[0] + str(sentence)[1] + str(sentence)[2:] + " ")
else:
outputfile.write("" + str(sentence)[0] + str(sentence)[1:] + ", #")
outputfile.write('')
outputfile.write(sentence.replace("", "") + "")
outputfile.close()
Try replacing
outputfile.write("" + str(sentence)[0] + str(sentence)[1:] + ", #")
with
outputfile.write("#" + str(sentence)[0] + str(sentence)[1:] + ", ")
I have 30911 html files. I need to do webscraping and then save the info into a txt file named index.txt.
It should look like
filename1, title, t1, date, p1
filename2, title, t1, date, p1
filename3, title, t1, date, p2
and so on...
I only want filename, but output gave me path+filename.
Your problem is that filename is filepath in reality, in order to get the filename you could use os module
os.path.basename('filepath')
so in order to write to the file:
indexFile.write(os.path.basename(filename)+ ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
You can use:
path = 'C:/Users/.../.../output/'
#read html files
for filename in glob.glob(os.path.join(path, '*.html')):
soup = bs4.BeautifulSoup(open(filename).read(), "lxml")
title = soup.find('h1')
ticker = soup.find('p')
d_date = soup.find_all('div', {"id": "a-body"})[0].find_all("p")[2]
try:
def find_participant(tag):
return tag.name == 'p' and tag.find("strong", text=re.compile(r"Executives|Corporate Participants"))
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
except:
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(filename + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + 'No participants' + '\n')
else:
participants = soup.find(find_participant)
parti_names = ""
for parti in participants.find_next_siblings("p"):
if parti.find("strong", text=re.compile(r"(Operator)")):
break
parti_names += parti.get_text(strip=True) + ","
indexFile = open('C:/Users/.../output1/' + 'index.txt', 'a+')
indexFile.write(os.path.basename(filename) + ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
indexFile.close()
ntpath is another module used to get base name from path.
>>> import ntpath
>>> ntpath.basename('C:/Users/.../output1/' + 'index.txt')
'index.txt'
Writing a script to help with data migration in renaming images. It seems as though when I try to access the variable filename from within the inner-for-loop, it's just printing .DS_Store
See commented lines for example:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
# Walk the tree.
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
# Join the two strings in order to form the full filepath.
filePath = os.path.join(root, filename)
#print(filePath) - this results in the actual file path
for row in reader:
#print(filePath) - this results in .DS_Store
oldFilePath = row[1].strip()
displayName = row[0].strip()
colour = row[2].strip()
if " " in colour:
colour = colour.replace(" ", "-")
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number)
os.rename(newFileName)
COUNT = COUNT+1
print(COUNT)
Why would this be?
After changing my code as per the comments, to store the results in a list, now the for root, directories, files in os.walk(FILE_PATH): is not being executed.
I verified that the FILE_PATH exists and printed it to console, also that it has contents.
My new code is as follows:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
productInfo = []
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
productInfo.append(row)
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
for info in productInfo:
displayName = info[0]
oldFilePath = info[1]
colour = info[2]
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if " " in colour:
colour = colour.replace(" ", "-")
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number) + ".jpg"
os.rename(newFileName)
COUNT = COUNT + 1
print(COUNT)
I try to create a program which can recursively traverse multiple directories and print the file listing in hieararchical way like :
Folder
----x.c
----x.bin
----Folder
---------x.c
I try to do with program like (with file/folders detail) :
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item) :
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing \n"
But i can't go in any directory i try with like (A is a directory here) :
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item):
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing"
for item in os.listdir("A") :
if os.path.isdir("A") :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile("A") :
print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing"
The listing is wrong i don't understand why i can't just go from . to A and how to do it .And worst if i go on B (the second folder here):
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item):
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "dunno"
for item in os.listdir("A") :
if os.path.isdir("A") :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile("A") :
print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "lulz"
for item in os.listdir("A/B") :
if os.path.isfile("A/B") :
print "---" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print 'Nothing'
I think you want to use os.walk
for (cur, dirs, files) in os.walk('.'):
pass
This will give you the current directory, a list of directories in the current directory and a list of files in the current directory.
I think you want something like
for (cur, dirs, files) in os.walk('.'):
depth = len(cur.split('/'))
print "--" * depth, cur
for fname in files:
print "--" * (depth + 1), fname
Borrowed a bit from this answer: List directory tree structure using Python
import os
def list_files(path, spaceChar=' ', spaceWidth=4):
for root, dirs, files in os.walk(path):
level = root.replace(path, '').count(os.sep)
indent = spaceChar * (spaceWidth * level)
print('{}{}/'.format(indent, os.path.basename(root)))
subindent = spaceChar * spaceWidth * (level + 1)
for f in files:
print('{}{}'.format(subindent, f))
list_files(".", "-", 3)
I am writing a program to perform file integrity checks of files in a directory. There are 3 nested loops in the code. When I run the code, the first two loops work great but the third loop does not run more than once.
import hashlib
import logging as log
import optparse
import os
import re
import sys
import glob
import shutil
def md5(fileName):
"""Compute md5 hash of the specified file"""
try:
fileHandle = open(fileName, "rb")
except IOError:
return
m5Hash = hashlib.md5()
while True:
data = fileHandle.read(8192)
if not data:
break
m5Hash.update(data)
fileHandle.close()
return m5Hash.hexdigest()
req = open("requested.txt")
for reqline in req:
reqName = reqline[reqline.rfind('/') + 1:len(reqline) - 1]
reqDir = reqline[0:reqline.rfind('/') + 1]
ezfimlog = open("ezfimlog.txt", 'a')
actFile = open("activefile.txt")
tempFile = open("activetemp.txt", 'w')
for name in glob.glob(reqDir + reqName):
fileHash = md5(name)
actInt = 0
if fileHash != None:
print fileHash
for actLine in actFile:
actNameDir = actLine[0:actLine.rfind(' : ')]
actHash = actLine[actLine.rfind(' : ') + 3:len(actLine) -1]
print (name + " " + actHash + " " + fileHash)
if actNameDir == name and actHash == fileHash:
tempFile.write(name + " : " + fileHash + "\n")
actInt = 1
if actNameDir == name and actHash != fileHash:
tempFile.write(name + " : " + actHash + "\n")
actInt = 1
ezfimlog.write("EzFIM Log: The file " + name + " was modified: " + actHash + "\n")
if actInt == 0:
ezfimlog.write("EzFIM Log: The file " + name + " was created: " + fileHash + "\n")
tempFile.write(name + " : " + fileHash + "\n")
shutil.copyfile("activetemp.txt", "activefile.txt")
You open actFile once and then try to read it many times. You'll need to open it each time you want to read it.
Move this line:
actFile = open("activefile.txt")
to just before this line:
for actLine in actFile: