Apologies in advance for what is probably an easy fix; I am a college student learning C++ and am using Python for the first time on a personal project.
I am writing a program that extracts the title from a media file inside a directory or subdirectory, then looks to see if there are any strings that match. If there are then it compares their resolution, and deletes the lower resolution file. If they are both the same resolution, it deletes the larger file size. All of it is working, with the exception of deleting files. When I try to, it throws an error saying the files are in use. After doing some research, I learned that it was because I have the files open inside the code, preventing them from being deleted. My problem is that I don't know what variable I need to close, or the appropriate way and location to do so.
import os
import cv2
import PTN
import json
# Root folder that is scanned (recursively) for duplicate media files.
MEDIA_ROOT = r"E:\Python Test Environment"


def read_resolution(path):
    """Return ``frame.shape`` of the first frame of *path*, or None.

    The capture is released before returning, even if reading fails, so the
    operating system no longer holds the file open when it is deleted later.
    None is returned when no frame could be decoded.
    """
    capture = cv2.VideoCapture(path)
    try:
        ok, frame = capture.read()
    finally:
        capture.release()
    if not ok or frame is None:
        return None
    return frame.shape


def choose_loser(path_a, res_a, size_a, path_b, res_b, size_b):
    """Return whichever of the two paths should be deleted.

    The file with the lower resolution loses.  When the resolutions are
    equal, the larger file loses; if the sizes are equal too, *path_b* loses.
    """
    if res_a != res_b:
        return path_b if res_a > res_b else path_a
    return path_b if size_a <= size_b else path_a


def remove_duplicates(root):
    """Delete the worse copy of every pair of files under *root* sharing a title.

    Titles come from ``PTN.parse(path)['title']`` with any directory part
    stripped.  Files whose title is empty are never matched.
    """
    paths = [os.path.join(folder, name)
             for folder, _dirs, names in os.walk(root)
             for name in names]
    for path in paths:
        print(path)

    titles = [os.path.basename(PTN.parse(path)['title']) for path in paths]

    removed = set()  # indexes of files that have already been deleted
    for x in range(len(paths)):
        if x in removed or not titles[x]:
            continue
        # Start at x + 1 so a file is never compared against itself and each
        # pair is visited only once.
        for i in range(x + 1, len(paths)):
            if i in removed or titles[i] != titles[x]:
                continue
            print('Match found!')
            print(paths[i])
            print('Matches: ')
            print(paths[x])

            res_x = read_resolution(paths[x])
            res_i = read_resolution(paths[i])
            if res_x is None or res_i is None:
                print('Could not read a frame; skipping this pair')
                continue

            loser = choose_loser(
                paths[x], res_x, os.path.getsize(paths[x]),
                paths[i], res_i, os.path.getsize(paths[i]))
            if res_x == res_i:
                print("equal")
            else:
                keeper = paths[i] if loser == paths[x] else paths[x]
                print(keeper)
                print("Is higher resolution than")
                print(loser)
            print("Would delete: ")
            print(loser)
            os.remove(loser)

            if loser == paths[x]:
                # The outer file is gone; nothing left to compare against.
                removed.add(x)
                break
            removed.add(i)


remove_duplicates(MEDIA_ROOT)
Add capture1.release() and capture2.release() to release the resources used by the VideoCapture instances
I've created a simple dns.query function and I am attempting to add the results to a list or potentially a dictionary. However, I can't work out how to achieve it: I have tried list.append(subdomain, item), the join function, and the update function, respectively.
Any pointers would be appreciated.
ORIGINAL
# Question's original attempt, kept exactly as posted.
# Resolves `subdomain` through the 8.8.8.8 nameserver and tries to collect one
# entry per record in the answer's rrset; returns the `targets` list.
def get_brutes(subdomain):
targets = []
try:
myResolver = dns.resolver.Resolver()
myResolver.nameservers = ['8.8.8.8']
myAnswers = myResolver.query(subdomain)
for item in myAnswers.rrset:
# NOTE(review): list.append() takes exactly one argument, so this two-argument
# call raises TypeError on the first record.
targets.append(subdomain,item)
# Every exception (including the TypeError above) is swallowed here, which is
# why the function returns an empty list instead of reporting the problem.
except Exception as e:
pass
return targets
FIX
def get_brutes(subdomain):
    """Return "<subdomain> <record>" strings for every record of *subdomain*.

    The lookup goes through the 8.8.8.8 nameserver.  This is best-effort:
    any failure is swallowed and whatever was collected so far (normally an
    empty list) is returned.
    """
    targets = []
    try:
        resolver = dns.resolver.Resolver()
        resolver.nameservers = ['8.8.8.8']
        answers = resolver.query(subdomain)
        targets.extend(subdomain + ' ' + str(record) for record in answers.rrset)
    except Exception:
        # Deliberately ignored: an unresolvable name simply yields no targets.
        pass
    return targets
I am trying to use "directory path" and "prefix_pattern" from a config file.
I get correct results in vdir2 and vprefix2 variable but list local_file_list is still empty.
result
vdir2 is"/home/ab_meta/abfiles/"
vprefix2 is "rp_pck."
[]
code
def get_files(self):
    """Return the regular files matching $dir_path + $prefix_pattern + "*".

    The result is a list of paths exactly as produced by ``glob`` (so they
    already include ``dir_path``), sorted by modification time, oldest first.
    On any failure the error is printed and logged and the process exits
    with status 1.
    """
    try:
        dir_path = os.environ['dir_path']
        prefix = os.environ['prefix_pattern']
        print("vdir2 is" + dir_path)
        print("vprefix2 is " + prefix)
        # Build a real list: a lazy filter object has no .sort() on Python 3.
        local_file_list = [path for path in glob.glob(dir_path + prefix + "*")
                           if os.path.isfile(path)]
        print(local_file_list)
        # glob's results already carry dir_path, so joining it on again would
        # point at the wrong location whenever dir_path is relative.
        local_file_list.sort(key=os.path.getmtime)
    except Exception as e:
        print(e)
        self.m_logger.error("Exception: Process threw an exception " + str(e))
        log.sendlog("error",50)
        sys.exit(1)
    return local_file_list
I have tried another way as given below but again list is coming as empty.
2nd Option :
def get_config(self):
    """Copy dir_path, prefix_pattern and mail_prefix from the environment to self.

    Sets ``m_dir_path``, ``m_prefix_pattern`` and ``m_mail_prefix``.  A
    missing variable is logged and leaves the attributes unset; any other
    error is printed and logged and the process exits with status 1.
    """
    try:
        v_dir_path = os.environ['dir_path']
        # Previously never read, so the assignment below raised NameError.
        v_prefix_pattern = os.environ['prefix_pattern']
        v_mail_prefix = os.environ['mail_prefix']
        self.m_dir_path = v_dir_path
        self.m_prefix_pattern = v_prefix_pattern
        self.m_mail_prefix = v_mail_prefix
    except KeyError as key:
        self.m_logger.error("ERROR: Unable to retrieve the key " + str(key))
    except Exception as e:
        print(e)
        self.m_logger.error("Error: job_prefix Unable to get variables " + str(e))
        sys.exit(1)
def get_files(self):
    """Return the regular files matching m_dir_path + m_prefix_pattern + "*".

    Paths are returned as produced by ``glob``, sorted by modification time,
    oldest first.  If anything goes wrong the error is printed and an empty
    list is returned.
    """
    try:
        candidates = glob.glob(self.m_dir_path + self.m_prefix_pattern + "*")
        # A real list is required: a lazy filter object cannot be sorted in place.
        local_file_list = [path for path in candidates if os.path.isfile(path)]
        # The glob results already include the directory, so use them as-is.
        local_file_list.sort(key=os.path.getmtime)
    except Exception as e:
        print(e)
        return []
    # The original computed the list but never handed it back to the caller.
    return local_file_list
Thanks
Sandy
Outside of this program, wherever you set the environment variables, you are setting them incorrectly. Your environment variables have quote characters in them.
Set your environment variables to have the path data, but no quotes.
Assign the environment variable and then pass the path you are interested in into the function.
Accessing global state from within your function can make it hard to follow and debug.
Use os.walk to get the list of files; it yields a tuple of the root dir, a list of dirs, and a list of files. For me it's cleaner than using os.path.isfile to filter.
Use a list comprehension to filter the list of files returned by os.walk.
I'm presuming the prints statements are for debugging so left them out.
# Read once when the module is loaded; os.environ[...] raises KeyError if
# either variable is not set.
vdir2 = os.environ['dir_path']
vprefix2 = os.environ['prefix_pattern']
def get_files(vpath, prefix=None):
    """Return names of files under *vpath* starting with *prefix*, oldest first.

    Args:
        vpath: directory to walk (subdirectories are included).
        prefix: required filename prefix; defaults to the module-level
            ``vprefix2`` when omitted.

    Returns:
        A list of bare file names ordered by modification time.  The list is
        empty when *vpath* does not exist or nothing matches.
    """
    if prefix is None:
        prefix = vprefix2
    dated = []
    for root, _dirs, files in os.walk(vpath):
        for name in files:
            if name.startswith(prefix):
                # getmtime needs the full path; a bare name only resolves
                # when the current working directory happens to be `root`.
                dated.append((os.path.getmtime(os.path.join(root, name)), name))
    dated.sort(key=lambda pair: pair[0])
    return [name for _mtime, name in dated]
import requests
import json
def decrementList(words):
    """Find the longest leading run of *words* that is an exact track title.

    The full list is tried first, then progressively shorter prefixes (down
    to a single word).  Each candidate is sent to the Spotify search endpoint
    and compared case-insensitively against the returned track names.

    Returns:
        A 3-tuple ``(url_or_message, remaining_words, track_id)``.  On a match
        the first item is the open.spotify.com URL, the second the words left
        after the matched prefix, the third the track id.  When nothing
        matches, the tuple is ``("Sorry, no more track matches found!", None,
        None)`` so callers can always unpack three values.
    """
    url = 'http://ws.spotify.com/search/1/track.json?q='
    for w in [words] + [words[:-x] for x in range(1, len(words))]:
        request = requests.get(url + "%20".join(w))
        json_dict = json.loads(request.content)
        track_title = ' '.join(w).lower()
        for track in json_dict["tracks"]:
            if track["name"].lower() == track_title and track['href']:
                # The first 14 characters of href are dropped to get the id.
                track_id = track["href"][14:]
                return "http://open.spotify.com/track/" + track_id, words[len(w):], track_id
    return "Sorry, no more track matches found!", None, None
# Driver as posted in the question: repeatedly asks decrementList() for the next
# matching track and feeds the remaining words back in until none are left.
if __name__ == "__main__":
message = "baby asdf".split()
# NOTE(review): `size` is assigned but never read in this snippet.
size = len(message)
# The loop ends once decrementList() hands back a falsy remainder.
while message:
href, new_list, for_playlist = decrementList(message)
message = new_list
#print href
# NOTE(review): `playlist` is rebound to a fresh empty list on every pass, so
# each print shows a one-element list rather than an accumulated one.
playlist = []
playlist.append(for_playlist)
print playlist
In the code above, print playlist is returning two separate lists. I realize that this is occurring because the list append happens within a while loop. How can I make these both append to the same empty list, not two separate lists?
make declaration of list and print out of while loop:
playlist = []
while message:
#....
print playlist
That's because you reassign the name playlist to an empty list before calling append():
playlist = []
If you put it before the while loop, you should get the expected result.
playlist = []
while message:
The task:
The firm I have gotten a summer job at has an expanding test database that consists of an increasing number of subfolders for each project, containing everything from .jpeg files to the .xlsx files I am interested in. As I am a bit used to Python from earlier, I decided to give it a go at this task. I want to search for Excel documents that have "test spreadsheet" as a part of their title (for example "test spreadsheet model259"). All the docs I am interested in are built the same way (weight is always "A3" etc.), looking somewhat like this:
Model: 259
Lenght: meters 27
Weight: kg 2500
Speed: m/s 25
I want the user of the finished program to be able to compare results from different tests with each other using my script. This means that the script must see if there is an x-value that fits both criteria at once:
inputlength = x*length of model 259
inputweight = x*weight of model 259
The program should loop through all the files in the main folder. If such an X exists for a model, I want the program to return it to a list of fitting models. The x-value will be a variable, different for each model.
As the result I want a list of all files that fits the input, their scale(x-value) and possibly a link to the file.
For example:
Model scale Link
ModelA 21.1 link_to_fileA
ModelB 0.78 link_to_fileB
The script
The script I have tried to get to work so far is below, but if you have other suggestions of how to deal with the task I'll happily accept them. Don't be afraid to ask if I have not explained the task well enough. XLRD is already installed, and I use Eclipse as my IDE. I've been trying to get it to work in many ways now, so most of my script is purely for testing.
Edited:
#-*- coding: utf-8 -*-
#Accepts norwegian letters
import xlrd, os, fnmatch
# Default directory tree that is searched for spreadsheets.
folder = r'C:\eclipse\TST-folder'

# File extensions (lower case) that count as Excel workbooks.
EXCEL_SUFFIXES = ('.xls', '.xlsx', '.xlsm')


def excelfiles(pattern, start_dir=None):
    """Return full paths of Excel files under *start_dir* matching *pattern*.

    Args:
        pattern: fnmatch-style pattern, compared against the lower-cased file
            name (so write it in lower case).
        start_dir: directory to walk recursively; defaults to the
            module-level ``folder``.

    Returns:
        A list of ``os.path.join(root, filename)`` paths; empty when the
        directory does not exist or nothing matches.
    """
    if start_dir is None:
        start_dir = folder
    file_list = []
    for root, dirs, files in os.walk(start_dir):
        for filename in files:
            lowered = filename.lower()
            # Check the extension on the lower-cased name too, so "A.XLSX"
            # is not silently skipped.
            if fnmatch.fnmatch(lowered, pattern) and lowered.endswith(EXCEL_SUFFIXES):
                file_list.append(os.path.join(root, filename))
    return file_list


file_list = excelfiles('*tst*')  # only accept docs whose title includes tst
print(file_list)
How come I only get one result when I am printing excelfiles() after returning the values, but when I exchange "return os.path.join(filename)" with "print os.path.join(filename)" it shows all .xls files? Does this mean that the results from the excelfiles-function is not passed on? Answered in comments
''' Inputvals '''
# Each prompt runs when the module is loaded; int() raises ValueError if the
# answer is not a whole number.  The prompt texts are Norwegian.
inputweight = int(raw_input('legg inn vekt')) #input box for weight
inputlength = int(raw_input('legg inn lengd')) #input box for length
inputspeed = int(raw_input('legg inn hastighet')) #input box for speed
def locate_vals(paths=None):
    """Read the weight, length and speed cells from each spreadsheet.

    Args:
        paths: iterable of workbook paths; defaults to the module-level
            ``file_list``.

    Returns:
        A dict mapping each path to ``[weightvalue, lenghtvalue, speedvalue]``
        taken from the workbook's first sheet.
    """
    if paths is None:
        paths = file_list
    val_dict = {}
    for filename in paths:
        # The entries are already full paths, so they are opened directly.
        wb = xlrd.open_workbook(filename)
        sheet = wb.sheet_by_index(0)
        # NOTE(review): all three values are read from the same cell (1, 1);
        # presumably each should have its own coordinates -- confirm against
        # the spreadsheet layout before relying on these numbers.
        weightvalue = sheet.cell_value(1, 1)
        lenghtvalue = sheet.cell_value(1, 1)
        speedvalue = sheet.cell_value(1, 1)
        val_dict[filename] = [weightvalue, lenghtvalue, speedvalue]
    return val_dict
val_dict = locate_vals()
# Print the dict that was just built instead of re-reading every workbook.
print(val_dict)
count = 0
Any ideas of how I can read from each of the documents found by the excelfiles-function? "funcdox" does not seem to work. When I insert a print-test, for example print weightvalue after the weightvalue = sheet.cell(3,3).value function, I get no feedback at all. Errormessages without the mentioned print-test:Edited to the script above, which creates a list of the different values + minor changes that removed the errormessages
Script works well until this point
Made some minor changes to the next part. It is supposed to scale a value from the spreadsheet by multiplying it by a constant (x1). Then I want the user to be able to define another input value, which in turn defines another constant (x2) to make the spreadsheet value fit. Eventually, these constants will be compared to find which models will actually fit for the test.
'''Calculates vals from excel from the given dimensions'''
# While the global `count` is 0, picks one scale_* routine according to the
# first user input that is non-zero (weight, then length, then speed).
# NOTE(review): `exec <call>` runs the call and then tries to execute its
# *return value* as code; a plain call is presumably what is meant.
# NOTE(review): scale_weight is declared with two parameters (x1, x2) but is
# invoked here with none.
def dimension(): # Maybe exchange exec-statement with the function itself.
if count == 0:
if inputweight != 0:
exec scale_weight()
# NOTE(review): the prompt variable is spelled `inputlength`; `inputlenght`
# is not defined in the visible code.
elif inputlenght != 0:
exec scale_lenght()
elif inputspeed != 0:
exec scale_speed()
# Draft of the weight-scaling step; returns a (value, factor) pair from the
# first loop iteration that reaches a return statement.
# NOTE(review): iterating locate_vals() walks the dict's keys (file paths),
# not the stored weight values.
# NOTE(review): `count += 1` makes `count` local to this function, so the
# earlier `if count == 0` raises UnboundLocalError unless `global count` is
# declared.
def scale_weight(x1, x2): # Repeat for each value.
for weightvalue in locate_vals():
if count == 0:
# NOTE(review): this is a comparison whose result is discarded; it does not
# solve for x1.
x1 * weightvalue == inputweight
count += 1
# NOTE(review): criteria2 is a function object here, not code to execute.
exec criteria2
return weightvalue, x1
elif count == 2:
inputweight2 = int(raw_input('Insert weight')) #input box for weight
# NOTE(review): same discarded comparison as above, for x2.
x2 * weightvalue == inputweight2
return weightvalue, x2
The x1 and x2 are what I want to find with this function, so I want them to be totally "free". Is there any way I can test this function without having to insert values for x1 and x2 ?
# Placeholder: currently does nothing and returns None.
def scale_lenght(): # Almost identical to scale_weight
return
# Placeholder: currently does nothing and returns None.
def scale_speed(): # Almost identical to scale_weight
return
# Draft of the second-criterion step: when the global `count` is 1, asks which
# quantity to use next and dispatches to the matching scale_* routine.
# NOTE(review): the typed answer is compared with the *values* of the
# weight/lenght/speed parameters, not with the words "weight", "length" or
# "speed" that the prompt asks for.
# NOTE(review): `count += 1` makes `count` local, so `if count == 1` raises
# UnboundLocalError unless `global count` is declared.
# NOTE(review): `exec scale_weight` is handed a function object rather than
# code; a direct call is presumably intended.
def criteria2(weight, lenght, speed):
if count == 1:
k2 = raw_input('Criteria two, write weight, length or speed.')
if k2 == weight:
count += 1
exec scale_weight
elif k2 == lenght:
count += 1
exec scale_lenght
elif k2 == speed:
count += 1
exec scale_speed
else:
return
Do you see any easier way to deal with this problem? (I hope I managed to explain it well enough. The way I have written the code so far is quite messy, but since I'm not that experienced I'll just have to make it work first, and then clean it up if I have the time.)
Since probably none of the values will exactly fit for both x-constants, I thought I'd use approx_Equal to deal with it:
def approx_Equal(x1, x2, tolerance=None, err_msg='Unacceptable tolerance', verbose=True):
    """Return the midpoint of *x1* and *x2* if they are within *tolerance*.

    Args:
        x1, x2: the two scale factors to compare.
        tolerance: largest accepted absolute difference.  When omitted, the
            user is prompted for it on the call (rather than once while the
            function is being defined).
        err_msg, verbose: accepted for compatibility; currently unused.

    Returns:
        ``x1 + (x2 - x1) / 2.0`` when ``abs(x1 - x2) <= tolerance``,
        otherwise None.
    """
    if tolerance is None:
        tolerance = int(raw_input('Insert tolerance for scaling difference'))
    if abs(x1 - x2) <= tolerance:
        # 2.0 keeps the midpoint exact even when both inputs are integers.
        return x1 + (x2 - x1) / 2.0
    return None
Eventually, I'd like a diagram of all the variables used + a link-to-file and name for each document.
No sure how I will do this, so any tips are greatly appreciated.
Thanks!
In answer to the first question "How come I only get one result when I am printing excelfiles()" this is because your return statement is within the nested loop, so the function will stop on the first iteration. I would try building up a list instead and then return this list, you could also combine this with the issue of checking the name e.g. :
import os, fnmatch
#globals
start_dir = os.getenv('md')
def excelfiles(pattern, root_dir=None):
    """Return full paths of Excel files under *root_dir* matching *pattern*.

    Args:
        pattern: fnmatch-style pattern, compared against the lower-cased file
            name (so write it in lower case).
        root_dir: directory to walk recursively; defaults to the
            module-level ``start_dir``.

    Returns:
        A list of ``os.path.join(root, filename)`` paths; empty when the
        directory does not exist or nothing matches.
    """
    if root_dir is None:
        root_dir = start_dir
    file_list = []
    for root, dirs, files in os.walk(root_dir):
        for filename in files:
            lowered = filename.lower()
            # Test the extension on the lower-cased name as well, so upper-case
            # extensions such as ".XLSX" are accepted.
            if fnmatch.fnmatch(lowered, pattern) and lowered.endswith((".xls", ".xlsx", ".xlsm")):
                file_list.append(os.path.join(root, filename))
    return file_list
# Collect every matching workbook, then list them one path per line.
file_list = excelfiles('*cd*')
for i in file_list:
    print(i)
Obviously, you'll need to replace the cd with your own search text, but keep the * either side and replace the start_dir with your own. I have done the match on filename.lower() and entered the search text in lower case to make the matching case in-sensitive, just remove the .lower() if you don't want this. I have also allowed for other types of Excel files.
Regarding reading data from Excel files I have done this before to create an automated way of converting basic Excel files into csv format. You are welcome to have a look at the code below and see if there is anything you can use from this. The xl_to_csv function is where the data is read from the Excel file:
import os, csv, sys, Tkinter, tkFileDialog as fd, xlrd
# Create the Tk root window and withdraw it straight away: Tk is only needed
# for the file dialogs, so the empty main window should not be shown.
root = Tkinter.Tk()
root.withdraw()
def format_date(dt):
    """Format a (year, month, day, hour, minute, second) sequence as text.

    Returns ``dd/mm/yyyy`` when the time part is exactly 0:0:0, otherwise
    ``dd/mm/yyyy hh:mi:ss``.  Single-character fields are left-padded with
    one zero; the year is used as-is.
    """
    def pad(text):
        # Only one-character values get a leading zero.
        return '0' + text if len(text) == 1 else text

    year, month, day = str(dt[0]), str(dt[1]), str(dt[2])
    hour, minute, second = str(dt[3]), str(dt[4]), str(dt[5])

    date_part = pad(day) + '/' + pad(month) + '/' + year
    if (hour, minute, second) == ('0', '0', '0'):
        return date_part
    return date_part + ' ' + pad(hour) + ':' + pad(minute) + ':' + pad(second)
def _cell_to_csv_value(cell, datemode):
    """Convert one xlrd cell to the value written to the csv file."""
    val = cell.value
    if cell.ctype == 2:  # number data type
        if val == int(val):
            val = int(val)  # drop a trailing .0 so 3.0 is written as 3
    elif cell.ctype == 3:  # date fields
        val = format_date(xlrd.xldate_as_tuple(val, datemode))
    elif cell.ctype == 4:  # boolean data types
        val = str(bool(val))  # 1 / 0 -> "True" / "False"
    else:
        val = str(val)
    return val


def xl_to_csv(in_path, out_path):
    """Write the first sheet of the workbook at *in_path* to *out_path* as csv.

    Numbers without a fractional part are written as integers, dates are
    rendered by format_date(), booleans become "True"/"False" and every
    other cell is converted with str().  A row that cannot be written is
    reported together with its traceback and skipped; the remaining rows are
    still written.

    Args:
        in_path: path of the Excel workbook to read.
        out_path: path of the csv file to create (overwritten if present).
    """
    # Needed only on the error path; the file-level imports do not provide it.
    import traceback

    wb = xlrd.open_workbook(in_path)
    sh1 = wb.sheet_by_index(0)
    row_cnt, col_cnt = sh1.nrows, sh1.ncols

    # 'wb' is the mode the Python 2 csv module expects.  The with-block
    # guarantees the file is flushed and closed even if a row blows up.
    with open(out_path, 'wb') as fileout:
        writer = csv.writer(fileout)
        for r in range(row_cnt):
            # Dates are decoded with the workbook's own date mode instead of
            # assuming the 1900-based one.
            row = [_cell_to_csv_value(sh1.cell(r, c), wb.datemode)
                   for c in range(col_cnt)]
            try:
                writer.writerow(row)
            except Exception:
                print('...row failed in write to file: ' + str(row))
                for line in traceback.format_exception(*sys.exc_info()):
                    print('!! ' + line)
    print('Data written to: ' + str(out_path) + ' \n')
def main():
    """Interactively convert one Excel file to csv.

    Asks for the input workbook, asks where to put the output, runs
    xl_to_csv() and optionally opens the result.  Always ends the process
    through sys.exit().
    """
    in_path, out_path = None, None
    # set current working directory to user's my documents folder
    os.chdir(os.path.join(os.getenv('userprofile'),'documents'))

    # ask user for path to Excel file; keep asking until something is chosen
    while not in_path:
        print("Please select the excel file to read data from ...")
        try:
            in_path = fd.askopenfilename()
        except Exception:
            print('Error selecting file, please try again.\n')

    # get dir for output...
    same = raw_input("Do you want to write the output to the same directory? (Y/N): ")
    if same.upper() == 'Y':
        out_path = os.path.dirname(in_path)
    else:
        while not out_path:
            print("Please select a directory to write the csv file to ...")
            try:
                out_path = fd.askdirectory()
            except Exception:
                print('Error selecting file, please try again.\n')

    # Swap only the final extension for .csv.  Slicing at the first '.'
    # mangled names with several dots and chopped the last character off
    # names with none.
    f_name = os.path.splitext(os.path.basename(in_path))[0] + '.csv'
    out_path = os.path.join(out_path,f_name)

    # get data from file and write to csv...
    print('Attempting read data from ' + in_path)
    print(' and write csv data to ' + out_path + ' ...\n')
    xl_to_csv(in_path, out_path)

    v_open = raw_input("Open file (Y/N):").upper()
    if v_open == 'Y':
        os.startfile(out_path)
    sys.exit()


if __name__ == '__main__':
    main()
Let me know if you have any questions on this.
Finally, regarding the output I would consider writing this out to a html file in a table format. Let me know if you want any help with this, I will have some more sample code that you could use part of.
UPDATE
Here is some further advice on writing your output to a html file. Here is a function that I have written and used previously for this purpose. Let me know if you need any guidance on what you would need to change for your implementation (if anything). The function expects a nested object in the data argument e.g. a list of lists or list of tuples etc. but should work for any number of rows / columns:
def write_html_file(path, data, heads, show_headings=None, open_file=None):
    """Write *data* as an HTML table to *path*.

    Args:
        path: file to create (overwritten if it exists).
        data: iterable of rows, each an iterable of cell values; every cell
            is converted with str().  Values are inserted as-is (no HTML
            escaping).
        heads: iterable of column headings, used only when headings are on.
        show_headings: whether to emit the heading row; defaults to the
            module-level ``headings_on``.
        open_file: True/False to open (or not open) the file afterwards.
            When omitted, the file is opened if the module-level ``sql_path``
            is truthy, otherwise the user is asked.
    """
    # Needed only on the error path; import locally so the block is
    # self-sufficient.
    import traceback

    if show_headings is None:
        show_headings = headings_on

    tab_attr = ' border="1" cellpadding="3" style="background-color:#FAFCFF; text-align:right"'
    head_attr = ' style="background-color:#C0CFE2"'

    # opening lines needed for html table
    html = [
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" ',
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> ',
        '<html xmlns="http://www.w3.org/1999/xhtml">',
        '<body>',
        ' <table'+tab_attr+'>',
    ]

    # html table headings (if required)
    if show_headings:
        try:
            html.append(' <tr'+head_attr+'>')
            for item in heads:
                html.append(' '*6+'<th>'+str(item)+'</th>')
            html.append(' </tr>')
        except Exception:
            lines = traceback.format_exception(*sys.exc_info())
            print('Error writing html table headings:')
            print(''.join('!! ' + line for line in lines))

    # html table content; a failure keeps whatever rows were already built
    try:
        for row in data:
            html.append(' <tr>')
            for item in row:
                html.append(' '*6+'<td>'+str(item)+'</td>')
            html.append(' </tr>')
    except Exception:
        print('Error writing body of html data')

    # closing lines needed
    html.extend([' </table>', '</body>', '</html>'])

    # write html data to file; the with-block closes it before it is opened
    # in another program below
    with open(path, 'w') as fileout:
        fileout.write(''.join(html))
    print('Data written to: ' + str(path) + ' \n')

    if open_file is None:
        if sql_path:
            open_file = True
        else:
            open_file = raw_input("Open file (Y/N):").upper() == 'Y'
    if open_file:
        os.startfile(path)
headings_on is a global that I have set to True in my script, you will also need to import traceback for the error handling to work as it is currently specified.