Closing Python files while using cv2 - python

Apologies in advance for the probably easy fix, I am a college student learning c++ and am using python for the first time on a personal project.
I am writing a program that extracts the title from a media file inside a directory or subdirectory, then looks to see if there are any strings that match. If there are then it compares their resolution, and deletes the lower resolution file. If they are both the same resolution, it deletes the larger file size. All of it is working, with the exception of deleting files. When I try to, it throws an error saying the files are in use. After doing some research, I learned that it was because I have the files open inside the code, preventing them from being deleted. My problem is that I don't know what variable I need to close, or the appropriate way and location to do so.
import os
import cv2
import PTN
import json
array1 = [os.path.join(r,file) for r,d,f in os.walk("E:\Python Test Environment") for file in f]
for x in range(0, len(array1)):
print(array1[x])
array2 = array1[:] #The colon tells it to directly copy rather than do a link
for x in range(0, len(array2)):
array2[x] = (json.dumps(PTN.parse(array2[x])))
array2[x] = json.loads(array2[x])['title']
head, array2[x] = os.path.split(array2[x])
del head
y = len(array2)
for x in range(0, len(array2)):
if array2[x] == "":
break
for i in range(x, y-1): #Set to x+1 so that it does not compare against the current file
i = x + 1
if array2[i] == "":
break
if array2[x] == array2[i]:
print 'Match found!'
print array1[i]
print 'Matches: '
print array1[x]
with open(array1[x]) as f: #tried to include this to prevent error, doesn't seem to stop it
capture1 = cv2.VideoCapture(array1[x]) #Open the video
ret, frame = capture1.read() #Read the first frame
resolution1 = frame.shape #Get resolution
f.close()
with open(array1[i]) as f: #tried to include this to prevent error, doesn't seem to stop it
capture2 = cv2.VideoCapture(array1[i]) #Open the video
ret, frame = capture2.read() #Read the first frame
resolution2 = frame.shape #Get resolution
f.close()
if resolution1 > resolution2:
print array1[x]
print "Is higher resolution than"
print array1[i]
print "Would delete: "
print array1[i]
os.remove(array1[i])
array1[i] = ""
array2[i] = ""
if resolution2 > resolution1:
print array1[i]
print "Is higher resolution than"
print array1[x]
print "Would delete: "
print array1[i]
os.remove(array1[x])
array1[x] = ""
array2[x] = ""
if resolution1 == resolution2:
print "equal"
if os.path.getsize(array1[x]) <= os.path.getsize(array1[i]):
print "Would delete: "
print array1[i]
os.remove(array1[i])
array1[i] = ""
array2[i] = ""
if os.path.getsize(array1[i]) < os.path.getsize(array1[x]):
print "Would delete: "
print array1[x]
os.remove(array1[x])
array1[x] = ""
array2[x] = ""

Add capture1.release() and capture2.release() to release the resources used by the VideoCapture instances

Related

Game of Life with file read for world generation - string index out of range issue

Like the title says, I've being working on a variant of Conway's Game of Life in python that can read a "world" from a file and generate the starting world from that. However, in my code, I'm being given a "string index out of range" issue at the line while(aLine[currentCharacter] != NEWLINE), and I can't figure out why.
If it helps, in the input files " " is treated as a dead cell, and "*" is treated as a living cell.
Thank you for the help and please let me know if there's any additional info I should provide
def fileReadWorld():
fileOK = False
world = []
row = 0
column = 0
while (fileOK == False):
try:
filename = input("Name of input file: ")
inputfile = open(filename,"r")
fileOK = True
aLine = inputfile.readline()
if(aLine == ""):
print("The file %s" %(filename), "is empty.")
fileOK = False
else:
aLine = inputfile.readline()
row = 0
while(aLine != ""):
currentCharacter = 0
world.append([])
while(aLine[currentCharacter] != "\n"):
world[row].append(aLine[currentCharacter])
currentCharacter = currentCharacter + 1
row = row + 1
aLine = inputfile.readline()
inputfile.close()
maxRows = row
maxColumns = len(world[0])
return(world, maxRows, maxColumns)
except IOError:
print("Problem reading from file %s" %(filename))
fileOK = False
The input file I am using is
*
*
***
(it should display as a 10x10 grid)
Consider this simpler solution with the same result:
def main():
world = []
while True:
try:
filename = input("Name of input file: ")
for aLine in open(filename,"r")
world.append(list(aLine[:-1]))
if world:
maxRows = len(world)
maxColumns = len(world[0])
return world, maxRows, maxColumns
print("The file %s" %(filename), "is empty.")
except IOError:
print("Problem reading from file %s" %filename)
print(main())

Python coding for opening and saving data to a file

I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re
class Bayes_Classifier:
def __init__(self, trainDirectory = "movie_reviews/"):
#If file listing exists skip to train
if os.path.isfile('iFileList'):
print "file found"
self.train()
#self.classify()
#If file listing does not exist skip to train
if not os.path.isfile('iFileList'):
print "no file"
newfile = 'iFileList'
tempList = set()
subDir = './movie_reviews'
for filenames in os.listdir(subDir):
my_sub_path = os.path.join(os.sep,subDir,filenames)
tempList.add(filenames)
self.save("filenames", "try3")
f = []
for fFileObj in os.walk("movie_reviews/"):
f.extend(fFileObj)
break
pickle.dump(f, open( "save.p", "wb" ))
self.save(f, "try4")
with open(newfile, 'wb') as fi:
pickle.dump(tempList, fi)
#print tempList
self.train()
#self.classify()
def train(self):
'''Trains the Naive Bayes Sentiment Classifier.'''
print "File ready for training"
#Open iFileList to use as input for opening movie files
x = 0
OpenIFileList = open('iFileList','r')
print "iFileList now Open"
#Loop through the file
for line in OpenIFileList:
#print "Ready to read lines"
#print "reading line " + line
if x > 4:
if x % 2 == 0:
#print line
s = line
if '-' in s:
comp = s.split("'")
#print comp[2]
print comp[1] #This is What you need for t he movie file
compValue1 = comp[1]
#Determine Positive/Negative.
#compType is the variable I am storing it to.
compType = compValue1.split("-",2)[1]
#print compType #Prints that middle value like 5 or 1
# This will do the work based on the value.
if compType == '5':
# print "you have a five" #Confirms the loop I am in.
#If file does not exists create it
if not os.path.exists('PosList'):
print "no file"
file('PosList', 'w').close()
#Open file that needs to be reviewed for word count
compValue2 = "movie_reviews/" + compValue1
print compValue2 #Prints the directory and file path
OpenMovieList = open(compValue2,'r')
for commentLine in OpenMovieList:
commentPositive = commentLine.split(" ")
commentPositiveCounter = Counter(commentPositive)
#print commentPositiveCounter # " Comment Pos goes here"
#if commentLine != '' or commentLine != ' ':
#Get first word, second word, ....
if commentLine and (not commentLine.isspace()):
movieWordCount = self.tokenize(commentLine)
y = len(movieWordCount) #determines length of string
print y
z = 0
#print movieWordCount[0] # Shows the zero position in the file.
while z < y:
print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
z = z + 1
#Close the files
OpenMovieList.close()
OpenPos.close()
x += 1
#for line2 in OpenIFileList.readlines():
#for line in open('myfile','r').readlines():
#do_something(line)
#Save results
#Close the File List
OpenIFileList.close()
def loadFile(self, sFilename):
'''Given a file name, return the contents of the file as a string.'''
f = open(sFilename, "r")
sTxt = f.read()
f.close()
return sTxt
def save(self, dObj, sFilename):
'''Given an object and a file name, write the object to the file using pickle.'''
f = open(sFilename, "w")
p = pickle.Pickler(f)
p.dump(dObj)
f.close()
def load(self, sFilename):
'''Given a file name, load and return the object stored in the file.'''
f = open(sFilename, "r")
u = pickle.Unpickler(f)
dObj = u.load()
f.close()
return dObj
def tokenize(self, sText):
'''Given a string of text sText, returns a list of the individual tokens that
occur in that string (in order).'''
lTokens = []
sToken = ""
for c in sText:
if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
sToken += c
else:
if sToken != "":
lTokens.append(sToken)
sToken = ""
if c.strip() != "":
lTokens.append(str(c.strip()))
if sToken != "":
lTokens.append(sToken)
return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as Open_Pos
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close(), and append 2 lines to your code:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
OpenPos.write(lines)

OpenCV Python Unsupported response type

I am currently writing a Bag Of Words analyser using OpenCV and Python. I have been able to extract the info from the images, learn but the problem is the training part. As a baseline, I am using this and translating it into Python, but I get to the learn part and when it should train the classifier it fails saying that it has an unsupported response type (I assume labels is not in the correct format). I have tried a bit of everything but I cannot get it to work. Any ideas?
import cv2
import os
import numpy as np
dictSize = 1000
retries = 1
flags = cv2.KMEANS_PP_CENTERS
tc = (cv2.TERM_CRITERIA_MAX_ITER, 10, 0.001)
matcher = cv2.DescriptorMatcher_create("FlannBased")
extractor = cv2.DescriptorExtractor_create("SURF")
detector = cv2.FeatureDetector_create("SURF")
bowTrainer = cv2.BOWKMeansTrainer(dictSize,tc,retries,flags)
bowDE = cv2.BOWImgDescriptorExtractor(extractor,matcher)
def extractTrainingVocabulary(path):
global bowTrainer
global extractor
lst=os.listdir(path)
for i in range(0,len(lst)):
if lst[i][0] != ".":
fullPath = path + lst[i]
print "Processing Image " + fullPath
img = cv2.imread(fullPath)
if not (len(img) == 0):
keypoints = detector.detect(img)
if (len(keypoints) == 0):
print "Warning! Could not find any keypoints in image " + fullPath
else:
# Returns 2 vars. The underscore is used to discard the first one
_,features = extractor.compute(img,keypoints)
bowTrainer.add(features)
else:
print "Could not read image " + fullPath
def extractBOWDescriptor(path, descriptors, labels):
global bowTrainer
global extractor
lst=os.listdir(path)
for i in range(0,len(lst)):
if lst[i][0] != ".":
fullPath = path + lst[i]
print "Processing Image " + fullPath
img = cv2.imread(fullPath)
if not (len(img) == 0):
keypoints = detector.detect(img)
if (len(keypoints) == 0):
print "Warning! Could not find any keypoints in image " + fullPath
else:
bowDescriptor = bowDE.compute(img,keypoints)
# descriptors.append(bowDescriptor)
#np.vstack((descriptors,bowDescriptor))
descriptors = np.vstack((descriptors,bowDescriptor))
#labels.append(lst[i][:-4])
labels = np.vstack((labels,float(lst[i][:-4])))
else:
print "Could not read image " + fullPath
return labels, descriptors
def main():
global bowDE
# LEARN
print "Creating Dict..."
extractTrainingVocabulary("./testImages/")
descriptors = bowTrainer.getDescriptors()
print "Clustering " + str(len(descriptors)) + " features. This might take a while..."
dictionary = bowTrainer.cluster()
print "Done clustering"
# EXTRACT
size1 = 0,dictSize
trainingData = np.zeros(size1,dtype=np.float32)
size2 = 0,1
labels = np.zeros(size2,dtype=np.float32)
bowDE.setVocabulary(dictionary)
labels,trainingData = extractBOWDescriptor("./evalImages/",trainingData,labels)
print(trainingData)
print(labels)
print "Training classifier"
size3 = len(trainingData),len(trainingData[0])
responseData = np.zeros(size3,dtype=np.float32)
classifier = cv2.NormalBayesClassifier()
classifier.train(trainingData,labels)
main()
EDIT
as per #berak's suggestion, I changed the following:
labels = np.vstack((labels,float(lst[i][:-4]))) -> labels = np.vstack((labels,int(lst[i][:-4])))
labels = np.zeros(size2,dtype=np.float32) -> labels = np.zeros(size2,dtype=np.int32)
Unfortunately still fails. Now I get the following:
error: (-5) There is only a single class in function cvPreprocessCategoricalResponses

Search through directories for specific Excel files and compare data from these files with inputvalues

The task:
The firm I have gotten a summer-job for has an expanding test-database that consists of an increasing number of subfolders for each project, that includes everything from .jpeg files to the .xlsx's I am interested in. As I am a bit used to Python from earlier, I decided to give it a go at this task. I want to search for exceldocuments that has "test spreadsheet" as a part of its title(for example "test spreadsheet model259"). All the docs I am interested in are built the same way(weight is always "A3" etc), looking somewhat like this:
Model: 259
Lenght: meters 27
Weight: kg 2500
Speed: m/s 25
I want the user of the finished program to be able to compare results from different tests with each other using my script. This means that the script must see if there is an x-value that fits both criteria at once:
inputlength = x*length of model 259
inputweight = x*weight of model 259
The program should loop through all the files in the main folder. If such an X exists for a model, I want the program to return it to a list of fitting models. The x-value will be a variable, different for each model.
As the result I want a list of all files that fits the input, their scale(x-value) and possibly a link to the file.
For example:
Model scale Link
ModelA 21.1 link_to_fileA
ModelB 0.78 link_to_fileB
The script
The script I have tried to get to work so far is below, but if you have other suggestions of how to deal with the task I'll happily accept them. Don't be afraid to ask if I have not explained the task well enough. XLRD is already installed, and I use Eclipse as my IDE. I've been trying to get it to work in many ways now, so most of my script is purely for testing.
Edited:
#-*- coding: utf-8 -*-
#Accepts norwegian letters
import xlrd, os, fnmatch
folder = 'C:\eclipse\TST-folder'
def excelfiles(pattern):
file_list = []
for root, dirs, files in os.walk(start_dir):
for filename in files:
if fnmatch.fnmatch(filename.lower(), pattern):
if filename.endswith(".xls") or filename.endswith(".xlsx") or filename.endswith(".xlsm"):
file_list.append(os.path.join(root, filename))
return file_list
file_list = excelfiles('*tst*') # only accept docs hwom title includes tst
print excelfiles()
How come I only get one result when I am printing excelfiles() after returning the values, but when I exchange "return os.path.join(filename)" with "print os.path.join(filename)" it shows all .xls files? Does this mean that the results from the excelfiles-function is not passed on? Answered in comments
''' Inputvals '''
inputweight = int(raw_input('legg inn vekt')) #inputbox for weight
inputlength = int(raw_input('legg inn lengd')) #inputbox for lenght
inputspeed = int(raw_input('legg inn hastighet')) #inputbox for speed
'''Location of each val from the excel spreadsheet'''
def locate_vals():
val_dict = {}
for filename in file_list:
wb = xlrd.open_workbook(os.path.join(start_dir, filename))
sheet = wb.sheet_by_index(0)
weightvalue = sheet.cell_value(1, 1)
lenghtvalue = sheet.cell_value(1, 1)
speedvalue = sheet.cell_value(1, 1)
val_dict[filename] = [weightvalue, lenghtvalue, speedvalue]
return val_dict
val_dict = locate_vals()
print locate_vals()
count = 0
Any ideas of how I can read from each of the documents found by the excelfiles-function? "funcdox" does not seem to work. When I insert a print-test, for example print weightvalue after the weightvalue = sheet.cell(3,3).value function, I get no feedback at all. Errormessages without the mentioned print-test:Edited to the script above, which creates a list of the different values + minor changes that removed the errormessages
Script works well until this point
Made some minor changes to the next part. It is supposed to scale an value from the spreadsheet by multiplying it with a constant (x1). Then I want the user to be able to define another inputvalue, which in turn defines another constant(x2) to make the spreadsheetvalue fit. Eventually, these constants will be compared to find which models will actually fit for the test.
'''Calculates vals from excel from the given dimensions'''
def dimension(): # Maybe exchange exec-statement with the function itself.
if count == 0:
if inputweight != 0:
exec scale_weight()
elif inputlenght != 0:
exec scale_lenght()
elif inputspeed != 0:
exec scale_speed()
def scale_weight(x1, x2): # Repeat for each value.
for weightvalue in locate_vals():
if count == 0:
x1 * weightvalue == inputweight
count += 1
exec criteria2
return weightvalue, x1
elif count == 2:
inputweight2 = int(raw_input('Insert weight')) #inputbox for weight
x2 * weightvalue == inputweight2
return weightvalue, x2
The x1 and x2 are what I want to find with this function, so I want them to be totally "free". Is there any way I can test this function without having to insert values for x1 and x2 ?
def scale_lenght(): # Almost identical to scale_weight
return
def scale_speed(): # Almost identical to scale_weight
return
def criteria2(weight, lenght, speed):
if count == 1:
k2 = raw_input('Criteria two, write weight, length or speed.')
if k2 == weight:
count += 1
exec scale_weight
elif k2 == lenght:
count += 1
exec scale_lenght
elif k2 == speed:
count += 1
exec scale_speed
else:
return
Do you see any easier way to deal with this problem?(Hope I managed to explain it well enough. The way I have written the code so far is quite messy, but since I'm not that experienced I'll just have to make it work first, and then clean it up if I have the time.
Since probably none of the values will exactly fit for both x-constants, I thought I'd use approx_Equal to deal with it:
def approx_Equal(x1, x2, tolerance=int(raw_input('Insert tolerance for scaling difference')),err_msg='Unacceptable tolerance', verbose = True ): # Gives the approximation for how close the two values of x must be for
if x1 == x2:
x = x1+ (x2-x1)/2
return x
Eventually, I'd like a diagram of all the variables used + a link-to-file and name for each document.
No sure how I will do this, so any tips are greatly appreciated.
Thanks!
In answer to the first question "How come I only get one result when I am printing excelfiles()" this is because your return statement is within the nested loop, so the function will stop on the first iteration. I would try building up a list instead and then return this list, you could also combine this with the issue of checking the name e.g. :
import os, fnmatch
#globals
start_dir = os.getenv('md')
def excelfiles(pattern):
file_list = []
for root, dirs, files in os.walk(start_dir):
for filename in files:
if fnmatch.fnmatch(filename.lower(), pattern):
if filename.endswith(".xls") or filename.endswith(".xlsx") or filename.endswith(".xlsm"):
file_list.append(os.path.join(root, filename))
return file_list
file_list = excelfiles('*cd*')
for i in file_list: print i
Obviously, you'll need to replace the cd with your own search text, but keep the * either side and replace the start_dir with your own. I have done the match on filename.lower() and entered the search text in lower case to make the matching case in-sensitive, just remove the .lower() if you don't want this. I have also allowed for other types of Excel files.
Regarding reading data from Excel files I have done this before to create an automated way of converting basic Excel files into csv format. You are welcome to have a look at the code below and see if there is anything you can use from this. The xl_to_csv function is where the data is read from the Excel file:
import os, csv, sys, Tkinter, tkFileDialog as fd, xlrd
# stop tinker shell from opening as only needed for file dialog
root = Tkinter.Tk()
root.withdraw()
def format_date(dt):
yyyy, mm, dd = str(dt[0]), str(dt[1]), str(dt[2])
hh, mi, ss = str(dt[3]), str(dt[4]), str(dt[5])
if len(mm) == 1:
mm = '0'+mm
if len(dd) == 1:
dd = '0'+dd
if hh == '0' and mi == '0' and ss == '0':
datetime_str = dd+'/'+mm+'/'+yyyy
else:
if len(hh) == 1:
hh = '0'+hh
if len(mi) == 1:
mi = '0'+mi
if len(ss) == 1:
ss = '0'+ss
datetime_str = dd+'/'+mm+'/'+yyyy+' '+hh+':'+mi+':'+ss
return datetime_str
def xl_to_csv(in_path, out_path):
# set up vars to read file
wb = xlrd.open_workbook(in_path)
sh1 = wb.sheet_by_index(0)
row_cnt, col_cnt = sh1.nrows, sh1.ncols
# set up vars to write file
fileout = open(out_path, 'wb')
writer = csv.writer(fileout)
# iterate through rows and cols
for r in range(row_cnt):
# make list from row data
row = []
for c in range(col_cnt):
#print "...debug - sh1.cell(",r,c,").value set to:", sh1.cell(r,c).value
#print "...debug - sh1.cell(",r,c,").ctype set to:", sh1.cell(r,c).ctype
# check data type and make conversions
val = sh1.cell(r,c).value
if sh1.cell(r,c).ctype == 2: # number data type
if val == int(val):
val = int(val) # convert to int if only no decimal other than .0
#print "...debug - res 1 (float to str), val set to:", val
elif sh1.cell(r,c).ctype == 3: # date fields
dt = xlrd.xldate_as_tuple(val, 0) # date no from excel to dat obj
val = format_date(dt)
#print "...debug - res 2 (date to str), val set to:", val
elif sh1.cell(r,c).ctype == 4: # boolean data types
val = str(bool(val)) # convert 1 or 0 to bool true / false, then string
#print "...debug - res 3 (bool to str), val set to:", val
else:
val = str(val)
#print "...debug - else, val set to:", val
row.append(val)
#print ""
# write row to csv file
try:
writer.writerow(row)
except:
print '...row failed in write to file:', row
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
for line in lines:
print '!!', line
print 'Data written to:', out_path, '\n'
def main():
in_path, out_path = None, None
# set current working directory to user's my documents folder
os.chdir(os.path.join(os.getenv('userprofile'),'documents'))
# ask user for path to Excel file...
while not in_path:
print "Please select the excel file to read data from ..."
try:
in_path = fd.askopenfilename()
except:
print 'Error selecting file, please try again.\n'
# get dir for output...
same = raw_input("Do you want to write the output to the same directory? (Y/N): ")
if same.upper() == 'Y':
out_path = os.path.dirname(in_path)
else:
while not out_path:
print "Please select a directory to write the csv file to ..."
try:
out_path = fd.askdirectory()
except:
print 'Error selecting file, please try again.\n'
# get file name and join to dir
f_name = os.path.basename(in_path)
f_name = f_name[:f_name.find('.')]+'.csv'
out_path = os.path.join(out_path,f_name)
# get data from file and write to csv...
print 'Attempting read data from', in_path
print ' and write csv data to', out_path, '...\n'
xl_to_csv(in_path, out_path)
v_open = raw_input("Open file (Y/N):").upper()
if v_open == 'Y':
os.startfile(out_path)
sys.exit()
if __name__ == '__main__':
main()
Let me know if you have any questions on this.
Finally, regarding the output I would consider writing this out to a html file in a table format. Let me know if you want any help with this, I will have some more sample code that you could use part of.
UPDATE
Here is some further advice on writing your output to a html file. Here is a function that I have written and used previously for this purpose. Let me know if you need any guidance on what you would need to change for your implementation (if anything). The function expects a nested object in the data argument e.g. a list of lists or list of tuples etc. but should work for any number of rows / columns:
def write_html_file(path, data, heads):
html = []
tab_attr = ' border="1" cellpadding="3" style="background-color:#FAFCFF; text-align:right"'
head_attr = ' style="background-color:#C0CFE2"'
# opening lines needed for html table
try:
html.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" ')
html.append('"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> ')
html.append('<html xmlns="http://www.w3.org/1999/xhtml">')
html.append('<body>')
html.append(' <table'+tab_attr+'>')
except:
print 'Error setting up html heading data'
# html table headings (if required)
if headings_on:
try:
html.append(' <tr'+head_attr+'>')
for item in heads:
html.append(' '*6+'<th>'+str(item)+'</th>')
html.append(' </tr>')
except:
exc_type, exc_value, exc_traceback = sys.exc_info()
lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
print 'Error writing html table headings:'
print ''.join('!! ' + line for line in lines)
# html table content
try:
for row in data:
html.append(' <tr>')
for item in row:
html.append(' '*6+'<td>'+str(item)+'</td>')
html.append(' </tr>')
except:
print 'Error writing body of html data'
# closing lines needed
try:
html.append(' </table>')
html.append('</body>')
html.append('</html>')
except:
print 'Error closing html data'
# write html data to file
fileout = open(path, 'w')
for line in html:
fileout.write(line)
print 'Data written to:', path, '\n'
if sql_path:
os.startfile(path)
else:
v_open = raw_input("Open file (Y/N):").upper()
if v_open == 'Y':
os.startfile(path)
headings_on is a global that I have set to True in my script, you will also need to import traceback for the error handling to work as it is currently specified.

Searching a file for matches between two values and outputting search hits in Python

I am (attempting) to write a program that searches through a hex file for instances of a hex string between two values, eg. Between D4135B and D414AC, incrementing between the first value until the second is reached- D4135B, D4135C, D4135D etc etc.
I have managed to get it to increment etc, but it’s the search part I am having trouble with.
This is the code I have so far, it's been cobbled together from other places and I need to make it somehow output all search hits into the output file (file_out)
I have exceeded the limit of my Python understanding and I'm sure there's probably a much easier way of doing this. I would be very grateful for any help.
def search_process(hx): # searching for two binary strings
global FLAG
while threeByteHexPlusOne != threeByteHex2: #Keep incrementing until second value reached
If Flag:
if hx.find(threeByteHex2) != -1:
FLAG = False #If threeByteHex = ThreeByteHexPlusOne, end search
Print (“Reached the end of the search”,hx.find(threeByteHexPlusOne))
Else:
If hx.find(threeByteHexPlusOne) != -1:
FLAG = True
Return -1 #If no results found
if __name__ == '__main__':
try:
file_in = open(FILE_IN, "r") #opening input file
file_out = open(FILE_OUT, 'w') #opening output file
hx_read = file_in.read #read from input file
tmp = ''
found = ''
while hx_read: #reading from file till file is empty
hx_read = tmp + hx_read
pos = search_process(hx_read)
while pos != -1:
hex_read = hx_read[pos:]
if FLAG:
found = found + hx_read
pos = search_process(hx_read)
tmp = bytes_read[]
hx_read = file_in.read
file_out.write(found) #writing to output file
except IOError:
print('FILE NOT FOUND!!! Check your filename or directory/PATH')
Here's a program that looks through a hex string from a file 3 bytes at a time and if the 3-byte hex string is between the given hex bounds, it writes it to another file. It makes use of generators to make getting the bytes from the hex string a little cleaner.
import base64
import sys
_usage_string = 'Usage: python {} <input_file> <output_file>'.format(sys.argv[0])
def _to_base_10_int(value):
return int(value, 16)
def get_bytes(hex_str):
# Two characters equals one byte
for i in range(0, len(hex_str), 2):
yield hex_str[i:i+2]
def get_three_byte_hexes(hex_str):
bytes = get_bytes(hex_str)
while True:
try:
three_byte_hex = next(bytes) + next(bytes) + next(bytes)
except StopIteration:
break
yield three_byte_hex
def find_hexes_in_range(hex_str, lower_bound_hex, upper_bound_hex):
lower_bound = _to_base_10_int(lower_bound_hex)
upper_bound = _to_base_10_int(upper_bound_hex)
found = []
for three_byte_hex in get_three_byte_hexes(hex_str):
hex_value = _to_base_10_int(three_byte_hex)
if lower_bound <= hex_value < upper_bound:
found.append(three_byte_hex)
return found
if __name__ == "__main__":
try:
assert(len(sys.argv) == 3)
except AssertionError:
print _usage_string
sys.exit(2)
file_contents = open(sys.argv[1], 'rb').read()
hex_str = base64.decodestring(file_contents).encode('hex')
found = find_hexes_in_range(hex_str, 'D4135B', 'D414AC')
print('Found:')
print(found)
if found:
with open(sys.argv[2], 'wb') as fout:
for _hex in found:
fout.write(_hex)
Check out some more info on generators here

Categories