I'm having some difficulties parsing a binary STL file with Python (2.7.1, 32-bit, on 64-bit Windows 7). The file is about 450 KB in size, but my parser suddenly stops working after parsing 244 triangles out of ~8600, with an exception from struct.unpack:
Exception unpack requires a string argument of length 12
The cursor position in the file is line 33, column 929. But the line contains about 3400 characters, so it doesn't seem to be a newline problem.
This is the code:
import struct

normals = []
points = []
triangles = []
bytecount = []

fb = []  # debug list

def unpack(f, sig, l):
    s = f.read(l)
    fb.append(s)
    return struct.unpack(sig, s)

def read_triangle(f):
    n = unpack(f, "<3f", 12)
    p1 = unpack(f, "<3f", 12)
    p2 = unpack(f, "<3f", 12)
    p3 = unpack(f, "<3f", 12)
    b = unpack(f, "<h", 2)
    normals.append(n)
    l = len(points)
    points.append(p1)
    points.append(p2)
    points.append(p3)
    triangles.append((l, l+1, l+2))
    bytecount.append(b[0])
def read_length(f):
    length = struct.unpack("@i", f.read(4))
    return length[0]
def read_header(f):
    f.seek(f.tell() + 80)

def write_as_ascii(outfilename):
    f = open(outfilename, "w")
    f.write("solid " + outfilename + "\n")
    for n in range(len(triangles)):
        f.write("facet normal {} {} {}\n".format(normals[n][0], normals[n][1], normals[n][2]))
        f.write("outer loop\n")
        f.write("vertex {} {} {}\n".format(points[triangles[n][0]][0], points[triangles[n][0]][1], points[triangles[n][0]][2]))
        f.write("vertex {} {} {}\n".format(points[triangles[n][1]][0], points[triangles[n][1]][1], points[triangles[n][1]][2]))
        f.write("vertex {} {} {}\n".format(points[triangles[n][2]][0], points[triangles[n][2]][1], points[triangles[n][2]][2]))
        f.write("endloop\n")
        f.write("endfacet\n")
    f.write("endsolid " + outfilename + "\n")
    f.close()

def main():
    infilename = r"cupHemis46_28.stl"
    outfilename = r"cupHemis46_28_ascii_test.stl"
    try:
        f = open(infilename, "r")
        read_header(f)
        l = read_length(f)
        try:
            while True:
                read_triangle(f)
        except Exception, e:
            print "Exception", e[0]
        print len(normals), len(points), len(triangles), l
        write_as_ascii(outfilename)
    except Exception, e:
        print e

if __name__ == '__main__':
    main()
My unpack function (mine, not struct's) collects all the strings it reads, which I then write to a file for debugging. When I compare both files they seem equal, up to the file position where unpack stops working. I opened the binary file with Notepad++; the next character is a "SUB" (0x1A).
Are there any restrictions on unpack that I'm not aware of, regarding file size or some character limitation? Is there something wrong with my code? Thanks in advance.
Your unpack function keeps being called until f.read comes up short; I suspect you've walked off the end of the file.
You'll also have trouble reading the file in text mode on Windows: any incidental \r\n pair is collapsed to \n, and a stray SUB character (Ctrl-Z, 0x1A) is treated as end of file, which matches the "SUB" you saw in Notepad++. Make the following change to avoid this problem:
f = open(infilename, "rb")
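For reference, here is a minimal sketch (not the asker's code) of what the read loop can look like once the file is open in binary mode, assuming the standard binary STL layout of an 80-byte header, a 4-byte little-endian triangle count, and 50-byte triangle records; looping over the count also avoids relying on an exception to stop:

import struct

def read_binary_stl(infilename):
    # Binary mode: \r\n pairs and SUB (0x1A) bytes pass through untouched.
    with open(infilename, "rb") as f:
        f.seek(80)                                 # skip the 80-byte header
        count = struct.unpack("<i", f.read(4))[0]  # 4-byte triangle count
        triangles = []
        for _ in range(count):
            # each record: normal (3 floats), 3 vertices (9 floats), attribute short
            data = struct.unpack("<12fh", f.read(50))
            normal, v1, v2, v3 = data[0:3], data[3:6], data[6:9], data[9:12]
            triangles.append((normal, v1, v2, v3))
        return triangles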
I'm writing a function that sums up integers from a file.
Here's the code:
def sum_integers_from_file(file_name):
    try:
        file = open(file_name)
        total = 0
        for i in file:
            total += int(i)
        file.close()
        return total
    except:
        print "error"
file foo.txt:
1234
The function returns 1234.
Why doesn't total += int(i) add up all the integers?
It is highly recommended to read files in a with statement. That frees you from the responsibility of closing the file and is also shorter! This works:
def sum_integers_from_file(file_name):
    try:
        with open(file_name, 'r') as f:
            s = f.read()
        total = 0
        for char in s:
            total += int(char)
        return total
    except:
        print("error")
Your file has one line, and you're adding up one int per line, so int('1234') gives 1234.
If you want to add 1, 2, 3, and 4 with that method, put them on separate lines, as shown below.
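For instance, foo.txt would then look like this, and the original per-line function (and the one-liner below) would return 10:

1
2
3
4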
Also, you can do the same thing with this:

with open(file_name) as f:
    return sum(int(line) for line in f)
Current program:
#!/usr/bin/python
lookup = 'Loop time'

with open('log.lammps') as myFile:
    found = []
    for num, line in enumerate(myFile, 1):
        if lookup in line:
            print 'found at line:', num
            found.append(num)
print found

a = int(found[0])
b = int(found[1])
c = int(found[2])
d = int(found[3])

lookup = 'Memory usage per processor ='
with open('log.lammps') as myFile:
    found2 = []
    for num, line in enumerate(myFile, 1):
        if lookup in line:
            print 'found at line:', num
            found2.append(num)
print found2

e = int(found2[0])
f = int(found2[1])
g = int(found2[2])
h = int(found2[3])

x = 0
while x < len(found):
    a = found[x]
    e = found2[x]
    print "Some thing useful!"
    for w, line in enumerate(open("log.lammps")):
        if w >= e and w < a:
            print line,
    x += 1
How do I make it store the output of the last while loop in a file? I want it to produce a file with the data that this program prints out, so that I can have MATLAB read that file.
The easiest solution is to redirect the output to a file. Let script.py be your python script:
./script.py > out.txt
Otherwise you can redirect stdout to a file directly in Python:
#!/usr/bin/python
import sys
sys.stdout = open('out.txt', 'w')
lookup = 'Loop time'
...
Now every print statement will be redirected to out.txt.
One very fast way is to replace sys.stdout with an open file descriptor.
f = open("file.txt","w")
old_stdout = sys.stdout
sys.stdout = f
while x < len(found):
a = found[x]
e = found2[x]
print "Some thing useful!"
for w,line in enumerate(open("log.lammps")):
if w >= e and w < a :
print line,
x += 1
sys.stdout = old_stdout
f.close()
This will automatically redirect anything you print to file.txt instead of stdout. It's a little hackish, though; it would be better just to open the output file and replace print ... with f.write(...) wherever you see it, since you don't have any functions that do complex printing.
(I'm assuming you only want the last while loop written to a file, as your question stated.)
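For reference, a minimal sketch (the file name file.txt is just an example) of that f.write approach applied to your last loop:

with open("file.txt", "w") as out:
    x = 0
    while x < len(found):
        a = found[x]
        e = found2[x]
        out.write("Some thing useful!\n")
        for w, line in enumerate(open("log.lammps")):
            if w >= e and w < a:
                out.write(line)   # line already ends with a newline
        x += 1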
Open a file at the start and write the data as you go:
with open('output.txt',"w") as out:
Wherever you print data, just write it to the file as well, e.g.:

print 'found at line: {}'.format(num)
out.write('found at line: {}\n'.format(num))

and so on for the other print statements.
Just start your code like:
with open('output.txt', "w") as out:
    with open('log.lammps') as myFile:
        # continue with your code
The important thing about sys.stdout is that you have to import sys and assign to sys.stdout; with from sys import stdout, rebinding stdout only changes your local name, not the stream that print uses.
I.e. this won't redirect your output to a file:
>>> from sys import stdout
>>> stdout = open('/dev/null', 'w')
>>> print 1
1   # not working; the output still goes to the terminal
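For contrast, this is the working variant (the same idea as the sys.stdout answers above):

>>> import sys
>>> sys.stdout = open('/dev/null', 'w')
>>> print 1   # nothing appears; print now writes to /dev/null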
I am attempting to write a program that searches through a hex file for instances of a hex string between two values, e.g. between D4135B and D414AC, incrementing from the first value until the second is reached: D4135B, D4135C, D4135D, and so on.
I have managed to get it to increment, but it's the search part I am having trouble with.
This is the code I have so far. It's been cobbled together from other places, and I need to make it somehow output all search hits into the output file (file_out).
I have exceeded the limits of my Python understanding, and I'm sure there's probably a much easier way of doing this. I would be very grateful for any help.
def search_process(hx):  # searching for two binary strings
    global FLAG
    while threeByteHexPlusOne != threeByteHex2:  # keep incrementing until second value reached
        if FLAG:
            if hx.find(threeByteHex2) != -1:
                FLAG = False  # if threeByteHex = threeByteHexPlusOne, end search
                print ("Reached the end of the search", hx.find(threeByteHexPlusOne))
        else:
            if hx.find(threeByteHexPlusOne) != -1:
                FLAG = True
    return -1  # if no results found
if __name__ == '__main__':
    try:
        file_in = open(FILE_IN, "r")    # opening input file
        file_out = open(FILE_OUT, 'w')  # opening output file
        hx_read = file_in.read          # read from input file
        tmp = ''
        found = ''
        while hx_read:                  # reading from file till file is empty
            hx_read = tmp + hx_read
            pos = search_process(hx_read)
            while pos != -1:
                hex_read = hx_read[pos:]
                if FLAG:
                    found = found + hx_read
                pos = search_process(hx_read)
            tmp = bytes_read[]
            hx_read = file_in.read
        file_out.write(found)           # writing to output file
    except IOError:
        print('FILE NOT FOUND!!! Check your filename or directory/PATH')
Here's a program that looks through a hex string from a file three bytes at a time; if the 3-byte hex value is between the given bounds, it writes it to another file. It makes use of generators to make getting the bytes from the hex string a little cleaner.
import base64
import sys

_usage_string = 'Usage: python {} <input_file> <output_file>'.format(sys.argv[0])

def _to_base_10_int(value):
    return int(value, 16)

def get_bytes(hex_str):
    # Two characters equals one byte
    for i in range(0, len(hex_str), 2):
        yield hex_str[i:i+2]

def get_three_byte_hexes(hex_str):
    bytes = get_bytes(hex_str)
    while True:
        try:
            three_byte_hex = next(bytes) + next(bytes) + next(bytes)
        except StopIteration:
            break
        yield three_byte_hex

def find_hexes_in_range(hex_str, lower_bound_hex, upper_bound_hex):
    lower_bound = _to_base_10_int(lower_bound_hex)
    upper_bound = _to_base_10_int(upper_bound_hex)
    found = []
    for three_byte_hex in get_three_byte_hexes(hex_str):
        hex_value = _to_base_10_int(three_byte_hex)
        if lower_bound <= hex_value < upper_bound:
            found.append(three_byte_hex)
    return found

if __name__ == "__main__":
    try:
        assert(len(sys.argv) == 3)
    except AssertionError:
        print _usage_string
        sys.exit(2)
    file_contents = open(sys.argv[1], 'rb').read()
    hex_str = base64.decodestring(file_contents).encode('hex')
    found = find_hexes_in_range(hex_str, 'D4135B', 'D414AC')
    print('Found:')
    print(found)
    if found:
        with open(sys.argv[2], 'wb') as fout:
            for _hex in found:
                fout.write(_hex)
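As a quick sanity check of the two generators (an interactive session, with inputs chosen to match the bounds above):

>>> list(get_bytes('D4135B'))
['D4', '13', '5B']
>>> list(get_three_byte_hexes('D4135BD4135C'))
['D4135B', 'D4135C']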
Check out the Python docs for some more info on generators.
I have a file that has a unique ID number on each line. I am trying to search a different file for occurrences of these ID numbers, and to return the lines of the second file where those IDs occur, in this case written to an output file. I am new to programming, and this is what I have so far.
outlist = []
with open('readID.txt', 'r') as readID, \
     open('GOlines.txt', 'w') as output, \
     open('GO.txt', 'r') as GO:
    x = readID.readlines()
    print x
    for line in GO:
        if x[1:-1] in line:
            outlist.append(line)
            outlist.append('\n')
    print outlist
    output.writelines(outlist)
The files look like this. readID.txt:
00073810.1
00082422.1
00018647.1
00063072.1
GO.txt:
#query GO reference DB reference family
HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
HumanDistalGut_READ_00043244.3 GO:0022625 TIGRFAM TIGR00001
HumanDistalGut_READ_00048644.4 GO:0000315 TIGRFAM TIGR00001
HumanDistalGut_READ_00067264.5 GO:0003735 TIGRFAM TIGR00001
The read IDs match up with some, but not all, of the IDs after READ...
#!/usr/bin/env python
# encoding: utf-8

import sys
import re

def extract_id(line):
    """
    input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
    returns: 00048904.2
    """
    result = re.search(r'READ_(\d{8}\.\d)', line)
    if result != None:
        return result.group(1)
    else:
        return None

def extract_go_num(line):
    """
    input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
    returns: 0006412
    """
    result = re.search(r'GO:(\d{7})', line)
    if result != None:
        return result.group(1)
    else:
        return None

def main(argv=None):
    if argv is None:
        argv = sys.argv
    with open('readID.txt', 'r') as f:
        # strip trailing newlines so the IDs compare equal to what extract_id returns
        ids = frozenset(line.strip() for line in f)
    with open('GO.txt', 'r') as haystack, \
         open('GOLines.txt', 'w') as output:
        for line in haystack:
            if extract_id(line) in ids:
                output.write(extract_go_num(line) + '\n')

if __name__ == "__main__":
    sys.exit(main())
I'm trading memory overhead for an O(n) solution rather than O(n^2).
I'm using regular expressions to extract the IDs and GO numbers, but that's brittle if the number of digits changes.
Maybe something like this:
with open('readID.txt', 'r') as readID, open('GOlines.txt', 'w') as output, open('GO.txt', 'r') as GO:
    for ID in readID:
        for line in GO:
            if ID in line:
                output.write(line)
If your files are small enough to fit in memory:
with open('/somepath/GO.txt') as f:
    pool = f.readlines()

with open('/somepath/readID.txt') as f:
    tokens = f.readlines()

# strip spaces/newlines
tokens = [t.strip() for t in tokens]

found = [(t, lno) for t in tokens for (lno, l) in enumerate(pool) if t in l]
You could then write your found list into your outfile.
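A minimal sketch of that last step (the output path is hypothetical; pool[lineno] is the matched line):

with open('/somepath/GOlines.txt', 'w') as out:
    for token, lineno in found:
        out.write(pool[lineno])   # the matching line, newline included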
I want to create a Python program which splits a file into segments of a specified width, and a consumer program which takes the segments and creates a duplicate of the original file. The segments might arrive out of order, so I intend to use the offset value to write to the file.
Is there a way I can achieve this without creating a local array to hold all the data on the receiving end?
For example:
f = open(file, "wb")
f.seek(offset)
f.write(data)
The idea behind this is that the program sending the file might not manage to send the whole file in one go, and will resume again once it restarts.
I have sample code below, in which the combine_bytes function throws an exception when I try to place data at the buffer location.
import sys
import os

def SplitFile(fname, start, end, width):
    t_fileSize = os.path.getsize(fname)
    buffData = bytearray(t_fileSize)
    for line, offset in get_bytes(fname, int(start), int(end), int(width)):
        combine_bytes(buffData, offset, line, width)
        nums = ["%02x" % ord(c) for c in line]
        print " ".join(nums)
    f = open("Green_copy.jpg", "wb")
    f.write(buffData)
    f.close()

def combine_bytes(in_buff, in_offset, in_data, in_width):
    # something like memcpy would be nice
    # in_buff[in_offset:in_offset + in_width] = in_data
    # this works but it's the mother of inefficiency
    i = in_offset
    for c in in_data:
        in_buff.insert(i, c)
        i = i + 1

def get_bytes(fname, start, end, width):
    t_currOffset = start
    t_width = width
    f = open(fname, "r+b")
    if end != 0:
        while t_currOffset < end:
            f.seek(t_currOffset)
            if (t_currOffset + t_width) > end:
                t_width = end - t_currOffset
            t_data = f.read(t_width)
            yield t_data, t_currOffset
            t_currOffset += t_width
    else:
        f.seek(t_currOffset)
        t_data = f.read(t_width)
        while t_data:
            yield t_data, t_currOffset
            t_currOffset += t_width
            f.seek(t_currOffset)
            t_data = f.read(t_width)
    f.close()

if __name__ == '__main__':
    try:
        SplitFile(*sys.argv[1:5])
    except:
        print "Unexpected error:", sys.exc_info()[0]
I still couldn't figure out what your intent is, but this version of combine_bytes will get rid of your "mother of inefficiency" part (which it really is):
def combine_bytes(in_buff, in_offset, in_data, in_width):
    # something like memcpy would be nice
    # in_buff[in_offset:in_offset + in_width] = in_data
    in_buff = in_buff[:in_offset] + in_data + in_buff[in_offset:]
    return in_buff
Of course this creates a new (larger) buffer for each call, and you have to replace your buffer in the caller's scope with the one returned:
buffData = combine_bytes(buffData, offset, line, width)
Found it. Here is a better way, which produces what I wanted and is faster:

buffData[t_offset:t_offset + len(t_data)] = bytearray(t_data)
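For anyone skimming: slice assignment on a bytearray overwrites the target range in place, much like the memcpy the code comments wished for. A tiny illustration (values are arbitrary):

buf = bytearray(8)           # eight zero bytes
buf[2:5] = bytearray('abc')  # overwrite bytes 2..4 in place; length stays 8
print repr(buf)              # bytearray(b'\x00\x00abc\x00\x00\x00')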