I want to create a Python program which splits a file into segments of a specified width, and then a consumer program takes the segments and creates a duplicate of the original file. The segments might arrive out of order, so I intend to use each segment's offset value to write it to the file.
Is there a way I can achieve this without creating a local array to hold all the data on the receiving end?
for example,
f = open(file, "wb")
f.seek(offset)
f.write(data)
The idea behind this is that the program that sends the file might not be able to finish sending the file in one go, and will resume sending once it restarts.
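If the receiver knows the final file size, no in-memory buffer is needed at all: pre-allocate the file once and write each segment directly at its offset. A minimal sketch, assuming each segment arrives as an (offset, data) pair (the function names here are made up):
def preallocate(path, total_size):
    # Create the file at its final size once, so out-of-order
    # segments can be written at any offset.
    f = open(path, "wb")
    f.truncate(total_size)
    f.close()

def write_segment(path, offset, data):
    # "r+b" opens for in-place writing without truncating,
    # so a resumed transfer keeps the segments already written.
    # (Opening with "wb" as above would wipe the file each time.)
    f = open(path, "r+b")
    f.seek(offset)
    f.write(data)
    f.close()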
I have sample code below in which the combine_bytes function throws an exception when I try to place data at the buffer location.
import sys
import os

def SplitFile(fname, start, end, width):
    t_fileSize = os.path.getsize(fname)
    buffData = bytearray(t_fileSize)
    for line, offset in get_bytes(fname, int(start), int(end), int(width)):
        combine_bytes(buffData, offset, line, width)
        nums = ["%02x" % ord(c) for c in line]
        print " ".join(nums)
    f = open("Green_copy.jpg", "wb")
    f.write(buffData)
    f.close()

def combine_bytes(in_buff, in_offset, in_data, in_width):
    #something like memcpy would be nice
    #in_buff[in_offset:in_offset + in_width] = in_data
    #this works but it's the mother of inefficiency
    i = in_offset
    for c in in_data:
        in_buff.insert(i, c)
        i = i + 1

def get_bytes(fname, start, end, width):
    t_currOffset = start
    t_width = width
    f = open(fname, "r+b")
    if end != 0:
        while t_currOffset < end:
            f.seek(t_currOffset)
            if (t_currOffset + t_width) > end:
                t_width = end - t_currOffset
            t_data = f.read(t_width)
            yield t_data, t_currOffset
            t_currOffset += t_width
    else:
        f.seek(t_currOffset)
        t_data = f.read(t_width)
        while t_data:
            yield t_data, t_currOffset
            t_currOffset += t_width
            f.seek(t_currOffset)
            t_data = f.read(t_width)
    f.close()

if __name__ == '__main__':
    try:
        SplitFile(*sys.argv[1:5])
    except:
        print "Unexpected error:", sys.exc_info()[0]
I still couldn't figure out your intent, but this version of combine_bytes will get rid of your "mother of inefficiency" part (which it really is):
def combine_bytes(in_buff, in_offset, in_data, in_width):
    #something like memcpy would be nice
    #in_buff[in_offset:in_offset + in_width] = in_data
    in_buff = in_buff[:in_offset] + in_data + in_buff[in_offset:]
    return in_buff
Of course this creates a new (larger) buffer for each call, and you have to replace the buffer in the caller's scope with the one returned:
buffData = combine_bytes(buffData, offset, line, width)
Found it. Here is a better way which produces what I wanted and is faster:
_buffData[t_offset:t_offset + len(t_data)] = bytearray(t_data)
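A tiny demonstration of why slice assignment is the right tool here (throwaway names): it overwrites the target bytes in place, whereas insert() or concatenation shifts or copies everything after the offset on every call.
buff = bytearray(10)                      # pre-sized buffer of zeros
data = b"\xde\xad\xbe\xef"
offset = 3
buff[offset:offset + len(data)] = data    # overwrite in place; buffer length unchanged
print(repr(buff))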
I'm getting data from a microcontroller over serial. My problem is that if I tell the MCU to send data every 20 ms, some data gets corrupted. For example, if the MCU sends sin(time), you'd expect all the data to range from -1 to 1, but I end up getting a range of -1 to 100 (though most of what I get is between -1 and 1). Unfortunately, I won't always know what will be sent to the computer, it might vary a lot, and I need to be able to trust what Python tells me it is receiving. Is there a time limit for Python and serial, and if so, what is it? Or is there a way to get data more reliably? I originally thought it was just a problem of the communication getting split into a few chunks (the MCU sends xxxx|xxxx|xxxx and Python reads xxx, then x|xxxx|, then xxxx), which I've already dealt with. Here is the code I'm using to get things from serial:
import serial
import serial.tools.list_ports as det
import os
import datetime
from time import gmtime, strftime
import csv

######################### Functions #########################
## Find letter or character in string
def find(s, ch):
    return [i for i, ltr in enumerate(s) if ltr == ch]

## Find string in string
def find_str(s, char):
    index = 0
    if char in s:
        c = char[0]
        for ch in s:
            if ch == c:
                if s[index:index+len(char)] == char:
                    return index
            index += 1
    return -1

## Data saving
def DataSave(x, data):
    #for i in range(x):
    #    DATALISTS[i].append( int( a[ find(buffer,"|")[i] : find(buffer,"|")[i+1] ] ) )
    save = open(x, 'a')
    save.write(data + "\n")
    save.close()
######################### End #########################

######################### Filter #########################
First = True
Plot = True
NumberOfDataPoints = 1
currupt = []
TimeStamp = []
repeat = True

def GetDATA(file, path, port, baud):
    File = True
    First = True
    Plot = True
    NumberOfDataPoints = 1
    currupt = []
    TimeStamp = []
    repeat = True
    try:
        ser = serial.Serial(port, baud, timeout=1)
    except FileNotFoundError:
        pass
    while repeat:
        x = ser.inWaiting()
        # only read when something is in buffer
        if x:
            # convert serial.read to a string and remove b' and ' from the beginning and end, respectively
            buffer = str(ser.read(x))
            buffer = buffer[2:][:-1]
            # Gets the time and appends to TimeStamp list
            TimeStamp.append(datetime.datetime.now())
            # Confirm all data is received; incomplete data is collected in the currupt list
            lineEnd = ['\\r', '\\n']
            # If there is no \r and/or \n in buffer, add buffer to currupt list
            if not any(word in buffer for word in lineEnd):
                currupt.append(buffer)
            # If \r and/or \n in buffer, process buffer
            # If there is data in currupt list, buffer becomes data in currupt + buffer
            if any(word in buffer for word in lineEnd):
                if len(currupt) != 0:
                    buffer = ''.join(currupt) + buffer
                    del currupt[:]
                # Remove \r and \n from buffer
                while any(word in buffer for word in lineEnd):
                    for i in range(len(lineEnd)):
                        if lineEnd[i] in buffer:
                            buffer = buffer[: find_str(buffer, lineEnd[i])]
                if First:
                    buffer = ''
                    del TimeStamp[:]
                    First = False
                    Plot = True
                # makes sure that there is something to write
                if len(buffer) != 0:
                    # Set of one datapoint
                    if '|' not in buffer:
                        if File:
                            datafile = os.path.join(os.path.expanduser('~'), path, file)
                            DataSave(datafile, str(TimeStamp[0]) + '|' + buffer + '|')
                            #print(TimeStamp[0], buffer)
                        del TimeStamp[:]

## unused
def SetDataPoints(file, path, port):
    GetDATA(file, path, port, False)
    return NumberOfDataPoints
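One thing that might help (a sketch only, not verified against this particular MCU): let pyserial do the framing with readline() instead of draining inWaiting(), so the loop only ever sees complete, terminator-delimited lines; any partial chunk stays in the driver buffer until its newline arrives.
import serial

def read_lines(port, baud):
    # timeout makes readline() return even if a line stalls mid-way
    ser = serial.Serial(port, baud, timeout=1)
    while True:
        raw = ser.readline()           # reads up to and including b'\n'
        if not raw.endswith(b'\n'):    # timed out on a partial line: skip it
            continue
        yield raw.strip().decode('ascii', errors='replace')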
I am a Python newbie trying to run this simple example. I wish to pass files and certain values as parameters to my function latcalc(). Could anyone suggest how I can pass my files and values as parameters? Or is there a better way/approach to do these things?
#!/usr/bin/python
# include the constants
min_length = 1
max_length = 30
# delays
delay = 100
# Speed of light
c_vaccum = 3e8

global filename1
global filename2
global filename3

def openfiles():
    filename1 = open("file1.txt", "w")
    filename2 = open("file2.txt", "w")
    filename3 = open("file3.txt", "w")

def latcalc(filename, target_name, vf):
    target_name = 0
    for length in range(min_length, max_length):
        if length < 2:
            target_name += (length/(vf * c_vaccum))
        elif length == 2:
            target_name += delay
        else:
            target_name = target_name
        myline = "%s\t%s\n" % (length, target_name)
        filename.write(myline)

openfiles()
latcalc(filename1, lat40, 0.4)
latcalc(filename2, lat80, 0.8)
latcalc(filename3, lat100, 1)
I would create a little class (give it a useful name) to encapsulate your data.
If your list of files grows, you only have to change create_lats:
min_length = 1
max_length = 30
# delays
delay = 100
# Speed of light
c_vaccum = 3e8

# Little class to keep our data in one place
class Lat:
    def __init__(self, filename, factor):
        self.filename = filename
        self.factor = factor
        self.file = open(filename, "w")  # let the class open the file

# now our function needs only one parameter, neat!
def latcalc(lat):
    target_name = 0
    for length in range(min_length, max_length):
        if length < 2:
            target_name += (length / (lat.factor * c_vaccum))  # access the instance variable
        elif length == 2:
            target_name += delay
        else:
            target_name = target_name
        myline = "%s\t%s\n" % (length, target_name)
        lat.file.write(myline)

def create_lats():
    lats = []
    lats.append(Lat("file1.txt", 0.4))
    lats.append(Lat("file2.txt", 0.8))
    lats.append(Lat("file3.txt", 1))
    return lats

# loop over your lats created in create_lats
for lat in create_lats():
    latcalc(lat)
    lat.file.close()  # close the file
try something like this (notice the globals are gone):
def openfiles(namelist):
    ret = []
    for name in namelist:
        fi = open(name, 'w')
        ret.append(fi)
    return ret

filelist = ['file1.txt', 'file2.txt', 'file3.txt']
handles = openfiles(filelist)
for handle in handles:
    <do what ever you want>
handles will be a list of file handles corresponding to the filelist of names.
Note that the file handle is what you pass around to do reads and writes with.
Also, the opens could be done in the call to latcalc, since you apparently process one file per call; see the sketch below.
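For instance, a sketch of that last suggestion (assuming latcalc is reworked to take an open file handle and a factor, per the question's structure):
def latcalc(handle, vf):
    # ... compute target_name as in the question, then:
    handle.write("%s\t%s\n" % (1, 1 / (vf * 3e8)))

# one open per call; "with" closes the file for us
for name, vf in [("file1.txt", 0.4), ("file2.txt", 0.8), ("file3.txt", 1)]:
    with open(name, "w") as handle:
        latcalc(handle, vf)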
As some comments point out, you don't need global variables, and you should close your file handler objects after you have finished writing to them. This is most conveniently done with 'with' (closing is then done for you, even in case of an unexpected exception):
#!/usr/bin/python
min_length = 1
max_length = 3
delay = 100
c_vaccum = 3e8

def latcalc(filename, vf):
    target_name = 0
    with open(filename, "w") as f:
        for length in range(min_length, max_length):
            if length < 2:
                target_name += (length/(vf * c_vaccum))
            elif length == 2:
                target_name += delay
            myline = "%s\t%d\n" % (length, target_name)
            f.write(myline)
    return target_name
latcalc("file1.txt", 0.4)
latcalc("file2.txt", 0.8)
latcalc("file3.txt", 1)
The way you treat the parameter target_name suggests you are used to C-style pointers, which do not exist in that form in Python. The parameter is pointless here if you set it to a new value in the first line of latcalc(). Also, you format target_name as a string when it is a number:
myline = "%s\t%s\n" % (length, target_name)
If you need target_name after the function has finished, you have to return it.
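A minimal illustration of that last point (hypothetical values):
def latcalc(vf):
    target_name = 1 / (vf * 3e8)   # computed locally
    return target_name             # hand the result back to the caller

lat40 = latcalc(0.4)               # the caller captures the return value
print(lat40)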
1) open() gives you a file handler, not a filename.
2) Use a 'with' statement for opening a file, to avoid "forgetting" to close the file when finished.
#!/usr/bin/python
# include the constants
min_length = 1
max_length = 30
# delays
delay = 100
# Speed of light
c_vaccum = 3e8

def latcalc(filename, target_name, vf):
    with open(filename, "w") as openedFile:
        target_name = 0
        for length in range(min_length, max_length):
            if length < 2:
                target_name += (length/(vf * c_vaccum))
            elif length == 2:
                target_name += delay
            else:
                target_name = target_name
            myline = "%s\t%s\n" % (length, target_name)
            openedFile.write(myline)

latcalc("file1.txt", "lat40", 0.4)
latcalc("file2.txt", "lat80", 0.8)
latcalc("file3.txt", "lat100", 1)
I have written two versions of a program that parses a log file and returns the number of strings that match a given regex. The single-threaded version returns the correct output:
Number of Orders ('ORDER'): 1108
Number of Replacements ('REPLACE'): 742
Number of Orders and Replacements: 1850
Time to process: 5.018553
The multithreaded program, however, returns erroneous values:
Number of Orders ('ORDER'): 1579
Number of Replacements ('REPLACE'): 1108
Number of Orders and Replacements: 2687
Time to process: 2.783091
The time can vary (it should be faster for the multithreaded one), but I can't figure out why the values for orders and replacements differ between the two versions.
Here is the multithreaded version:
import re
import time
import sys
import threading
import Queue

class PythonLogParser:
    queue = Queue.Queue()

    class FileParseThread(threading.Thread):
        def __init__(self, parsefcn, f, startind, endind, olist):
            threading.Thread.__init__(self)
            self.parsefcn = parsefcn
            self.startind = startind
            self.endind = endind
            self.olist = olist
            self.f = f

        def run(self):
            self.parsefcn(self.f, self.startind, self.endind, self.olist)

    def __init__(self, filename):
        assert(len(filename) != 0)
        self.filename = filename
        self.start = 0
        self.end = 0

    def open_file(self):
        f = None
        try:
            f = open(self.filename)
        except IOError as e:
            print 'Unable to open file:', e.message
        return f

    def count_orders_from(self, f, starting, ending, offset_list):
        f.seek(offset_list[starting])
        order_pattern = re.compile(r'.*(IN:)(\s)*(ORDER).*(ord_type)*')
        replace_pattern = re.compile(r'.*(IN:)(\s)*(REPLACE).*(ord_type)*')
        order_count = replace_count = 0
        for line in f:
            if order_pattern.match(line) != None:
                order_count += 1  # = order_count + 1
            if replace_pattern.match(line) != None:
                replace_count += 1  # = replace_count + 1
        #return (order_count, replace_count, order_count+replace_count)
        self.queue.put((order_count, replace_count, order_count+replace_count))

    def get_file_data(self):
        offset_list = []
        offset = 0
        num_lines = 0
        f = 0
        try:
            f = open(self.filename)
            for line in f:
                num_lines += 1
                offset_list.append(offset)
                offset += len(line)
            f.close()
        finally:
            f.close()
        return (num_lines, offset_list)

    def count_orders(self):
        self.start = time.clock()
        num_lines, offset_list = self.get_file_data()
        start_t1 = 0
        end_t1 = num_lines/2
        start_t2 = end_t1 + 1
        f = open(self.filename)
        t1 = self.FileParseThread(self.count_orders_from, f, start_t1, end_t1, offset_list)
        self.count_orders_from(f, start_t2, num_lines, offset_list)
        t1.start()
        self.end = time.clock()
        tup1 = self.queue.get()
        tup2 = self.queue.get()
        order_count1, replace_count1, sum1 = tup1
        order_count2, replace_count2, sum2 = tup2
        print 'Number of Orders (\'ORDER\'): {0}\n'\
              'Number of Replacements (\'REPLACE\'): {1}\n'\
              'Number of Orders and Replacements: {2}\n'\
              'Time to process: {3}\n'.format(order_count1+order_count2, \
                                              replace_count1+replace_count2, \
                                              sum1+sum2, \
                                              self.end - self.start)
        f.close()

def test2():
    p = PythonLogParser('../../20150708.aggregate.log')
    p.count_orders()

def main():
    test2()

main()
The idea is that since the file is large, each thread will read half the file: t1 reads the first half and the main thread reads the second. The main thread then adds together the results from both iterations and displays them.
My suspicion is that somehow the order_count and replace_count in count_orders_from are being modified between threads rather than starting at 0 for each thread, but I'm not sure, since I don't see why separate calls to a method from two separate threads would modify the same variables.
The error was occurring because even though in theory the threads were parsing individual halves, what in fact happened is that one thread parsed halfway and the other parsed the full file, so items were double-counted. This was fixed by adding a linecount variable to count_orders_from, to check whether the reader has reached the line it is supposed to stop at.
def count_orders_from(self, f, starting, ending, offset_list):
    f.seek(offset_list[starting])
    order_pattern = re.compile(r'.*(IN:)(\s)*(ORDER).*(ord_type)*')
    replace_pattern = re.compile(r'.*(IN:)(\s)*(REPLACE).*(ord_type)*')
    order_count = replace_count = linecount = 0
    for line in f:
        if order_pattern.match(line) != None:
            order_count += 1  # = order_count + 1
        if replace_pattern.match(line) != None:
            replace_count += 1  # = replace_count + 1
        if linecount == ending:
            break
        linecount += 1
    self.queue.put((order_count, replace_count, order_count+replace_count))
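One further caveat, as an aside (an observation on the design, not part of the original fix): both threads still iterate over the same file object, so their seek/read positions can interleave. A sketch of a count_orders variant that gives each worker its own handle:
def count_orders(self):
    num_lines, offset_list = self.get_file_data()
    mid = num_lines / 2
    f1 = open(self.filename)   # private handle per worker, so the
    f2 = open(self.filename)   # threads' file positions cannot clash
    t1 = self.FileParseThread(self.count_orders_from, f1, 0, mid, offset_list)
    t1.start()
    self.count_orders_from(f2, mid + 1, num_lines, offset_list)
    t1.join()
    # results are still collected from self.queue as before
    f1.close()
    f2.close()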
I am creating my own bootloader for an ATXmega128A4U. To use the bootloader I want to transform the ELF file of the firmware into a memory map used in the ATXmega.
For that I use Python and the module "pyelftools". Its documentation is poor, and I have run into a problem: I do not know what information I can use to get the address, offset, etc. of the data in the sections.
My goal is to create a bytearray, copy the data/code into it, and transfer it to the bootloader. Below is my code:
import sys

# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
sys.path[0:0] = ['.', '..']

from elftools.common.py3compat import bytes2str
from elftools.elf.elffile import ELFFile

# 128k flash for the ATXmega128a4u
flashsize = 128 * 1024

def process_file(filename):
    with open(filename, 'rb') as f:
        # get the data
        elffile = ELFFile(f)
        dataSec = elffile.get_section_by_name(b'.data')
        textSec = elffile.get_section_by_name(b'.text')

        # prepare the memory
        flashMemory = bytearray(flashsize)

        # the data section
        startAddr = dataSec.header.sh_offset
        am = dataSec.header.sh_size
        i = 0
        while i < am:
            val = dataSec.stream.read(1)
            flashMemory[startAddr] = val[0]
            startAddr += 1
            i += 1

        # the text section
        startAddr = textSec.header.sh_offset
        am = textSec.header.sh_size
        i = 0
        while i < am:
            print(str(startAddr) + ' : ' + str(i))
            val = textSec.stream.read(1)
            flashMemory[startAddr] = val[0]
            startAddr += 1
            i += 1
    print('finished')

if __name__ == '__main__':
    process_file('firmware.elf')
I hope someone can tell me how to solve this problem.
I managed to solve the problem.
Don't read the data manually from the stream via "textSec.stream.read"; use "textSec.data()" instead. Internally (see "sections.py") a seek operation on the file is done and the data is then read. The result is the valid data chunk.
The following code reads the code (.text) section of an ATXmega firmware and copies it into a bytearray which has the layout of the flash of an ATXmega128A4U device.
@vlas_tepesch: the hex conversion is not needed, and the 64k pitfall is avoided.
import sys

sys.path[0:0] = ['.', '..']

from elftools.common.py3compat import bytes2str
from elftools.elf.elffile import ELFFile

# 128k flash for the ATXmega128a4u
flashsize = 128 * 1024

def __printSectionInfo(s):
    print('[{nr}] {name} {type} {addr} {offs} {size}'.format(
        nr = s.header['sh_name'],
        name = s.name,
        type = s.header['sh_type'],
        addr = s.header['sh_addr'],
        offs = s.header['sh_offset'],
        size = s.header['sh_size']
        )
    )

def process_file(filename):
    print('In file: ' + filename)
    with open(filename, 'rb') as f:
        # get the data
        elffile = ELFFile(f)
        print('sections:')
        for s in elffile.iter_sections():
            __printSectionInfo(s)
        print('get the code from the .text section')
        textSec = elffile.get_section_by_name(b'.text')

        # prepare the memory
        flashMemory = bytearray(flashsize)

        # the text section
        startAddr = textSec.header['sh_addr']
        val = textSec.data()
        flashMemory[startAddr:startAddr+len(val)] = val
        # print memory
    print('finished')

if __name__ == '__main__':
    process_file('firmware.elf')
Thanks for the comments!
I am attempting to write a program that searches through a hex file for instances of a hex string between two values, e.g. between D4135B and D414AC, incrementing from the first value until the second is reached: D4135B, D4135C, D4135D, etc.
I have managed to get it to increment, but it's the search part I am having trouble with.
This is the code I have so far. It's been cobbled together from other places, and I need to make it somehow output all search hits into the output file (file_out).
I have exceeded the limit of my Python understanding and I'm sure there's probably a much easier way of doing this. I would be very grateful for any help.
def search_process(hx):  # searching for two binary strings
    global FLAG
    while threeByteHexPlusOne != threeByteHex2:  # keep incrementing until second value reached
        if FLAG:
            if hx.find(threeByteHex2) != -1:
                FLAG = False  # if threeByteHex == threeByteHexPlusOne, end search
                print("Reached the end of the search", hx.find(threeByteHexPlusOne))
        else:
            if hx.find(threeByteHexPlusOne) != -1:
                FLAG = True
    return -1  # if no results found

if __name__ == '__main__':
    try:
        file_in = open(FILE_IN, "r")    # opening input file
        file_out = open(FILE_OUT, 'w')  # opening output file
        hx_read = file_in.read()        # read from input file
        tmp = ''
        found = ''
        while hx_read:  # reading from file till file is empty
            hx_read = tmp + hx_read
            pos = search_process(hx_read)
            while pos != -1:
                hx_read = hx_read[pos:]
                if FLAG:
                    found = found + hx_read
                pos = search_process(hx_read)
            tmp = hx_read
            hx_read = file_in.read()
        file_out.write(found)  # writing to output file
    except IOError:
        print('FILE NOT FOUND!!! Check your filename or directory/PATH')
Here's a program that looks through a hex string from a file 3 bytes at a time, and if the 3-byte hex string is between the given hex bounds, writes it to another file. It makes use of generators to make getting the bytes from the hex string a little cleaner.
import base64
import sys

_usage_string = 'Usage: python {} <input_file> <output_file>'.format(sys.argv[0])

def _to_base_10_int(value):
    return int(value, 16)

def get_bytes(hex_str):
    # Two characters equals one byte
    for i in range(0, len(hex_str), 2):
        yield hex_str[i:i+2]

def get_three_byte_hexes(hex_str):
    bytes = get_bytes(hex_str)
    while True:
        try:
            three_byte_hex = next(bytes) + next(bytes) + next(bytes)
        except StopIteration:
            break
        yield three_byte_hex

def find_hexes_in_range(hex_str, lower_bound_hex, upper_bound_hex):
    lower_bound = _to_base_10_int(lower_bound_hex)
    upper_bound = _to_base_10_int(upper_bound_hex)
    found = []
    for three_byte_hex in get_three_byte_hexes(hex_str):
        hex_value = _to_base_10_int(three_byte_hex)
        if lower_bound <= hex_value < upper_bound:
            found.append(three_byte_hex)
    return found

if __name__ == "__main__":
    try:
        assert(len(sys.argv) == 3)
    except AssertionError:
        print _usage_string
        sys.exit(2)
    file_contents = open(sys.argv[1], 'rb').read()
    hex_str = base64.decodestring(file_contents).encode('hex')
    found = find_hexes_in_range(hex_str, 'D4135B', 'D414AC')
    print('Found:')
    print(found)
    if found:
        with open(sys.argv[2], 'wb') as fout:
            for _hex in found:
                fout.write(_hex)
Check out some more info on generators here
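As a quick illustration of the generator behaviour that get_three_byte_hexes relies on (a toy example, separate from the program above):
def pairs(s):
    # yields two characters at a time, like get_bytes above
    for i in range(0, len(s), 2):
        yield s[i:i+2]

gen = pairs('abcdef')
print(next(gen))   # 'ab' -- each next() resumes where the last one stopped
print(next(gen))   # 'cd'
print(list(gen))   # ['ef'] -- consumes the rest; a further next() raises StopIteration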