Python - Windows Raw Disk unable to read final sectors

Python - Windows Raw Disk unable to read final sectors - python

When accessing a Raw Disk on Windows via Python open(), it for whatever reason does not allow me to read the last 10240 bytes (aka last 5 sectors at 2048 bytes/sector).
When dumping the disc image by other means and comparing the images I can see that the data cannot be assumed to be empty either. In fact, the first of the missing sectors has a UDF Anchor Tag present with related metadata in it. The following sectors are entirely blank.
This is how I dumped the disc contents:
# Copy the raw device to an image file, 512 bytes at a time.
# Both files are opened in one `with` so the image is flushed and closed
# even if a read fails part-way through.
with open("test.iso", "wb") as out, open(r"\\.\D:", "rb") as f:
    while True:
        data = f.read(512)
        if not data:  # b"" is what open() returns once it thinks the device has ended
            break
        out.write(data)
If I take that same open() object and tell it to seek to the very end of the disc, it does. So it can clearly reach the sectors at least in terms of seeking. If I then seek back 10240 bytes then attempt to f.read(...), it returns b'' (empty result) and not an error. It doesn't matter what size I tell it to read either. I tried all kinds of sizes, no-arg/default, 1, 12, 255, 512, 2048, 999999, etc.
Another StackOverflow answer on a different (but related) question also reported similar findings on Enhanced Audio Discs but seemingly no discussion was brought up since.
I have tested this on multiple DVD discs from varying kinds of studios and creators, all of which are in great condition with it still occurring.
Example reproducing code:
(I don't know if it's going to happen to you on your system config/disc/reader.)
PyPI Dependencies: WMI (imported as `wmi`)
WMIC reports the disc size-10240 as well, perhaps it's a Windows issue?
import os
from wmi import WMI
DISC_LETTER = "D:"

# Look the drive up through WMI to obtain the size Windows reports for it.
c = WMI()
disc_info = next(iter(c.Win32_CDROMDrive(Drive=DISC_LETTER)), None)
if not disc_info:
    # Raise a real exception; raising a plain tuple/string is itself a TypeError.
    raise RuntimeError("Disc %s not found..." % DISC_LETTER)

disc_size = int(disc_info.size)
disc_size += 10240  # WMIC also reports the size without 10240, but it is real!

with open(r"\\.\%s" % DISC_LETTER, "rb") as f:
    f.seek(disc_size)
    if f.tell() == disc_size:
        print("Seeked to the end of the disc...")
    f.seek(-10240, os.SEEK_CUR)
    if f.tell() == disc_size - (2048 * 5):
        print("Seeked 5 sectors before the end of the disc...")
    data = f.read(2048 * 5)
    # %r, because %b is only valid when the format string itself is bytes.
    print("Data read (len: %d): %r" % (len(data), data))
Any ideas on why this might be would be great, as I have tried everywhere I could.

It seems this occurs as open(r'\\.\N:') opens the device with restricted boundaries.
My solution was to open the disc with IOCTL instead of open(). Specifically with CreateFile, DeviceIoControl, and FSCTL_ALLOW_EXTENDED_DASD_IO.
# Open the raw volume through the Win32 API instead of Python's open().
handle = win32file.CreateFile(
    r"\\.\D:",                                             # target device
    win32con.MAXIMUM_ALLOWED,                              # desired access
    win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,  # share mode
    None,                                                  # security attributes
    win32con.OPEN_EXISTING,                                # creation disposition
    win32con.FILE_ATTRIBUTE_NORMAL,                        # flags and attributes
    None,                                                  # template file
)
if win32file.INVALID_HANDLE_VALUE == handle:
    raise RuntimeError("Failed to obtain device handle...")

# Ask for extended DASD I/O on this handle so the trailing sectors become readable.
win32file.DeviceIoControl(handle, winioctlcon.FSCTL_ALLOW_EXTENDED_DASD_IO, None, None)
From here I can use ReadFile and SetFilePointer as replacements for read and seek respectively.
I even worked on a new class that loads it all and allows you to dynamically read and seek without having to worry about sector alignment.
class Win32Device:
    """
    Class to read and seek a Windows Raw Device IO object without bother.
    It deals with getting the full size, allowing full access to all sectors,
    and alignment with the discs sector size.

    The device itself is only ever seeked/read on sector boundaries;
    `position` stores the byte offset the caller asked for, and `read`
    slices the sector-aligned data down to the requested range.

    Author: PHOENiX <pragma.exe#gmail.com>
    License: Free, enjoy! This should be a thing open() does by default.
    """

    def __init__(self, target):
        # type: (str) -> None
        self.target = target
        self.sector_size = None
        self.disc_size = None
        self.position = 0
        self.handle = self.get_handle()
        self.geometry = self.get_geometry()

    def __enter__(self):
        return self

    def __exit__(self, *_, **__):
        self.dispose()

    def __len__(self) -> int:
        """Disk size in bytes, rebuilt from the low and high 32-bit halves."""
        # DiskSize is a 64-bit field that get_geometry() unpacks as two
        # 32-bit values; using only the low half wraps above 4 GiB.
        low, high = self.geometry[-2], self.geometry[-1]
        return (high << 32) | low

    def dispose(self):
        """Close the device handle. Safe to call more than once."""
        handle = getattr(self, "handle", None)
        if handle and handle != win32file.INVALID_HANDLE_VALUE:
            win32file.CloseHandle(handle)
        # Forget the handle so a second dispose() is a no-op and the
        # `if not self.handle` checks elsewhere can lazily reopen it.
        self.handle = None

    def get_target(self):
        # type: () -> str
        """Get UNC target name. Can be `E:` or `PhysicalDriveN`."""
        prefix = "\\\\.\\"
        target = self.target
        if not target.startswith(prefix):
            # Prepend the device namespace (the old `+=` produced `E:\\.\E:`).
            target = prefix + target
        return target

    def get_handle(self):
        # type: () -> int
        """Get a direct handle to the raw UNC target, and unlock its IO capabilities."""
        handle = win32file.CreateFile(
            # https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
            self.get_target(),  # target
            win32con.MAXIMUM_ALLOWED,  # desired access
            win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE,  # share mode, write needed
            None,  # security attributes
            win32con.OPEN_EXISTING,  # creation disposition
            win32con.FILE_ATTRIBUTE_NORMAL,  # flags and attributes
            None  # template file
        )
        if handle == win32file.INVALID_HANDLE_VALUE:
            raise RuntimeError("Failed to obtain device handle...")
        # elevate accessible sectors, without this the last 5 sectors (in my case) will not be readable
        win32file.DeviceIoControl(handle, winioctlcon.FSCTL_ALLOW_EXTENDED_DASD_IO, None, None)
        return handle

    def get_geometry(self):
        # type: () -> tuple[int, ...]
        """
        Retrieves information about the physical disk's geometry.
        https://learn.microsoft.com/en-us/windows/win32/api/winioctl/ns-winioctl-disk_geometry_ex

        Returns a tuple of eight unsigned 32-bit values:
            Cylinders-Lo
            Cylinders-Hi
            Media Type
            Tracks Per Cylinder
            Sectors Per Track
            Bytes Per Sector
            Disk Size-Lo
            Disk Size-Hi
        """
        # "<8L" pins the layout to eight little-endian 4-byte fields, which is
        # what the 32-byte output buffer holds.
        return struct.unpack("<8L", win32file.DeviceIoControl(
            self.handle,  # handle
            winioctlcon.IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,  # ioctl api
            b"",  # in buffer
            32  # out buffer
        ))

    def tell(self):
        # type: () -> int
        """Get current (spoofed) position."""
        return self.position

    def _tell(self):
        # type: () -> int
        """Get current real position."""
        if not self.handle:
            self.handle = self.get_handle()
        return win32file.SetFilePointer(self.handle, 0, win32file.FILE_CURRENT)

    def seek(self, offset, whence=os.SEEK_SET):
        # type: (int, int) -> int
        """
        Seek at any point in the stream, in an aligned way.

        The real file pointer is moved to the sector boundary at or before the
        requested offset; the requested offset itself is remembered in
        `position` and returned.

        Raises ValueError for an unknown `whence` or a negative target offset,
        and IOError if the device did not land on the aligned offset.
        """
        if whence == os.SEEK_SET:
            base = 0
        elif whence == os.SEEK_CUR:
            base = self.tell()
        elif whence == os.SEEK_END:
            base = len(self)
        else:
            raise ValueError("Invalid whence value: %r" % (whence,))
        to = base + offset
        if to < 0:
            raise ValueError("Negative seek position: %d" % to)
        closest = self.align(to)  # get as close as we can while being aligned
        if not self.handle:
            self.handle = self.get_handle()
        pos = win32file.SetFilePointer(self.handle, closest, win32file.FILE_BEGIN)
        if pos != closest:
            raise IOError(f"Seek was not precise...")
        self.position = to  # not actually at this location, read will deal with it
        return to

    def read(self, size=-1):
        # type: (int) -> Optional[bytes]
        """
        Read any amount of bytes in the stream, in an aligned way.

        A negative (or None) `size` reads from the current position to the end
        of the disc. Raises IOError on a failed or short sector read.
        """
        if not self.handle:
            self.handle = self.get_handle()
        start = self.tell()
        if size is None or size < 0:
            size = max(len(self) - start, 0)
        if size == 0:
            return b''
        sector_size = self.geometry[-3]
        # Put the real pointer on the sector containing `start`, so the
        # slice offset below is always measured from a known boundary.
        self.seek(start)
        real = self._tell()
        offset = start - real
        end = start + size
        chunks = []
        while real < end:
            res, data = win32file.ReadFile(self.handle, sector_size, None)
            if res != 0:
                raise IOError(f"An error occurred: {res} {data}")
            if len(data) < sector_size:
                raise IOError(f"Read {sector_size - len(data)} less bytes than requested...")
            chunks.append(data)
            real += len(data)
        # seek to the position wanted + size read, which will then be re-aligned
        self.seek(end)
        return b''.join(chunks)[offset:offset + size]

    def align(self, size, to=None):
        # type: (int, Optional[int]) -> int
        """
        Align size to the closest but floor mod `to` value.
        Examples:
            align(513, to=512)
            >>>512
            align(1023, to=512)
            >>>512
            align(1026, to=512)
            >>>1024
            align(12, to=10)
            >>>10
        """
        if not to:
            to = self.geometry[-3]  # logical bytes per sector value
        # Integer floor division stays exact for offsets beyond float precision.
        return (size // to) * to

Related

Reading Memory Address from a process with a Static Address and Offsets in Python

I'm trying to read another process' memory in Python and I have the static address of the program and all the offsets. I'm using the win32api to do this. I can already read a process' memory with an address without offsets but I don't know how to use offsets.
I've already tried the script in this answer but it returns -1. I've changed the PROCESS_ALL_ACCESS to win32con.PROCESS_VM_READ and even then it returns -1.
How do I use the offsets with ReadProcessMemory?
Here is the code I'm using:
import win32api
import win32process
import win32con
import ctypes
import ctypes.wintypes as wintypes
def get_process_by_name(process_name):
    """Find the process id of the given
    process name and returns the process id.

    The match is a case-insensitive substring test against the file name of
    every module of every process that can be opened. Returns None when
    nothing matches."""
    process_name = process_name.lower()
    # Enumerate all processes
    for process_id in win32process.EnumProcesses():
        # Skip ids reported as -1
        if process_id == -1:
            continue
        # Try to read the process memory
        try:
            p_handle = win32api.OpenProcess(
                win32con.PROCESS_QUERY_INFORMATION | win32con.PROCESS_VM_READ, True, process_id)
            # Try to read the modules of the process
            try:
                for module_id in win32process.EnumProcessModules(p_handle):
                    name = str(win32process.GetModuleFileNameEx(p_handle, module_id))
                    if process_name in name.lower():
                        return process_id
            finally:
                win32api.CloseHandle(p_handle)
        except Exception:
            # Best effort: processes that cannot be opened or queried are
            # skipped. Catching Exception (not a bare except) keeps Ctrl+C
            # and SystemExit working.
            continue
    return None
def read_process_memory(process_id, address, offsets, size_of_data=4):
    """Read one unsigned int from `address` in the process `process_id`.

    `offsets` is accepted but not applied yet (see the placeholder below).
    Returns the value that was read."""
    # Open the process passed in by the caller (previously the global `p_id`
    # was used, which ignored the `process_id` argument).
    p_handle = ctypes.windll.kernel32.OpenProcess(win32con.PROCESS_VM_READ, False, process_id)
    data = ctypes.c_uint(size_of_data)
    bytesRead = ctypes.c_uint(size_of_data)
    current_address = address
    try:
        if offsets:
            # Do something to the offsets
            ctypes.windll.kernel32.ReadProcessMemory(p_handle, current_address, ctypes.byref(data), ctypes.sizeof(data), ctypes.byref(bytesRead))
        else:
            ctypes.windll.kernel32.ReadProcessMemory(p_handle, current_address, ctypes.byref(data), ctypes.sizeof(data), ctypes.byref(bytesRead))
    finally:
        # Close the handle to the process, even if the read raised
        ctypes.windll.kernel32.CloseHandle(p_handle)
    return data.value
p_id = get_process_by_name("program.exe")

# Without offsets it works fine
address = 0x2ADB1818
val = read_process_memory(p_id, address, None)
print(val)

# Does not point to the correct address:
# every offset is simply added onto the static address.
address = 0x00571160
offsets = [0xD84, 0x1B8, 0x38, 0x5C, 0x24, 0xF4, 0x1D08]
for offset in offsets:
    address = address + offset
val = read_process_memory(p_id, address, offsets)
print(val)

I've figured out what I was missing. I've been interpreting the addresses wrong. They are pointers to the addresses and so, when using offsets I need to read them, and add them to one another to get access to the value I want to read. Also, I needed to use the base address of the program, for which I just needed to return the value of the module.
Here is the above script with the necessary changes to read a process' memory with offsets:
import win32api
import win32process
import win32con
import ctypes
def get_process_by_name(process_name):
    """Finds the process id of the given
    process name and returns the process id and its base address.

    The match is a case-insensitive substring test against the file name of
    every module of every process that can be opened. Returns a
    (process_id, base_address) tuple, or None when nothing matches."""
    process_name = process_name.lower()
    # Enumerate all processes
    for process_id in win32process.EnumProcesses():
        # Skip ids reported as -1
        if process_id == -1:
            continue
        # Try to read the process memory
        try:
            h_process = win32api.OpenProcess(
                win32con.PROCESS_QUERY_INFORMATION | win32con.PROCESS_VM_READ, True, process_id)
            # Try to read the modules of the process
            try:
                # EnumProcessModules yields the base address of each module
                for base_address in win32process.EnumProcessModules(h_process):
                    # Get the name of the module
                    name = str(win32process.GetModuleFileNameEx(h_process, base_address))
                    # Compare it to the name of your program
                    if process_name in name.lower():
                        return process_id, base_address
            finally:
                win32api.CloseHandle(h_process)
        except Exception:
            # Best effort: processes that cannot be opened or queried are
            # skipped. Catching Exception (not a bare except) keeps Ctrl+C
            # and SystemExit working.
            continue
    return None
def read_process_memory(process_id, address, offsets=None):
    """Read a process' memory based on its process id, address and offsets.

    With offsets, the 4-byte value stored at the current address is read and
    the next offset is added to it to form the next address; this repeats for
    every offset, and the value at the final address is then read.
    Without offsets, the value at `address` is read directly.

    Returns a tuple (final address, value at that address). The address can
    be passed back in later, without offsets, to re-read the same value.

    The results of OpenProcess / ReadProcessMemory are not checked here.
    """
    kernel32 = ctypes.windll.kernel32
    # The handle to the program's process
    # This will allow to use ReadProcessMemory
    # (opened for the `process_id` argument, not a global)
    h_process = kernel32.OpenProcess(win32con.PROCESS_VM_READ, False, process_id)
    # Buffer receiving the data; `data.value` is the unsigned int that was read
    data = ctypes.c_uint(0)
    # Receives the number of bytes actually read
    bytesRead = ctypes.c_uint(0)
    # Starting address
    current_address = address
    try:
        # Iterate without touching the caller's list; an offset of 0 is a
        # legitimate step and does not end the chain.
        for offset in offsets or ():
            # Read the pointer stored at the current address...
            kernel32.ReadProcessMemory(h_process, current_address, ctypes.byref(data), ctypes.sizeof(data), ctypes.byref(bytesRead))
            # ...and step to the address it designates plus the offset
            current_address = data.value + offset
        # Read the value at the final address
        kernel32.ReadProcessMemory(h_process, current_address, ctypes.byref(data), ctypes.sizeof(data), ctypes.byref(bytesRead))
    finally:
        # Close the handle to the process on every path
        kernel32.CloseHandle(h_process)
    # Return a pointer to the value and the value
    # The pointer will be used to write to the memory
    return current_address, data.value
# Open the process
# NOTE: get_process_by_name falls through and returns None when no module
# name matches, in which case this tuple unpacking raises TypeError.
p_id, base_address = get_process_by_name("program.exe")
# The static address needs the program base_address
address = base_address + 0x00571160
# Offsets applied one after another while following the pointer chain
offsets = [0xD84, 0x1B8, 0x38, 0x5C, 0x24, 0xF4, 0x1D08]
# read_process_memory returns (address it ended on, value read there)
pointer_value, value = read_process_memory(p_id, address, offsets)
print(f"(Static Address) Value: {value}")
# Re-reading the memory with the last pointer
# (no offsets this time: pointer_value is read directly)
pointer_value, value = read_process_memory(p_id, pointer_value, None)
print(f"(Dynamic Address) Value: {value}")

serialwin32.py and serialutil.py error when run Invensense demo python client

Good days, I'm new to python and trying to run a demo provided by Invensense.(9-axis MPU9250 connects a STM32F407G discovery board, I used code and python client in motion_driver_6.12 which downloaded from Invensense website.)
The whole Python part uses Python 2.7, pyserial, and pygame.
I searched my issues in Stackoverflow, but the specific situations are a little different, and most of the solutions are useless for me.
First, I show my issues.
UART connects the PC, run Invensense's python client through cmd.exe, pygame window appears briefly and disappear and I get the following error
D:\motion_driver_6.12\eMPL-pythonclient>python eMPL-client.py 7
Traceback (most recent call last):
File "eMPL-client.py", line 273, in <module>
def four_bytes(d1, d2, d3, d4):
File "eMPL-client.py", line 12, in __init__
File "C:\Python27\lib\site-packages\serial\serialwin32.py", line 31, in
__init__
super(Serial, self).__init__(*args, **kwargs)
File "C:\Python27\lib\site-packages\serial\serialutil.py", line 218, in
__init__
self.port = port
File "C:\Python27\lib\site-packages\serial\serialutil.py", line 264, in port
raise ValueError('"port" must be None or a string, not
{}'.format(type(port)))
ValueError: "port" must be None or a string, not <type 'int'>
Second, through the similar questions, what I have done until now:
open the file "serialwin32.py" .Change port = self.name to port = str(self.name). It doesn't work, same error messages.
Uninstall pyserial 3.3 (the latest version) and use pyserial 2.7 instead. The error messages were gone, but Pygame now just sits there with a black screen. The old answer said "Invensense tells me that means it is connected and waiting for data".
----------------followed is eMPL-client.py, line 21 and line 273 are marked-----
#!/usr/bin/python
# eMPL_client.py
# A PC application for use with Embedded MotionApps.
# Copyright 2012 InvenSense, Inc. All Rights Reserved.
import serial, sys, time, string, pygame
from ponycube import *
class eMPL_packet_reader:
    """Read 23-byte eMPL packets from a serial port and hand them to delegates.

    A packet starts with '$' followed by a packet code: 1 is dispatched to
    the debug delegate, 2 to the quaternion delegate and 3 to the data
    delegate. Any delegate not supplied is replaced by an
    empty_packet_delegate.
    """

    # ********************* line 21: __init__ begins *********************
    def __init__(self, port, quat_delegate=None, debug_delegate=None, data_delegate=None ):
        self.s = serial.Serial(port,115200)
        self.s.setTimeout(0.1)
        self.s.setWriteTimeout(0.2)
        # TODO: Will this break anything?
        ##Client attempts to write to eMPL.
        #try:
        #self.s.write("\n")
        #except serial.serialutil.SerialTimeoutException:
        #pass # write will timeout if umpl app is already started.
        self.quat_delegate = quat_delegate or empty_packet_delegate()
        self.debug_delegate = debug_delegate or empty_packet_delegate()
        self.data_delegate = data_delegate or empty_packet_delegate()
        self.packets = []
        self.length = 0
        self.previous = None

    def read(self):
        """Dispatch every complete packet currently waiting on the port."""
        NUM_BYTES = 23
        while self.s.inWaiting() >= NUM_BYTES:
            rs = self.s.read(NUM_BYTES)
            if ord(rs[0]) == ord('$'):
                pkt_code = ord(rs[1])
                if pkt_code == 1:
                    d = debug_packet(rs)
                    self.debug_delegate.dispatch(d)
                elif pkt_code == 2:
                    p = quat_packet(rs)
                    self.quat_delegate.dispatch(p)
                elif pkt_code == 3:
                    d = data_packet(rs)
                    self.data_delegate.dispatch(d)
                else:
                    print("no handler for pkt_code %s" % (pkt_code,))
            else:
                print("serial misaligned!")
                # Discard bytes until the next '$' start marker. read(1)
                # returns an empty string once the 0.1 s timeout expires;
                # stop resynchronising then instead of crashing in ord('').
                c = self.s.read(1)
                while c and ord(c) != ord('$'):
                    c = self.s.read(1)
                if c:
                    # Drop the rest of the packet whose marker was just consumed.
                    self.s.read(NUM_BYTES-1)

    def write(self,a):
        """Write `a` to the serial port."""
        self.s.write(a)

    def close(self):
        """Close the serial port."""
        self.s.close()

    def write_log(self,fname):
        """Write logfile_line() of every stored packet to the file `fname`."""
        # `with` closes the file even if a packet fails to serialise.
        with open(fname,'w') as f:
            for p in self.packets:
                f.write(p.logfile_line())
# =========== PACKET DELEGATES ==========
class packet_delegate(object):
    """Base delegate: announces every loop tick and every dispatched packet."""

    def loop(self,event):
        """Report the event received on this loop iteration."""
        print("generic packet_delegate loop w/event %s" % (event,))

    def dispatch(self,p):
        """Report the packet that was dispatched."""
        print("generic packet_delegate dispatched %s" % (p,))
class empty_packet_delegate(packet_delegate):
    """Delegate that ignores everything it is given."""

    def loop(self,event):
        """Do nothing."""
        return None

    def dispatch(self,p):
        """Do nothing."""
        return None
class cube_packet_viewer (packet_delegate):
    """Delegate that redraws a cube using the most recent quaternion packet."""

    def __init__(self):
        self.screen = Screen(480,400,scale=1.5)
        self.cube = Cube(30,60,10)
        self.q = Quaternion(1,0,0,0)
        self.previous = None # previous quaternion
        self.latest = None # latest packet (get in dispatch, use in loop)

    def loop(self,event):
        """Redraw the cube if a packet arrived since the last call."""
        packet = self.latest
        if not packet:
            return
        orientation = packet.to_q().normalized()
        self.cube.erase(self.screen)
        self.cube.draw(self.screen,orientation)
        pygame.display.flip()
        # consumed; wait for the next dispatch
        self.latest = None

    def dispatch(self,p):
        """Remember `p` when it is a quat_packet; ignore anything else."""
        if not isinstance(p,quat_packet):
            return
        self.latest = p
class debug_packet_viewer (packet_delegate):
    """Delegate that displays debug packets as they are dispatched."""

    def loop(self,event):
        """Nothing to do per loop iteration."""
        return None

    def dispatch(self,p):
        """Display `p`, which must be a debug_packet."""
        assert isinstance(p,debug_packet)
        p.display()
class data_packet_viewer (packet_delegate):
    """Delegate that displays data packets as they are dispatched."""

    def loop(self,event):
        """Nothing to do per loop iteration."""
        return None

    def dispatch(self,p):
        """Display `p`, which must be a data_packet."""
        assert isinstance(p,data_packet)
        p.display()
# =============== PACKETS =================
# For 16-bit signed integers.
def two_bytes(d1,d2):
    """Combine two one-character strings (high byte first) into a signed 16-bit integer."""
    value = (ord(d1) << 8) + ord(d2)
    # values above the positive range wrap around to negative
    return value - 65536 if value > 32767 else value
# For 32-bit signed integers.
//**************************273 begins*********************************//
def four_bytes(d1, d2, d3, d4):
    """Combine four one-character strings (high byte first) into a signed 32-bit integer."""
    d = ord(d1)*(1<<24) + ord(d2)*(1<<16) + ord(d3)*(1<<8) + ord(d4)
    # The largest positive value is 2147483647, so 2147483648 (0x80000000)
    # must already wrap to -2147483648.
    if d > 2147483647:
        d -= 4294967296
    return d
----------------followed is serialutil.py(version 3.3) from line1 to line272, 218 and 264 are marked-------------
#! python
#
# Base class and support functions used by various backends.
#
# This file is part of pySerial. https://github.com/pyserial/pyserial
# (C) 2001-2016 Chris Liechti <cliechti#gmx.net>
#
# SPDX-License-Identifier: BSD-3-Clause
import io
import time
# ``memoryview`` was introduced in Python 2.7 and ``bytes(some_memoryview)``
# isn't returning the contents (very unfortunate). Therefore we need special
# cases and test for it. Ensure that there is a ``memoryview`` object for older
# Python versions. This is easier than making every test dependent on its
# existence.
try:
memoryview
except (NameError, AttributeError):
# implementation does not matter as we do not really use it.
# it just must not inherit from something else we might care for.
class memoryview(object): # pylint: disable=redefined-builtin,invalid-name
pass
try:
unicode
except (NameError, AttributeError):
unicode = str # for Python 3, pylint: disable=redefined-builtin,invalid-name
try:
basestring
except (NameError, AttributeError):
basestring = (str,) # for Python 3, pylint: disable=redefined-builtin,invalid-name
# "for byte in data" fails for python3 as it returns ints instead of bytes
def iterbytes(b):
    """Yield successive one-byte slices of `b`, so each item is bytes-like and never an int."""
    if isinstance(b, memoryview):
        b = b.tobytes()
    index = 0
    piece = b[index:index + 1]
    # an empty slice means we have run off the end
    while piece:
        yield piece
        index += 1
        piece = b[index:index + 1]
# all Python versions prior 3.x convert ``str([17])`` to '[17]' instead of '\x11'
# so a simple ``bytes(sequence)`` doesn't work for all versions
def to_bytes(seq):
    """Return `seq` as a bytes object.

    bytes are returned unchanged; bytearray and memoryview are copied into
    bytes; unicode strings raise TypeError; anything else is passed through
    bytearray() first."""
    if isinstance(seq, bytes):
        return seq
    if isinstance(seq, bytearray):
        return bytes(seq)
    if isinstance(seq, memoryview):
        return seq.tobytes()
    if isinstance(seq, unicode):
        raise TypeError('unicode strings are not supported, please encode to bytes: {!r}'.format(seq))
    # handle list of integers and bytes (one or more items) for Python 2 and 3
    return bytes(bytearray(seq))
# create control bytes
XON = to_bytes([17])
XOFF = to_bytes([19])
CR = to_bytes([13])
LF = to_bytes([10])
PARITY_NONE, PARITY_EVEN, PARITY_ODD, PARITY_MARK, PARITY_SPACE = 'N', 'E', 'O', 'M', 'S'
STOPBITS_ONE, STOPBITS_ONE_POINT_FIVE, STOPBITS_TWO = (1, 1.5, 2)
FIVEBITS, SIXBITS, SEVENBITS, EIGHTBITS = (5, 6, 7, 8)
PARITY_NAMES = {
PARITY_NONE: 'None',
PARITY_EVEN: 'Even',
PARITY_ODD: 'Odd',
PARITY_MARK: 'Mark',
PARITY_SPACE: 'Space',
}
class SerialException(IOError):
"""Base class for serial port related exceptions."""
class SerialTimeoutException(SerialException):
"""Write timeouts give an exception"""
writeTimeoutError = SerialTimeoutException('Write timeout')
portNotOpenError = SerialException('Attempting to use a port that is not open')
class Timeout(object):
    """\
    Deadline helper built on time.monotonic() when the time module has it,
    otherwise on time.time().

    A duration of None means "never expires" (is_infinite) and a duration of
    0 means "already expired" (is_non_blocking).
    """
    # time.monotonic is not affected by system clock adjustments; time.time is
    # the fallback and can misbehave if the clock is changed while a timeout
    # is running.
    TIME = getattr(time, 'monotonic', time.time)

    def __init__(self, duration):
        """Start a timeout lasting `duration` seconds (None or 0 are allowed)."""
        self.is_infinite = duration is None
        self.is_non_blocking = duration == 0
        self.duration = duration
        self.target_time = None if duration is None else self.TIME() + duration

    def expired(self):
        """Return True once the deadline has passed; never True for an infinite timeout."""
        if self.target_time is None:
            return False
        return self.time_left() <= 0

    def time_left(self):
        """Return the seconds remaining: 0 when non-blocking, None when infinite."""
        if self.is_non_blocking:
            return 0
        if self.is_infinite:
            return None
        remaining = self.target_time - self.TIME()
        if remaining > self.duration:
            # clock jumped, recalculate
            self.target_time = self.TIME() + self.duration
            return self.duration
        return max(0, remaining)

    def restart(self, duration):
        """\
        Begin a new countdown of `duration` seconds. Only supported if a
        timeout was already set up before.
        """
        self.duration = duration
        self.target_time = self.TIME() + duration
class SerialBase(io.RawIOBase):
"""\
Serial port base class. Provides __init__ function and properties to
get/set port settings.
"""
# default values, may be overridden in subclasses that do not support all values
BAUDRATES = (50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
9600, 19200, 38400, 57600, 115200, 230400, 460800, 500000,
576000, 921600, 1000000, 1152000, 1500000, 2000000, 2500000,
3000000, 3500000, 4000000)
BYTESIZES = (FIVEBITS, SIXBITS, SEVENBITS, EIGHTBITS)
PARITIES = (PARITY_NONE, PARITY_EVEN, PARITY_ODD, PARITY_MARK, PARITY_SPACE)
STOPBITS = (STOPBITS_ONE, STOPBITS_ONE_POINT_FIVE, STOPBITS_TWO)
def __init__(self,
port=None,
baudrate=9600,
bytesize=EIGHTBITS,
parity=PARITY_NONE,
stopbits=STOPBITS_ONE,
timeout=None,
xonxoff=False,
rtscts=False,
write_timeout=None,
dsrdtr=False,
inter_byte_timeout=None,
exclusive=None,
**kwargs):
"""\
Initialize comm port object. If a "port" is given, then the port will be
opened immediately. Otherwise a Serial port object in closed state
is returned.
"""
self.is_open = False
self.portstr = None
self.name = None
# correct values are assigned below through properties
self._port = None
self._baudrate = None
self._bytesize = None
self._parity = None
self._stopbits = None
self._timeout = None
self._write_timeout = None
self._xonxoff = None
self._rtscts = None
self._dsrdtr = None
self._inter_byte_timeout = None
self._rs485_mode = None # disabled by default
self._rts_state = True
self._dtr_state = True
self._break_state = False
self._exclusive = None
# assign values using get/set methods using the properties feature
//**************218**************//
self.port = port
//**************218**************//
self.baudrate = baudrate
self.bytesize = bytesize
self.parity = parity
self.stopbits = stopbits
self.timeout = timeout
self.write_timeout = write_timeout
self.xonxoff = xonxoff
self.rtscts = rtscts
self.dsrdtr = dsrdtr
self.inter_byte_timeout = inter_byte_timeout
self.exclusive = exclusive
# watch for backward compatible kwargs
if 'writeTimeout' in kwargs:
self.write_timeout = kwargs.pop('writeTimeout')
if 'interCharTimeout' in kwargs:
self.inter_byte_timeout = kwargs.pop('interCharTimeout')
if kwargs:
raise ValueError('unexpected keyword arguments: {!r}'.format(kwargs))
if port is not None:
self.open()
# - - - - - - - - - - - - - - - - - - - - - - - -
# to be implemented by subclasses:
# def open(self):
# def close(self):
# - - - - - - - - - - - - - - - - - - - - - - - -
@property
def port(self):
    """\
    Get the current port setting. The value that was passed on init or using
    setPort() is passed back.
    """
    return self._port

@port.setter
def port(self, port):
    """\
    Change the port.

    Raises ValueError unless `port` is None or a string. If the port was
    open it is closed first and reopened with the new setting.
    """
    # ************************* line 263 *************************
    if port is not None and not isinstance(port, basestring):
        raise ValueError('"port" must be None or a string, not {}'.format(type(port)))
    was_open = self.is_open
    if was_open:
        self.close()
    self.portstr = port
    self._port = port
    self.name = self.portstr
    if was_open:
        self.open()
-------------followed is serialwin32.py(version 3.3), 31 is marked-----------------------------------------
#! python
#
# backend for Windows ("win32" incl. 32/64 bit support)
#
# (C) 2001-2015 Chris Liechti <cliechti#gmx.net>
#
# This file is part of pySerial. https://github.com/pyserial/pyserial
# SPDX-License-Identifier: BSD-3-Clause
#
# Initial patch to use ctypes by Giovanni Bajo <rasky#develer.com>
# pylint: disable=invalid-name,too-few-public-methods
import ctypes
import time
from serial import win32
import serial
from serial.serialutil import SerialBase, SerialException, to_bytes, portNotOpenError, writeTimeoutError
class Serial(SerialBase):
"""Serial port implementation for Win32 based on ctypes."""
BAUDRATES = (50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
9600, 19200, 38400, 57600, 115200)
def __init__(self, *args, **kwargs):
self._port_handle = None
self._overlapped_read = None
self._overlapped_write = None
//**************31**************//
super(Serial, self).__init__(*args, **kwargs)
//**************31**************//
Questions:
Any ideas how to solve the error?
Anyone who has used the MPU9250 with STM32F407G discovery board? Any suggestion?
two related issues on Stackoverflow
issue one: visit pyserial serialwin32.py has attribute error
issue two: visit Invensense Motion Driver 6.12 STM32 demo python don't work

Looking online, I found this Github repo that appears to correspond to the code you are working with. It appears eMPL-client.py is incompatible with newer versions of pyserial. Specifically, the __main__ routine requires numeric port identifiers, but the pyserial 3.3 serial.Serial requires textual port identifiers. I do not have the setup to test this, but you can try the following.
Install a fresh copy of Python 2.7, which is what eMPL-client.py targets. This is unrelated to pyserial 2.7.
In the fresh copy, install pyserial 2.7 and the other dependencies. Per the source, pyserial 2.7 uses numbers for ports. Pyserial 3.3 uses names for ports, whence the "port must be a string" error.
That should get you past the initial error, which is similar to this answer to the question you linked. At that point, it's probably time to pull out your oscilloscope and make sure the board is generating signals. If so, check the speed/baud/parity. I see that the source runs at 115200bps; maybe try 57600 instead, if your hardware supports it.
An alternative
To use eMPL-client.py with pyserial 3.3, in eMPL-client.py, look for the lines:
if __name__ == "__main__":
if len(sys.argv) == 2:
comport = int(sys.argv[1]) - 1 #### This is the line that triggers the issue
else:
print "usage: " + sys.argv[0] + " port"
sys.exit(-1)
Change the ####-marked line to
comport = sys.argv[1]
(make sure to keep the indentation the same!)
Then, in cmd.exe, run
python eMPL-client.py COM7
with the string port name, e.g., COM7, instead of a port number, e.g., 7.

Python 2.7 ProcessPoolExecutor throwing IOError: [Errno 32] Broken pipe

I am streaming data into a class in chunks. For each chunk of data, two different types of np.convolve() are executed on the same ProcessPoolExecutor. The type of convolve that was called is determined by a return variable.
The order of the data must be maintained, so each future has an associated sequence number. The output function enforces that only data from contiguous futures is returned (not shown below). From what I understand I am properly calling the ProcessPoolExecutor.shutdown() function, but I am still getting an IOError.
The error is:
$ python processpoolerror.py
ran 5000000 samples in 3.70395112038 sec: 1.34990982265 Msps
Traceback (most recent call last):
File "/usr/lib/python2.7/multiprocessing/queues.py", line 268, in _feed
send(obj)
IOError: [Errno 32] Broken pipe
Sorry it's a bit long, but I have pruned this class down as much as possible while keeping the error. On my machine (Ubuntu 16.04.2 with an Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz) the pared-down code always gives this error. In the non-pruned version of this code, the Broken pipe occurs 25% of the time.
If you edit line 78 to True, and print during the execution, the error is not thrown. If you reduce the amount of data on line 100, the error is not thrown. What am I doing wrong here? Thanks.
import numpy as np
from concurrent.futures import ProcessPoolExecutor
import time
def _do_xcorr3(rev_header, packet_chunk, seq):
r1 = np.convolve(rev_header, packet_chunk, 'full')
return 0, seq, r1
def _do_power3(power_kernel, packet_chunk, seq):
cp = np.convolve(power_kernel, np.abs(packet_chunk) ** 2, 'full')
return 1, seq, cp
class ProcessPoolIssues():
    """Feed fixed-size chunks to a process pool and collect the two result streams.

    Every chunk is submitted twice (once to _do_xcorr3, once to _do_power3);
    finished results are sorted into results0 / results1 by their type tag.
    """

    def __init__(self,header,chunk_size=500,poolsize=5):
        """
        @param chunk_size how many samples to feed in during input() stage
        """
        self.chunk_size = chunk_size  # how many samples to feed

        # ProcessPool stuff
        self.poolsize = poolsize
        self.pool = ProcessPoolExecutor(poolsize)
        self.futures = []

        # xcr stage stuff, seeded with one placeholder result
        self.results0 = [(0, -1, np.zeros(chunk_size))]

        # power stage stuff, seeded with one placeholder result
        self.results1 = [(1, -1, np.zeros(chunk_size))]

        self.countin = 0
        self.countout = -1

    def shutdown(self):
        """Shut the pool down, waiting for it to finish."""
        self.pool.shutdown(wait=True)

    def all_done(self):
        """Return True if all data has been extracted for the given inputs."""
        return self.countout + 1 == self.countin

    def input(self, packet_chunk):
        """Main function: submit one chunk of chunk_size samples to both workers.

        @param packet_chunk an array of chunk_size samples to be computed
        """
        assert len(packet_chunk) == self.chunk_size
        for worker in (_do_xcorr3, _do_power3):
            self.futures.append(
                self.pool.submit(worker, packet_chunk, packet_chunk, self.countin))
        self.countin += 1

    def cultivate_pool(self):
        """Move the results of every finished future into results0 / results1
        and drop those futures from the pending list."""
        finished = []
        for index, future in enumerate(self.futures):
            # print "checking", future
            if not future.done():
                continue
            kind, seq, payload = future.result()
            if kind == 0:
                self.results0.append((kind, seq, payload))  # results from one type of future
            elif kind == 1:
                self.results1.append((kind, seq, payload))  # results from another type of future
            finished.append(index)

        # `finished` is in ascending order; delete from the end first so the
        # remaining indices stay valid as we go
        for index in finished[::-1]:
            del self.futures[index]
            if False: # change this to true and error goes away
                print("deleting future # %s" % (index,))

    # may return None
    def output(self):
        """Collect finished work, then discard one result of each type once both are available."""
        self.cultivate_pool()  # modifies self.results list

        # wait for both results to be done before clearing
        if self.results0 and self.results1:
            self.results0.pop(0)
            self.results1.pop(0)
            self.countout += 1

        return None
## Pushes random complex samples through ProcessPoolIssues chunk by chunk and
#  prints the achieved throughput.
def testRate():
    chunk = 500

    # a value of 10000 will throw: IOError: [Errno 32] Broken pipe
    # smaller values like 1000 do not
    din = chunk * 10000

    np.random.seed(666)  # fixed seed so every run uses the same data

    search = np.random.random(233) + np.random.random(233) * 1j
    # Named `samples` / `piece` so the builtins input() and slice() are not shadowed.
    samples = np.random.random(din) + np.random.random(din) * 1j

    pct = ProcessPoolIssues(search, chunk, poolsize=8)
    try:
        st = time.time()

        for x in range(0, len(samples), chunk):
            piece = samples[x:x + chunk]
            if len(piece) != chunk:
                break
            pct.input(piece)
            pct.output()

        # Keep draining until every submitted chunk has been accounted for.
        while not pct.all_done():
            pct.output()

        ed = time.time()
        dt = ed - st

        # Single-argument form prints the same text on Python 2 and 3.
        print("ran %s samples in %s sec: %s Msps" % (din, dt, din / dt / 1E6))
    finally:
        # Release the worker processes even if the run above failed.
        pct.shutdown()


if __name__ == '__main__':
    testRate()

This is probably happening because you're exceeding the buffer size of the pipe when you try sending in larger chunks at once.
## Convolution worker for the first result stream.
# @param rev_header first operand handed to np.convolve
# @param packet_chunk second operand handed to np.convolve
# @param seq caller-supplied sequence number, returned unchanged
# @return tuple (0, seq, r1) where r1 is the 'full'-mode convolution
def _do_xcorr3(rev_header, packet_chunk, seq):
    r1 = np.convolve(rev_header, packet_chunk, 'full')
    return 0, seq, r1
## Convolution worker for the second result stream.
# @param power_kernel kernel handed to np.convolve as the first operand
# @param packet_chunk samples; their squared magnitude is what gets convolved
# @param seq caller-supplied sequence number, returned unchanged
# @return tuple (1, seq, cp) where cp is the 'full'-mode convolution of
#         power_kernel with |packet_chunk|**2
def _do_power3(power_kernel, packet_chunk, seq):
    cp = np.convolve(power_kernel, np.abs(packet_chunk) ** 2, 'full')
    return 1, seq, cp
The values r1 and cp are very large because you are convolving with the square of the chunks.
Hence, when you try to run this with larger chunk sizes, the I/O pipe's buffer can't handle it. Refer to this for a clearer understanding.
As for the second part of the question,
if False:  # change this to true and error goes away
    # Single-argument form prints the same text on Python 2 and 3.
    print("deleting future # %s" % i)
Found this in the py3 docs:
16.2.4.4. Reentrancy
Binary buffered objects (instances of BufferedReader, BufferedWriter, BufferedRandom and BufferedRWPair) are not reentrant. While reentrant calls will not happen in normal situations, they can arise from doing I/O in a signal handler. If a thread tries to re-enter a buffered object which it is already accessing, a RuntimeError is raised. Note this doesn’t prohibit a different thread from entering the buffered object.
The above implicitly extends to text files, since the open() function will wrap a buffered object inside a TextIOWrapper. This includes standard streams and therefore affects the built-in function print() as well.

Multipart upload to Amazon Glacier: Content-Range incompatible with Content-Length

I'm trying to upload a file around 1gb in size to Amazon Glacier. Somewhat arbitrarily, I've decided to break it into 32mb parts and upload them in serial.
import math
import boto3
from botocore.utils import calculate_tree_hash
# Multipart upload of one local file to a Glacier vault, sent as 32 MiB ranges.
# This is the version that raises the InvalidParameterValueException shown in
# the traceback that follows it.
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
size_mb = size / (2**20) # Convert to megabytes for readability
local_file = 'filename'
# Start the multipart upload; the returned uploadId is reused for every part.
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
archiveDescription=local_file,
partSize=str(2**25)) # 32 mb in bytes
# floor() drops the remainder, so the final iteration has to cover the leftover bytes.
parts = math.floor(size_mb / 32)
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
for p in range(parts):
# Calculate lower and upper bounds for the byte ranges. The last range
# is bigger than the ones that come before.
lower = (p * (2**25))
# NOTE(review): for the last part `upper` is `size`, not `size - 1` -- confirm
# whether the range end is meant to be inclusive.
upper = (((p + 1) * (2**25)) - 1) if (p + 1 < parts) else (size)
# NOTE(review): body=upload passes the open file object itself; nothing in this
# loop reads or seeks `upload` to select only the bytes of the current range.
up_part = client.upload_multipart_part(vaultName=vault_name,
uploadId=multi_up['uploadId'],
range='bytes {}-{}/*'.format(lower, upper),
body=upload)
# NOTE(review): the same file object used in the loop is hashed here -- confirm
# its read position is where calculate_tree_hash expects it.
checksum = calculate_tree_hash(upload)
# Finish the upload by declaring the total archive size and its tree hash.
complete_up = client.complete_multipart_upload(archiveSize=str(size),
checksum=checksum,
uploadId=multi_up['uploadId'],
vaultName=vault_name)
This generates an error about the first byte range.
---------------------------------------------------------------------------
InvalidParameterValueException Traceback (most recent call last)
<ipython-input-2-9dd3ac986601> in <module>()
93 uploadId=multi_up['uploadId'],
94 range='bytes {}-{}/*'.format(lower, upper),
---> 95 body=upload)
96 upload_info.append(up_part)
97 checksum = calculate_tree_hash(upload)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
251 "%s() only accepts keyword arguments." % py_operation_name)
252 # The "self" in this scope is referring to the BaseClient.
--> 253 return self._make_api_call(operation_name, kwargs)
254
255 _api_call.__name__ = str(py_operation_name)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
555 error_code = parsed_response.get("Error", {}).get("Code")
556 error_class = self.exceptions.from_code(error_code)
--> 557 raise error_class(parsed_response, operation_name)
558 else:
559 return parsed_response
InvalidParameterValueException: An error occurred (InvalidParameterValueException) when calling the UploadMultipartPart operation:
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
Can anyone see what I'm doing wrong?

@Michael-sqlbot is quite right, the issue with the Content-Range was that I was passing the whole file instead of a part. I fixed this by using the read() method, but then I discovered a separate issue, which is that (per the docs), the final part has to be the same size or smaller than the preceding parts. This means using math.ceil() instead of math.floor() to define the number of parts.
The working code is:
import math
import boto3
from botocore.utils import calculate_tree_hash
# Multipart upload of one local file to a Glacier vault in parts of at most
# partSize bytes, followed by completion with the archive's tree hash.
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600 # in bytes
local_file = 'filename'
partSize = 2**25 # 32 mb in bytes

multi_up = client.initiate_multipart_upload(vaultName=vault_name,
                                            archiveDescription=local_file,
                                            partSize=str(partSize))

# The number of <=32mb parts we need; derived from partSize so the two
# cannot drift apart.
parts = math.ceil(size / partSize)

with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
    for p in range(parts):
        # Calculate lower and upper bounds for the byte ranges. Both bounds
        # are inclusive; the last range is now smaller than (or equal to)
        # the ones that come before.
        lower = p * partSize
        upper = min(lower + partSize, size) - 1
        read_size = upper - lower + 1
        file_part = upload.read(read_size)
        up_part = client.upload_multipart_part(vaultName=vault_name,
                                               uploadId=multi_up['uploadId'],
                                               range='bytes {}-{}/*'.format(lower, upper),
                                               body=file_part)

    # The loop above consumed the file, and calculate_tree_hash reads from the
    # current position, so rewind first; otherwise only the empty tail would
    # be hashed.
    upload.seek(0)
    checksum = calculate_tree_hash(upload)

complete_up = client.complete_multipart_upload(archiveSize=str(size),
                                               checksum=checksum,
                                               uploadId=multi_up['uploadId'],
                                               vaultName=vault_name)

Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
You're telling the API that you're sending the first 32 MiB, but you're actually sending (proposing to send) the entire file, since body=upload and upload isn't just the first part, it's the entire file. The Content-Length refers to the size of this part upload, which should be 33554432 (32 MiB).
The docs are admittedly ambiguous...
body (bytes or seekable file-like object) -- The data to upload.
...but the "data to upload" seems to refer to the data for only this part, in spite of the word "seekable."

Since the follow up answer from Alex claims it "works", I'm posting another version that worked for me under Python 3.5 and Ubuntu 16.04. I also added some environment variables from our production end to end solution.
The original post gave me an error so I tweaked it and provided some clean up. Hope this helps someone needing this Glacier functionality. Using a Shell script with awscli commands was not as clean.
import math
import boto3
import os
from botocore.utils import calculate_tree_hash
# Multipart upload of a file to a Glacier vault, configured through the
# GLACIER_VAULT_NAME and GLACIER_UPLOAD_FILE environment variables.
vault_name = os.getenv('GLACIER_VAULT_NAME')
file_name = os.getenv('GLACIER_UPLOAD_FILE')

# Exit codes distinguish which variable is missing (1: vault, 2: file).
# SystemExit is raised directly instead of relying on the site-provided exit().
if vault_name is None:
    print('GLACIER_VAULT_NAME environment variable is required. Exiting.')
    raise SystemExit(1)
if file_name is None:
    print('GLACIER_UPLOAD_FILE environment variable is required. Exiting.')
    raise SystemExit(2)

chunk_size = 2 ** 25

client = boto3.client('glacier')
client.create_vault(vaultName=vault_name)

upload_obj = client.initiate_multipart_upload(vaultName=vault_name,
                                              archiveDescription=file_name,
                                              partSize=str(chunk_size))

file_size = os.path.getsize(file_name)
parts = math.ceil(file_size / chunk_size)

with open(file_name, 'rb') as upload:
    for p in range(parts):
        lower = p * chunk_size
        # Inclusive end of this part, clamped to the last byte of the file.
        # The clamp must also apply when lower + chunk_size - 1 == file_size.
        upper = min(lower + chunk_size, file_size) - 1
        file_part = upload.read(chunk_size)
        up_part = client.upload_multipart_part(vaultName=vault_name,
                                               uploadId=upload_obj['uploadId'],
                                               range='bytes {}-{}/{}'.format(lower,
                                                                             upper,
                                                                             file_size),
                                               body=file_part)

# this needs a new file handler because calculate_tree_hash() processes
# the handler in a similar way to the loop above; the with-block makes sure
# that second handle is closed again
with open(file_name, 'rb') as archive:
    checksum = calculate_tree_hash(archive)

complete_up = client.complete_multipart_upload(vaultName=vault_name,
                                               uploadId=upload_obj['uploadId'],
                                               archiveSize=str(file_size),
                                               checksum=checksum)
print(complete_up)

C more reliable for networking than Python?

What I'm trying in Python
I am trying to copy a large file over a TCP connection in python (3.6). I have two functions: send_chunk (sends a little header, then some data) and recv_chunk (parses that header, returns the data). I split the file I'm sending into chunks and put them on the network as fast as it lets me. Until around 4-5 MB, everything works. Then, recv_chunk receives some incorrect data, and everything is toast.
What works in C
The same operation in C (as demonstrated by netcat) has no problem sending a 100MB file with no errors (also much lower CPU usage). I looked in the netcat code, and I just see normal-old select and read/write calls.
Question of the day:
What could be going wrong? Why does it seem so simple in C but it isn't working in python?
code, for reference:
def send_chunk(data, sock):
    """Send *data* over *sock* as one length-prefixed chunk.

    Wire format: one byte holding the number of decimal digits in the payload
    length, then the length as ASCII decimal digits, then the payload itself.
    A ``str`` payload is encoded as UTF-8 first.

    Raises:
        ConnectionError: if the socket accepts zero bytes, which would
            otherwise keep the loop spinning forever.
    """
    if isinstance(data, str):
        data = bytes(data, 'utf8')
    len_str = "{}".format(len(data))
    len_str_size = chr(len(len_str))
    send_data = bytes(len_str_size+len_str, 'ascii')+data
    total_sent = 0
    total_len = len(send_data)
    while total_sent < total_len:
        remaining = total_len - total_sent
        data_sent = sock.send(send_data[total_sent:])
        print('%f sending %d'%(time(),total_len))
        if data_sent == 0:
            raise ConnectionError('socket connection broken while sending')
        # Compare against what was still outstanding, not the whole message,
        # so a final send that finishes the job is not reported as short.
        if data_sent < remaining:
            print('only sent %d'%data_sent,flush=True)
        total_sent += data_sent
def recv_chunk(sock):
    """Receive one length-prefixed chunk from *sock* and return its payload.

    Expects one byte giving the size of the length field, then the payload
    length as ASCII decimal digits, then the payload bytes.

    Raises:
        ConnectionError: if the peer closes the connection before the whole
            chunk has arrived (``recv`` returning ``b''``).
    """
    payload_data = b''; size = 0
    len_data = b''; len_size = 0
    # get the length field size
    first = sock.recv(1)
    if not first:
        raise ConnectionError('connection closed before chunk header')
    len_size = ord(first)
    # get the length field
    while len(len_data) < len_size:
        piece = sock.recv(len_size-len(len_data))
        if not piece:
            raise ConnectionError('connection closed inside length field')
        len_data += piece
    size = int(len_data)
    # get the data
    while len(payload_data) < size:
        piece = sock.recv(min(size-len(payload_data), 2048))
        if not piece:
            raise ConnectionError('connection closed inside payload')
        payload_data += piece
    return payload_data

Your code works for me, but copying your data many times makes this slow.
Simply use sendall:
def send_chunk(data, sock):
    """Send *data* over *sock* with a 4-byte little-endian length prefix.

    A ``str`` payload is encoded as UTF-8 first. The header and the payload
    are handed to ``sendall`` separately, so the payload is never copied into
    a combined buffer.
    """
    if isinstance(data, str):
        data = bytes(data, 'utf8')
    sock.sendall(len(data).to_bytes(4, 'little'))
    sock.sendall(data)
def recv_chunk(sock):
    """Receive one chunk framed with a 4-byte little-endian length prefix.

    Returns the payload as ``bytes``.

    Raises:
        ConnectionError: if the peer closes the connection before the full
            chunk has arrived. ``recv`` returns ``b''`` in that case, which
            would otherwise make both loops spin forever.
    """
    size = b""
    while len(size) < 4:
        piece = sock.recv(4-len(size))
        if not piece:
            raise ConnectionError('connection closed inside length prefix')
        size += piece
    bytes_left = int.from_bytes(size, 'little')
    # get the data
    data = []
    while bytes_left:
        d = sock.recv(bytes_left)
        if not d:
            raise ConnectionError('connection closed inside payload')
        data.append(d)
        bytes_left -= len(d)
    return b''.join(data)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.