What does Python multiprocessing.reduction do internally?

I found the Python multiprocessing.reduction module while struggling to share a file descriptor between processes here.
My question is: what does the reduction module do internally to share a file descriptor between processes (the reduce_handle() and rebuild_handle() functions)?
Could you explain in detail?
# reduction.py
def reduce_handle(handle):
    if Popen.thread_is_spawning():
        return (None, Popen.duplicate_for_child(handle), True)
    dup_handle = duplicate(handle)
    _cache.add(dup_handle)
    sub_debug('reducing handle %d', handle)
    return (_get_listener().address, dup_handle, False)

def rebuild_handle(pickled_data):
    address, handle, inherited = pickled_data
    if inherited:
        return handle
    sub_debug('rebuilding handle %d', handle)
    conn = Client(address, authkey=current_process().authkey)
    conn.send((handle, os.getpid()))
    new_handle = recv_handle(conn)
    conn.close()
    return new_handle
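In short: reduce_handle() duplicates the descriptor and, unless the process is in the middle of spawning a child (in which case the duplicate is simply inherited), registers it with a background listener whose address is pickled instead of the raw fd; rebuild_handle() on the receiving side connects back to that listener, sends its own PID, and receives a descriptor valid in its own process via recv_handle(). A minimal usage sketch (Python 2 on Unix, where multiprocessing.reduction exposed these two functions; the socket plumbing is only illustrative):

import os
import socket
from multiprocessing import Process
from multiprocessing.reduction import reduce_handle, rebuild_handle

def child(pickled_handle):
    # rebuild_handle() connects back to the parent's listener and
    # receives a descriptor that is valid in this process.
    fd = rebuild_handle(pickled_handle)
    sock = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
    sock.sendall('hello from pid %d\n' % os.getpid())
    sock.close()

if __name__ == '__main__':
    server = socket.socket()
    server.bind(('127.0.0.1', 0))
    server.listen(1)
    client = socket.create_connection(server.getsockname())
    conn, _ = server.accept()
    # reduce_handle() duplicates the fd and returns a small picklable
    # tuple: (listener address, duplicated handle, inherited flag).
    pickled_handle = reduce_handle(conn.fileno())
    p = Process(target=child, args=(pickled_handle,))
    p.start()
    p.join()
    print(client.recv(1024))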

Related

Redis is loading the dataset in memory

I'm using Redis Sentinel with three nodes: one master and two slaves.
I have gone through this, but it assumes the error occurs infrequently and can simply be retried; my approach might be wrong here.
This is how I am handling the reconfiguration of the nodes:
import redis

# Initialize on system boot
slave_node_1: redis.Redis = None
slave_node_2: redis.Redis = None
master_node: redis.Redis = None

# Handling the reconfiguration of the nodes.
def reconfig_redis_nodes():
    sentinel = redis.Sentinel([
        (REDIS_HOST_0, SENTINEL_PORT),
        (REDIS_HOST_1, SENTINEL_PORT),
        (REDIS_HOST_2, SENTINEL_PORT)
    ], sentinel_kwargs={'password': REDIS_SENTINEL_PASSWORD})
    host, port = sentinel.discover_master(REDIS_MASTER_NAME)
    globals()['master_node'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    slave_nodes = sentinel.discover_slaves(REDIS_MASTER_NAME)
    try:
        host, port = slave_nodes[0]
        globals()['slave_node_1'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass
    try:
        host, port = slave_nodes[1]
        globals()['slave_node_2'] = redis.Redis(host=host, port=port, db=REDIS_DB, username=REDIS_USER, password=REDIS_PASSWORD, decode_responses=True)
    except IndexError:
        pass
# Decorator to handle config change
def handle_redis_failover_master_switch(func):
    def inner(*args, **kwargs):
        retries = 0
        max_retry = 5
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                reconfig_redis_nodes()
                retries += 1
                if retries > max_retry:
                    logger.critical(str(e))
                    raise Exception(e)
    return inner

# And this is the Redis method I am using to set a lock.
@handle_redis_failover_master_switch
def setnx(key: str, value: str, ttl_secs: int = 10):
    return master_node.set(key, value, nx=True, ex=ttl_secs)
When I manually called these functions from the shell, they worked fine. But once deployed (at roughly 10 requests per second), Redis started throwing "Redis is loading the dataset in memory".
What is the cause of the issue here, and how can I handle it gracefully?
Is it a bad idea to use Sentinel for a locking system?
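The "LOADING Redis is loading the dataset in memory" reply comes from a node that is still loading its RDB/AOF after a restart or failover, which is exactly the window in which the decorator above retries immediately. A hedged variant (assuming redis-py, which surfaces that reply as redis.exceptions.BusyLoadingError) would back off before re-discovering the topology:

import time
from redis.exceptions import BusyLoadingError, ConnectionError as RedisConnectionError

def handle_redis_failover_master_switch(func):
    def inner(*args, **kwargs):
        max_retries = 5
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except (BusyLoadingError, RedisConnectionError):
                # Back off so the promoted node has time to finish loading
                # before we re-discover the topology and retry.
                time.sleep(0.5 * 2 ** attempt)
                reconfig_redis_nodes()
        return func(*args, **kwargs)  # final attempt; let any error propagate
    return inner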

Python3: cassandra.cluster.NoHostAvailable: ("Unable to connect to any servers using keyspace 'test'", ['127.0.0.1']) when using execute_async future

I am trying to fetch data from a specific table in Cassandra and insert it into another Cassandra table after making some changes. Both tables are located in the keyspace "test". When I get the data from the first table, everything works fine and it is able to fetch the data. However, in the future handler that handles the output of the first query, where I try to insert the data into another table under the same Cassandra instance, it fails. I am getting an error from the application stating "cassandra.cluster.NoHostAvailable: ("Unable to connect to any servers using keyspace 'test'", ['127.0.0.1'])". I am not sure where I am going wrong.
import threading
from threading import Event
from cassandra.query import SimpleStatement
from cassandra.cluster import Cluster

hosts = ['127.0.0.1']
keyspace = "test"
thread_local = threading.local()
cluster_ = Cluster(hosts)

def get_session():
    if hasattr(thread_local, "cassandra_session"):
        print("got session from threadlocal")
        return thread_local.cassandra_session
    print(" Connecting to Cassandra Host " + str(hosts))
    session_ = cluster_.connect(keyspace)
    print(" Connecting and creating session to Cassandra KeySpace " + keyspace)
    thread_local.cassandra_session = session_
    return session_

class PagedResultHandler(object):
    def __init__(self, future):
        self.error = None
        self.finished_event = Event()
        self.future = future
        self.future.add_callbacks(
            callback=self.handle_page,
            errback=self.handle_error)

    def handle_page(self, rows):
        for row in rows:
            process_row(row)
        if self.future.has_more_pages:
            self.future.start_fetching_next_page()
        else:
            self.finished_event.set()

    def handle_error(self, exc):
        self.error = exc
        self.finished_event.set()

def process_row(row):
    print(row)
    session_ = get_session()
    stmt = session_.prepare(
        "INSERT INTO test.data(customer,snr,rttt, event_time) VALUES (?,?,?,?)")
    results = session_.execute(stmt,
                               [row.customer, row.snr, row.rttt, row.created_time])
    print("Done")

session = get_session()
query = "select * from test.data_log"
statement = SimpleStatement(query, fetch_size=1000)
future = session.execute_async(statement)
handler = PagedResultHandler(future)
handler.finished_event.wait()
if handler.error:
    raise handler.error
cluster_.shutdown()
However, when I execute the Python file, the application throws "cassandra.cluster.NoHostAvailable: ("Unable to connect to any servers using keyspace 'test'", ['127.0.0.1'])" from the get_session() call inside process_row. Clearly, the first call to Cassandra succeeds without any issues; there is no connectivity issue, and the Cassandra instance is running fine locally. I am able to query the data using cqlsh. If I call process_row outside the future handler, everything works fine, so I am not sure what needs to be done to make it work from the future handler.
Connecting to Cassandra Host ['127.0.0.1']
Connecting and creating session to Cassandra KeySpace test
Row(customer='abcd', snr=100, rttt=121, created_time=datetime.datetime(2020, 8, 8, 2, 26, 51))
Connecting to Cassandra Host ['127.0.0.1']
Traceback (most recent call last):
File "test/check.py", , in <module>
raise handler.error
File "cassandra/cluster.py", line 4579, in cassandra.cluster.ResponseFuture._set_result
File "cassandra/cluster.py", line 4777, in cassandra.cluster.ResponseFuture._set_final_result
File "test/check.py"", in handle_page
process_row(row)
File "test/check.py"", in process_row
session_ = get_session()
File "/test/check.py"", in get_session
session_ = cluster_.connect(keyspace)
File "cassandra/cluster.py", line 1715, in cassandra.cluster.Cluster.connect
File "cassandra/cluster.py", line 1772, in cassandra.cluster.Cluster._new_session
File "cassandra/cluster.py", line 2553, in cassandra.cluster.Session.__init__
cassandra.cluster.NoHostAvailable: ("Unable to connect to any servers using keyspace 'test'", ['127.0.0.1'])
Process finished with exit code 1
Ok, so Cassandra recommends the following:
Use at most one Session per keyspace, or use a single Session and explicitly specify the keyspace in your queries
https://www.datastax.com/blog/4-simple-rules-when-using-datastax-drivers-cassandra
In your code you try to create a session every time the read query retrieves some rows.
To force the code to use at most one session, we can create a queue on which the child thread sends each row to the main thread, and the main thread then handles it by executing the insert query. We do this in the main thread because I've experienced issues executing queries from a child thread.
from queue import Queue, Empty
from cassandra.query import dict_factory

callback_queue = Queue()
session = cluster_.connect(keyspace)
session.row_factory = dict_factory  # because the queue doesn't accept a Row instance

class PagedResultHandler(object):
    ...
    def handle_page(self, rows):
        for row in rows:
            callback_queue.put(row)  # here we pass the row as a dict to the queue
    ...

def process_rows():
    while True:
        try:
            # here we retrieve the row as a dict from the child thread;
            # the timeout makes the Empty branch actually reachable
            row = callback_queue.get(timeout=5)
            stmt = session.prepare(
                "INSERT INTO test.data(customer,snr,rttt, event_time) VALUES (?,?,?,?)")
            results = session.execute(stmt,
                                      [row['customer'], row['snr'], row['rttt'], row['created_time']])
            print("Done")
        except Empty:
            pass

query = "select * from test.data_log"
statement = SimpleStatement(query, fetch_size=1000)
future = session.execute_async(statement)
handler = PagedResultHandler(future)
process_rows()  # for now the code will hang here because we have an infinite loop in this function
handler.finished_event.wait()
if handler.error:
    raise handler.error
cluster_.shutdown()
This will get it to work, but I would replace the while True, or you will end up in an infinite loop.
Ok, so in that case we do two things: multithreading and batch inserting. I think that if we batch insert, parallelism is not required, because batching alone speeds things up enough from the client side; multithreading wouldn't add much speed on top of that, as this is not a CPU-intensive task.
import threading
from threading import Event
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement, BatchStatement, dict_factory

session = cluster_.connect(keyspace)
session.row_factory = dict_factory

class Fetcher:
    def __init__(self, session):
        self.session = session
        query = "select * from test.data_log"
        self.statement = SimpleStatement(query, fetch_size=1000)

    def run(self):
        rows = self.session.execute(self.statement)
        temp_rows = []
        total = 0
        for row in rows:
            temp_rows.append(row)
            if len(temp_rows) == 1000:
                handler = PagedResultHandler(self.session, temp_rows)
                handler.start()
                temp_rows = []
        # insert whatever is left over (fewer than 1000 rows)
        handler = PagedResultHandler(self.session, temp_rows)
        handler.start()

    def handle_error(self, err=None):
        print(err)

class PagedResultHandler(threading.Thread):
    def __init__(self, session, rows):
        super().__init__()
        self.session = session
        self.error = None
        self.rows = rows
        self.finished_event = Event()

    def run(self):
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
        # use self.session here, not a global session
        stmt = self.session.prepare(
            "INSERT INTO test.data(id, customer,snr,rttt, event_time) VALUES (?,?,?,?,?)")
        for row in self.rows:
            batch.add(stmt, [1, row['customer'], row['snr'], row['rttt'], row['created_time']])
        results = self.session.execute(batch)
        print(results)

Fetcher(session).run()
This script does both batch inserting and multithreading, but again, the multithreading seems unnecessary.
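As a further aside (not from the original answers), the DataStax driver ships a helper that covers this fan-out pattern without hand-rolled threads; a minimal sketch using cassandra.concurrent.execute_concurrent_with_args, reusing the session and dict_factory setup from above:

from cassandra.query import SimpleStatement
from cassandra.concurrent import execute_concurrent_with_args

# Prepare the insert once; the driver then pipelines the executions
# concurrently over its existing connection pool.
insert_stmt = session.prepare(
    "INSERT INTO test.data(customer,snr,rttt, event_time) VALUES (?,?,?,?)")
rows = session.execute(SimpleStatement("select * from test.data_log", fetch_size=1000))
params = [(row['customer'], row['snr'], row['rttt'], row['created_time']) for row in rows]
results = execute_concurrent_with_args(
    session, insert_stmt, params, concurrency=50, raise_on_first_error=False)
for success, result in results:
    if not success:
        print(result)  # on failure, result holds the exception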

Multiprocessing for a RabbitMQ Queue Pika

I am working with RabbitMQ queues. I want to run multiple consumer object instances through a single program. Below is my Operator class, which creates one producer and one consumer:
class Operator(object):
    def __init__(self, delegate: callable, identifier):
        """
        Create a new instance of the Operator and initialize the connections
        """
        self._queue_details = self._get_queue_details()
        self._host_ip = self._queue_details['IP']
        self._port = self._queue_details['Port']
        self._username = self._queue_details['Username']
        self._password = self._queue_details['Password']
        self._input_queue_name = self._queue_details['ReadQueueName']
        self._output_queue_name = self._queue_details['WriteQueueName']
        self._error_queue_name = self._queue_details['ErrorQueueName']
        self._delegate = delegate
        self._identifier = identifier
        self._queue_connection = None
        self._input_channel = None
        self._output_channel = None
        self._error_channel = None
        self.is_busy = False
        self.mark_to_terminate = False

    def __del__(self):
        # close connections
        self._queue_connection.close()

    @staticmethod
    def _initialize_channel(connection, queue_name, durable):
        channel = connection.channel()
        channel.queue_declare(queue=queue_name, durable=durable)
        return channel

    @staticmethod
    def _get_queue_details() -> dict:
        return ConfigurationManager().get_value('queueDetails')

    @staticmethod
    def _get_connection(username, password, host_ip, port):
        connection = pika.BlockingConnection(pika.ConnectionParameters(
            credentials=pika.PlainCredentials(username, password), host=host_ip, port=port))
        return connection

    def initialize_operator(self):
        connection = self._get_connection(self._username, self._password, self._host_ip, self._port)
        self._queue_connection = connection
        self._input_channel = self._initialize_channel(connection, self._input_queue_name, durable=False)
        self._output_channel = self._initialize_channel(connection, self._output_queue_name, durable=True)
        self._error_channel = self._initialize_channel(connection, self._error_queue_name, durable=True)

    def consume(self):
        self._input_channel.basic_qos(prefetch_count=1)
        self._input_channel.basic_consume(self._process_incoming_message, queue=self._input_queue_name)
        self._input_channel.start_consuming()

    def _push_to_queue(self, channel, response):
        channel.basic_publish(exchange='', routing_key=self._output_queue_name, body=response,
                              properties=pika.BasicProperties(delivery_mode=2))  # make message persistent

    def _process_incoming_message(self, channel, method, properties, message):
        self.is_busy = True
        processed_result, is_error = self._delegate(message)
        if is_error:
            self._error_channel.basic_publish(exchange='', routing_key=self._output_queue_name, body=processed_result,
                                              properties=pika.BasicProperties(delivery_mode=2))
        else:
            self._output_channel.basic_publish(exchange='', routing_key=self._output_queue_name, body=processed_result,
                                               properties=pika.BasicProperties(delivery_mode=2))
        # send the final ack for the message
        channel.basic_ack(delivery_tag=method.delivery_tag)
        # close the connection to stop receiving further messages
        if self.mark_to_terminate:
            self._queue_connection.close()
        self.is_busy = False
And from my main script I spin up the agents like below:
# spin up the agents
for count in range(spin_up_count):
    instance = Operator(self._translate_and_parse, f'Operator: {time.time()}')
    instance.initialize_operator()
    process = Process(target=instance.consume)
    process.start()
    self._online_agents.append((instance, process))
The problem is that when I call process.start(), it throws a TypeError:
TypeError: can't pickle _thread.lock objects
Complete Stack trace
File "C:/Users/adity/Documents/PythonProjects/Caligo/Caligo/QueueService.py", line 201, in _scale_up
process.start()
File "C:\Users\adity\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Users\adity\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\adity\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\adity\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\adity\AppData\Local\Programs\Python\Python36-32\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
Don't instantiate your Operator objects prior to starting the forked processes, and don't make instance.consume the target of the forked process.
The target of the Process instance should instead be a function that creates the Operator instance and then calls its consume method.
If you need to manage the forked processes, keep track of their process IDs and use signals to communicate with them.
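A hedged sketch of that pattern (assuming Operator and a module-level delegate function translate_and_parse are importable in the child process; a bound method such as self._translate_and_parse would itself have to be pickled, which is part of the problem):

import time
from multiprocessing import Process

def run_operator(identifier):
    # Build the Operator, and therefore the pika connection, inside the
    # child process so that no _thread.lock crosses the pickle boundary.
    operator = Operator(translate_and_parse, identifier)
    operator.initialize_operator()
    operator.consume()

processes = []
for count in range(spin_up_count):
    process = Process(target=run_operator, args=(f'Operator: {time.time()}',))
    process.start()
    processes.append(process)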

Watchdog compatibility: A workaround for "CancelIoEx"

Using the Python watchdog file-system event watching library, I noticed that when used under Windows Server 2003 it entered "polling mode", thereby no longer using asynchronous OS notifications and, therefore, heavily reducing system performance under a large volume of file changes.
I traced the problem to the watchdog/observers/winapi.py file, where the CancelIoEx system call is used to stop the ReadDirectoryChangesW call from blocking when the user wants to stop monitoring the watched directory or file:
(winapi.py)
CancelIoEx = ctypes.windll.kernel32.CancelIoEx
CancelIoEx.restype = ctypes.wintypes.BOOL
CancelIoEx.errcheck = _errcheck_bool
CancelIoEx.argtypes = (
    ctypes.wintypes.HANDLE,     # hObject
    ctypes.POINTER(OVERLAPPED)  # lpOverlapped
)

...

def close_directory_handle(handle):
    try:
        CancelIoEx(handle, None)  # force ReadDirectoryChangesW to return
    except WindowsError:
        return
The problem with the CancelIoEx call is that it is not available until Windows Server 2008:
http://msdn.microsoft.com/en-us/library/windows/desktop/aa363792(v=vs.85).aspx
One possible alternative is to change close_directory_handle so that it creates a mock file within the monitored directory, thus unblocking the thread waiting for ReadDirectoryChangesW to return.
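A hedged sketch of that mock-file workaround (handle2path is a hypothetical mapping from watchdog handles to their watched paths; CloseHandle is the winapi wrapper already used by the module):

import os
import tempfile

def close_directory_handle(handle):
    # Create and delete a throwaway file inside the watched directory:
    # the pending synchronous ReadDirectoryChangesW call observes the
    # change and returns, after which the handle can be closed safely.
    path = handle2path[handle]  # hypothetical handle -> path mapping
    fd, tmp_name = tempfile.mkstemp(dir=path)
    os.close(fd)
    os.remove(tmp_name)
    CloseHandle(handle)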
However, I noticed that the CancelIo system call is in fact available in Windows Server 2003:
Cancels all pending input and output (I/O) operations that are issued
by the calling thread for the specified file. The function does not
cancel I/O operations that other threads issue for a file handle. To
cancel I/O operations from another thread, use the CancelIoEx
function.
But calling CancelIo won't affect the waiting thread.
Do you have any idea how to solve this problem?
Maybe threading.enumerate() could be used to issue a signal to be handled by each thread, with CancelIo called from those handlers?
The natural approach is to implement a completion routine and call ReadDirectoryChangesW using its overlapped mode. The following example shows how to do that:
RDCW_CALLBACK_F = ctypes.WINFUNCTYPE(None, ctypes.wintypes.DWORD, ctypes.wintypes.DWORD, ctypes.POINTER(OVERLAPPED))
First, create a WINFUNCTYPE factory, which will be used to generate C-like functions (callable from the Windows API) from Python methods. In this case there is no return value and there are 3 parameters, corresponding to the FileIOCompletionRoutine header:
VOID CALLBACK FileIOCompletionRoutine(
    _In_    DWORD dwErrorCode,
    _In_    DWORD dwNumberOfBytesTransfered,
    _Inout_ LPOVERLAPPED lpOverlapped
);
The callback reference, as well as the overlapped structure, need to be added to the ReadDirectoryChangesW argument list:

ReadDirectoryChangesW = ctypes.windll.kernel32.ReadDirectoryChangesW
ReadDirectoryChangesW.restype = ctypes.wintypes.BOOL
ReadDirectoryChangesW.errcheck = _errcheck_bool
ReadDirectoryChangesW.argtypes = (
    ctypes.wintypes.HANDLE,                 # hDirectory
    LPVOID,                                 # lpBuffer
    ctypes.wintypes.DWORD,                  # nBufferLength
    ctypes.wintypes.BOOL,                   # bWatchSubtree
    ctypes.wintypes.DWORD,                  # dwNotifyFilter
    ctypes.POINTER(ctypes.wintypes.DWORD),  # lpBytesReturned
    ctypes.POINTER(OVERLAPPED),             # lpOverlapped
    RDCW_CALLBACK_F                         # lpCompletionRoutine
)
From here, we are ready to perform the overlapped system call.
This is a simple callback, just useful to test that everything works fine:

def dir_change_callback(dwErrorCode, dwNumberOfBytesTransfered, p):
    print("dir_change_callback! PID:" + str(os.getpid()))
    print("CALLBACK THREAD: " + str(threading.currentThread()))
Prepare and perform the call:
event_buffer = ctypes.create_string_buffer(BUFFER_SIZE)
nbytes = ctypes.wintypes.DWORD()
overlapped_read_dir = OVERLAPPED()
call2pass = RDCW_CALLBACK_F(dir_change_callback)
hand = get_directory_handle(os.path.abspath("/test/"))

def docall():
    ReadDirectoryChangesW(hand, ctypes.byref(event_buffer),
                          len(event_buffer), False,
                          WATCHDOG_FILE_NOTIFY_FLAGS,
                          ctypes.byref(nbytes),
                          ctypes.byref(overlapped_read_dir), call2pass)
    print("Waiting!")

docall()
If you load and execute all this code in a DreamPie interactive shell, you can check that the system call completes and that the callback executes, printing the thread and PID numbers after the first change made under the c:\test directory. You will also notice that these are the same as those of the main thread and process: although the event is raised by a separate thread, the callback runs in the same process and thread as our main program, which is undesired behaviour:
lck = threading.Lock()

def dir_change_callback(dwErrorCode, dwNumberOfBytesTransfered, p):
    print("dir_change_callback! PID:" + str(os.getpid()))
    print("CALLBACK THREAD: " + str(threading.currentThread()))
    ...
    ...
    ...

lck.acquire()
print("Waiting!")
docall()
lck.acquire()
This program will lock the main thread and the callback will never execute (the completion routine is delivered as an APC to the calling thread, and APCs only run while that thread is in an alertable wait state, which lck.acquire() is not).
I tried many synchronization tools, even Windows API semaphores, always getting the same behaviour. So, finally, I decided to implement the asynchronous call using the synchronous configuration of ReadDirectoryChangesW within a separate process, managed and synchronized using the multiprocessing Python library:
Calls to get_directory_handle won't return the handle number given by the Windows API but one managed by the winapi library; for that, I implemented a handle generator:
class FakeHandleFactory():
    _hl = threading.Lock()
    _next = 0

    @staticmethod
    def next():
        FakeHandleFactory._hl.acquire()
        ret = FakeHandleFactory._next
        FakeHandleFactory._next += 1
        FakeHandleFactory._hl.release()
        return ret
Each generated handle has to be globally associated with a file system path:
handle2file = {}
Each call to read_directory_changes will now create a ReadDirectoryRequest object (derived from multiprocessing.Process):
class ReadDirectoryRequest(multiprocessing.Process):
    def _perform_and_wait4request(self, path, recursive, event_buffer, nbytes):
        hdl = CreateFileW(path, FILE_LIST_DIRECTORY, WATCHDOG_FILE_SHARE_FLAGS,
                          None, OPEN_EXISTING, WATCHDOG_FILE_FLAGS, None)
        # print("path: " + path)
        aux_buffer = ctypes.create_string_buffer(BUFFER_SIZE)
        aux_n = ctypes.wintypes.DWORD()
        # print("_perform_and_wait4request! PID:" + str(os.getpid()))
        # print("CALLBACK THREAD: " + str(threading.currentThread()) + "\n----------")
        try:
            ReadDirectoryChangesW(hdl, ctypes.byref(aux_buffer),
                                  len(event_buffer), recursive,
                                  WATCHDOG_FILE_NOTIFY_FLAGS,
                                  ctypes.byref(aux_n), None, None)
        except WindowsError as e:
            print("!" + str(e))
            if e.winerror == ERROR_OPERATION_ABORTED:
                nbytes = 0
                event_buffer = []
            else:
                nbytes = 0
                event_buffer = []
        nbytes.value = aux_n.value
        for i in xrange(self.int_class(aux_n.value)):
            event_buffer[i] = aux_buffer[i]
        CloseHandle(hdl)
        try:
            self.lck.release()
        except:
            pass

    def __init__(self, handle, recursive):
        buffer = ctypes.create_string_buffer(BUFFER_SIZE)
        self.event_buffer = multiprocessing.Array(ctypes.c_char, buffer)
        self.nbytes = multiprocessing.Value(ctypes.wintypes.DWORD, 0)
        targetPath = handle2file.get(handle, None)
        super(ReadDirectoryRequest, self).__init__(
            target=self._perform_and_wait4request,
            args=(targetPath, recursive, self.event_buffer, self.nbytes))
        self.daemon = True
        self.lck = multiprocessing.Lock()
        self.result = None
        # Python 2/3 compat
        try:
            self.int_class = long
        except NameError:
            self.int_class = int
        if targetPath is None:
            self.result = ([], -1)

    def CancelIo(self):
        try:
            self.result = ([], 0)
            self.lck.release()
        except:
            pass

    def read_changes(self):
        # print("read_changes! PID:" + str(os.getpid()))
        # print("CALLBACK THREAD: " + str(threading.currentThread()) + "\n----------")
        if self.result is not None:
            raise Exception("ReadDirectoryRequest object can be used only once!")
        self.lck.acquire()
        self.start()
        self.lck.acquire()
        self.result = (self.event_buffer, self.int_class(self.nbytes.value))
        return self.result
This class extends Process, providing a process which performs the system call and waits until either:
A change event has been raised, or
The main thread cancels the request by calling the ReadDirectoryRequest object's CancelIo method.
Note that the roles of:
get_directory_handle
close_directory_handle
read_directory_changes
are now to manage requests. For that, thread locks and auxiliary data structures are needed:
rqIndexLck = threading.Lock() # Protects the access to `rqIndex`
rqIndex = {} # Maps handles to request objects sets.
get_directory_handle
def get_directory_handle(path):
    rqIndexLck.acquire()
    ret = FakeHandleFactory.next()
    handle2file[ret] = path
    rqIndexLck.release()
    return ret
close_directory_handle
def close_directory_handle(handle):
    rqIndexLck.acquire()
    rqset4handle = rqIndex.get(handle, None)
    if rqset4handle is not None:
        for rq in rqset4handle:
            rq.CancelIo()
        del rqIndex[handle]
    if handle in handle2file:
        del handle2file[handle]
    rqIndexLck.release()
And last but not least: read_directory_changes
def read_directory_changes(handle, recursive):
    rqIndexLck.acquire()
    rq = ReadDirectoryRequest(handle, recursive)
    set4handle = None
    if handle in rqIndex:
        set4handle = rqIndex[handle]
    else:
        set4handle = set()
        rqIndex[handle] = set4handle
    set4handle.add(rq)
    rqIndexLck.release()
    ret = rq.read_changes()
    rqIndexLck.acquire()
    if rq in set4handle:
        set4handle.remove(rq)
    rqIndexLck.release()
    return ret
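A short usage sketch of the replacement API above (the path is illustrative):

# One request lifecycle with the replacement API.
handle = get_directory_handle(r"C:\test")
# Blocks until something changes under C:\test, or until
# close_directory_handle() cancels the in-flight request.
event_buffer, nbytes = read_directory_changes(handle, recursive=True)
close_directory_handle(handle)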

Accessing unregistered COM objects from python via a registered TLB

I have three pieces of code that I'm working with at the moment:
A closed source application (Main.exe)
A closed source VB COM object implemented as a dll (comobj.dll)
Code that I am developing in Python
comobj.dll hosts a COM object (let's say 'MainInteract') that I would like to use from Python. I can already use this object perfectly well from IronPython, but due to other requirements I need to use it from regular Python. I believe the best method here is to use win32com, but I can't quite make any headway at all.
First, some working IronPython code:
import clr
import os
import sys
__dir__ = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, __dir__)
sys.path.append(r"C:\Path\To\comobj.dll") #This is where the com object dll actually is
clr.AddReferenceToFileAndPath(os.path.join(__dir__, r'comobj_1_1.dll')) #This is the .NET interop assembly that was created automatically via SharpDevelop's COM Inspector
from comobj_1_1 import clsMainInteract
o = clsMainInteract()
o.DoStuff(True)
And now the code that I attempted in regular Python:
>>> import win32com.client
>>> win32com.client.Dispatch("{11111111-comobj_guid_i_got_from_com_inspector}")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python26\lib\site-packages\win32com\client\__init__.py", line 95, in Dispatch
dispatch, userName = dynamic._GetGoodDispatchAndUserName(dispatch,userName,clsctx)
File "C:\Python26\lib\site-packages\win32com\client\dynamic.py", line 104, in _GetGoodDispatchAndUserName
return (_GetGoodDispatch(IDispatch, clsctx), userName)
File "C:\Python26\lib\site-packages\win32com\client\dynamic.py", line 84, in _GetGoodDispatch
IDispatch = pythoncom.CoCreateInstance(IDispatch, None, clsctx, pythoncom.IID_IDispatch)
pywintypes.com_error: (-2147221164, 'Class not registered', None, None)
I have also attempted using the friendly name of the TLB:
>>> import win32com.client
>>> win32com.client.Dispatch("Friendly TLB Name I Saw")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python26\lib\site-packages\win32com\client\__init__.py", line 95, in Dispatch
dispatch, userName = dynamic._GetGoodDispatchAndUserName(dispatch,userName,clsctx)
File "C:\Python26\lib\site-packages\win32com\client\dynamic.py", line 104, in _GetGoodDispatchAndUserName
return (_GetGoodDispatch(IDispatch, clsctx), userName)
File "C:\Python26\lib\site-packages\win32com\client\dynamic.py", line 84, in _GetGoodDispatch
IDispatch = pythoncom.CoCreateInstance(IDispatch, None, clsctx, pythoncom.IID_IDispatch)
pywintypes.com_error: (-2147221005, 'Invalid class string', None, None)
In fact, the only success I've had was this:
>>> import pythoncom
>>> tlb = pythoncom.LoadRegTypeLib("{11111111-comobj_guid_i_got_from_com_inspector}", 1, 1, 0)
>>> tlb
<PyITypeLib at 0x00AD7D78 with obj at 0x0025EDF0>
>>> tlb.GetDocumentation(1)
(u'clsMainInteract', None, 0, None)
But I'm not sure how to go from there to getting an object. I think my problem is that I need to load the DLL into my process and get it to register itself with my process's COM source, so I can properly CoCreateInstance / win32com.client.Dispatch() on it.
I have also seen Activation Contexts referenced, especially when talking about "registration-free COM", but typically in sentences like "Windows will create a context for you if you specify the right stuff in your .manifest files". I'd like to avoid manifest files if possible, as one would be required in the same folder as the (closed-source) COM object DLL, and I'd rather not drop any files in that directory if I can avoid it.
Thanks for the help.
What I did to access Free Download Manager's type library was the following:
import pythoncom, win32com.client

fdm = pythoncom.LoadTypeLib('fdm.tlb')
downloads_stat = None
for index in xrange(0, fdm.GetTypeInfoCount()):
    type_name = fdm.GetDocumentation(index)[0]
    if type_name == 'FDMDownloadsStat':
        type_iid = fdm.GetTypeInfo(index).GetTypeAttr().iid
        downloads_stat = win32com.client.Dispatch(type_iid)
        break
downloads_stat.BuildListOfDownloads(True, True)
print downloads_stat.Download(0).Url
The code above will print the URL of the first download.
Here is a method I devised to load a COM object from a DLL. It was based on a lot of reading about COM, etc. I'm not 100% sure about the last lines, specifically the d = line; I think that only works if IID_IDispatch is passed in (which is the default parameter).
In addition, I believe this code leaks: for one, the DLL is never unloaded (use ctypes.windll.kernel32.FreeLibrary), and I believe the COM ref counts for the initial class factory are off by one and thus never get released. Still, this works for my application.
import pythoncom
import win32com.client

def CreateInstanceFromDll(dll, clsid_class, iid_interface=pythoncom.IID_IDispatch,
                          pUnkOuter=None, dwClsContext=pythoncom.CLSCTX_SERVER):
    from uuid import UUID
    from ctypes import OleDLL, c_long, byref
    e = OleDLL(dll)
    clsid_class = UUID(clsid_class).bytes_le
    iclassfactory = UUID(str(pythoncom.IID_IClassFactory)).bytes_le
    com_classfactory = c_long(0)
    hr = e.DllGetClassObject(clsid_class, iclassfactory, byref(com_classfactory))
    MyFactory = pythoncom.ObjectFromAddress(com_classfactory.value, pythoncom.IID_IClassFactory)
    i = MyFactory.CreateInstance(pUnkOuter, iid_interface)
    d = win32com.client.__WrapDispatch(i)
    return d
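Usage might look like this (reusing the placeholder GUID from the question; the DLL path and the DoStuff method are whatever your COM object actually exposes):

obj = CreateInstanceFromDll(r"C:\Path\To\comobj.dll",
                            "{11111111-comobj_guid_i_got_from_com_inspector}")
obj.DoStuff(True)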
For a useful utility module that wraps the object-from-DLL case, as well as others, see https://gist.github.com/4219140
__all__ = (
    ####### Class Objects
    # CoGetClassObject - Normal, not wrapped
    'CoDllGetClassObject',                  # Get ClassObject from a DLL file
    ####### ClassFactory::CreateInstance Wrappers
    'CoCreateInstanceFromFactory',          # Create an object via IClassFactory::CreateInstance
    'CoCreateInstanceFromFactoryLicenced',  # Create a licenced object via IClassFactory2::CreateInstanceLic
    ###### Util
    'CoReleaseObject',                      # Calls Release() on a COM object
    ###### Main Utility Methods
    # 'CoCreateInstance',                   # Not wrapped, normal call
    'CoCreateInstanceLicenced',             # CoCreateInstance, but with a licence key
    ###### Hacky DLL methods for reg-free COM without Activation Contexts, manifests, etc
    'CoCreateInstanceFromDll',              # Given a dll, a clsid, and an iid, create an object
    'CoCreateInstanceFromDllLicenced',      # Given a dll, a clsid, an iid, and a licence key, create an object
)

IID_IClassFactory2 = "{B196B28F-BAB4-101A-B69C-00AA00341D07}"

from uuid import UUID
from ctypes import OleDLL, WinDLL, c_ulong, byref, WINFUNCTYPE, POINTER, c_char_p, c_void_p
from ctypes.wintypes import HRESULT
import pythoncom
import win32com.client
import logging

log = logging.getLogger(__name__)

def _raw_guid(guid):
    """Given a string GUID, or a pythoncom IID, return the GUID laid out in memory suitable for passing to ctypes"""
    return UUID(str(guid)).bytes_le

proto_icf2_base = WINFUNCTYPE(HRESULT,
    c_ulong,
    c_ulong,
    c_char_p,
    c_ulong,
    POINTER(c_ulong),
)

IClassFactory2__CreateInstanceLic = proto_icf2_base(7, 'CreateInstanceLic', (
    (1, 'pUnkOuter'),
    (1 | 4, 'pUnkReserved'),
    (1, 'riid'),
    (1, 'bstrKey'),
    (2, 'ppvObj'),
), _raw_guid(IID_IClassFactory2))

#--------------------------------
def _pc_wrap(iptr, resultCLSID=None):
    # return win32com.client.__WrapDispatch(iptr)
    log.debug("_pc_wrap: %s, %s" % (iptr, resultCLSID))
    disp = win32com.client.Dispatch(iptr, resultCLSID=resultCLSID)
    log.debug("_pc_wrap: %s (%s)", disp.__class__.__name__, disp)
    return disp

def CoCreateInstanceFromFactory(factory_ptr, iid_interface=pythoncom.IID_IDispatch, pUnkOuter=None):
    """Given a factory_ptr whose interface is IClassFactory, create the instance of clsid_class with the specified interface"""
    ClassFactory = pythoncom.ObjectFromAddress(factory_ptr.value, pythoncom.IID_IClassFactory)
    i = ClassFactory.CreateInstance(pUnkOuter, iid_interface)
    return i

def CoCreateInstanceFromFactoryLicenced(factory_ptr, key, iid_interface=pythoncom.IID_IDispatch, pUnkOuter=None):
    """Given a factory_ptr whose interface is IClassFactory2, create the instance of clsid_class with the specified interface"""
    requested_iid = _raw_guid(iid_interface)
    ole_aut = WinDLL("OleAut32.dll")
    key_bstr = ole_aut.SysAllocString(unicode(key))
    try:
        obj = IClassFactory2__CreateInstanceLic(factory_ptr, pUnkOuter or 0, c_char_p(requested_iid), key_bstr)
        disp_obj = pythoncom.ObjectFromAddress(obj, iid_interface)
        return disp_obj
    finally:
        if key_bstr:
            ole_aut.SysFreeString(key_bstr)

#----------------------------------
def CoReleaseObject(obj_ptr):
    """Calls Release() on a COM object. obj_ptr should be a c_void_p"""
    if not obj_ptr:
        return
    IUnknown__Release = WINFUNCTYPE(HRESULT)(2, 'Release', (), pythoncom.IID_IUnknown)
    IUnknown__Release(obj_ptr)

#-----------------------------------
def CoCreateInstanceLicenced(clsid_class, key, pythoncom_iid_interface=pythoncom.IID_IDispatch,
                             dwClsContext=pythoncom.CLSCTX_SERVER, pythoncom_wrapdisp=True, wrapas=None):
    """Uses IClassFactory2::CreateInstanceLic to create a COM object given a licence key."""
    IID_IClassFactory2 = "{B196B28F-BAB4-101A-B69C-00AA00341D07}"
    ole = OleDLL("Ole32.dll")
    clsid_class_raw = _raw_guid(clsid_class)
    iclassfactory2 = _raw_guid(IID_IClassFactory2)
    com_classfactory = c_void_p(0)
    ole.CoGetClassObject(clsid_class_raw, dwClsContext, None, iclassfactory2, byref(com_classfactory))
    try:
        iptr = CoCreateInstanceFromFactoryLicenced(
            factory_ptr=com_classfactory,
            key=key,
            iid_interface=pythoncom_iid_interface,
            pUnkOuter=None,
        )
        if pythoncom_wrapdisp:
            return _pc_wrap(iptr, resultCLSID=wrapas or clsid_class)
        return iptr
    finally:
        if com_classfactory:
            CoReleaseObject(com_classfactory)

#-----------------------------------------------------------
# DLLs
def CoDllGetClassObject(dll_filename, clsid_class, iid_factory=pythoncom.IID_IClassFactory):
    """Given a DLL filename and a desired class, return the factory for that class (as a c_void_p)"""
    dll = OleDLL(dll_filename)
    clsid_class = _raw_guid(clsid_class)
    iclassfactory = _raw_guid(iid_factory)
    com_classfactory = c_void_p(0)
    dll.DllGetClassObject(clsid_class, iclassfactory, byref(com_classfactory))
    return com_classfactory

def CoCreateInstanceFromDll(dll, clsid_class, iid_interface=pythoncom.IID_IDispatch, pythoncom_wrapdisp=True, wrapas=None):
    iclassfactory_ptr = CoDllGetClassObject(dll, clsid_class)
    try:
        iptr = CoCreateInstanceFromFactory(iclassfactory_ptr, iid_interface)
        if pythoncom_wrapdisp:
            return _pc_wrap(iptr, resultCLSID=wrapas or clsid_class)
        return iptr
    finally:
        CoReleaseObject(iclassfactory_ptr)

def CoCreateInstanceFromDllLicenced(dll, clsid_class, key, iid_interface=pythoncom.IID_IDispatch, pythoncom_wrapdisp=True, wrapas=None):
    iclassfactory2_ptr = CoDllGetClassObject(dll, clsid_class, iid_factory=IID_IClassFactory2)
    try:
        iptr = CoCreateInstanceFromFactoryLicenced(iclassfactory2_ptr, key, iid_interface)
        if pythoncom_wrapdisp:
            return _pc_wrap(iptr, resultCLSID=wrapas or clsid_class)
        return iptr
    finally:
        CoReleaseObject(iclassfactory2_ptr)
