I am trying to use a Manager and a Queue to share a Selenium driver object between two subprocesses, but it shows me the error "AttributeError: Can't pickle local object '_createenviron.<locals>.encodekey'".
from selenium.webdriver import Chrome
from multiprocessing import Queue, Manager, Process
import time

def find(q):
    driver = Chrome()
    driver.get('https://querycourse.ntust.edu.tw/querycourse/api/courses')
    q.put(driver)

def refresh(q):
    driver = q.get()
    while True:
        time.sleep(9)
        driver.refresh()

if __name__ == '__main__':
    with Manager() as manager:
        q = manager.Queue()
        p1 = Process(target=find, args=(q,))
        p2 = Process(target=refresh, args=(q,))
        p1.start()
        time.sleep(3)
        p2.start()
        p1.join()
        p2.join()
Process Process-2:
Traceback (most recent call last):
File "C:\Users\USER\AppData\Local\Programs\Python\Python310\lib\multiprocessing\process.py", line 314, in _bootstrap
self.run()
File "C:\Users\USER\AppData\Local\Programs\Python\Python310\lib\multiprocessing\process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "D:\python_training\web crawler\kk_manager_queue.py", line 8, in find
q.put(driver)
File "<string>", line 2, in put
File "C:\Users\USER\AppData\Local\Programs\Python\Python310\lib\multiprocessing\managers.py", line 817, in _callmethod
conn.send((self._id, methodname, args, kwds))
File "C:\Users\USER\AppData\Local\Programs\Python\Python310\lib\multiprocessing\connection.py", line 211, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Users\USER\AppData\Local\Programs\Python\Python310\lib\multiprocessing\reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
AttributeError: Can't pickle local object '_createenviron.<locals>.encodekey'
Does anyone know what's wrong with my code? Any feedback is appreciated.
I simplified the code above; originally I wanted to use it to grab the course I want. One subprocess refreshes the course-selection system to prevent connection timeouts, and the other detects whether someone drops the course and then clicks the "select" button, so the two subprocesses need to share the driver object.
This is my original code:
import urllib.request as req
import json
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import time
from multiprocessing import Manager, Queue, Process

def find(url, requestData, chooseStudent, q):
    while True:
        request = req.Request(url, headers={
            "content-type": "application/json; charset=utf-8"
        }, data=json.dumps(requestData).encode("utf-8"))
        with req.urlopen(request) as response:
            result = response.read().decode("utf-8")
            result = json.loads(result)
        if int(result[0]["ChooseStudent"]) < chooseStudent:
            driver = q.get()
            ADDgo = driver.find_element(By.ID, "SingleAdd")
            ADDgo.click()
            break

def refresh(q):
    account = 'XXXXXXXX'
    password = 'XXXXXXXX'
    classid = 'GE3710302'
    driver = Chrome()
    driver.get("https://stuinfosys.ntust.edu.tw/NTUSTSSOServ/SSO/Login/CourseSelection")
    UserName = WebDriverWait(driver, timeout=10).until(lambda d: d.find_element(By.NAME, "UserName"))
    UserName.send_keys(account)
    Password = driver.find_element(By.NAME, "Password")
    Password.send_keys(password)
    btnLogIn = driver.find_element(By.NAME, "btnLogIn")
    btnLogIn.click()
    q.put(driver)
    while True:
        time.sleep(10)
        driver.refresh()

if __name__ == '__main__':
    url = "https://querycourse.ntust.edu.tw/querycourse/api/courses"
    requestData = {"Semester":"1112","CourseNo":"GE3710302","CourseName":"","CourseTeacher":"","Dimension":"","CourseNotes":"","ForeignLanguage":0,"OnlyGeneral":0,"OnleyNTUST":0,"OnlyMaster":0,"OnlyUnderGraduate":0,"OnlyNode":0,"Language":"zh"}
    chooseStudent = 51
    with Manager() as manager:
        q = manager.Queue()
        p1 = Process(target=find, args=(url, requestData, chooseStudent, q,))
        p2 = Process(target=refresh, args=(q,))
        p1.start()
        p2.start()
        p1.join()
        p2.join()
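For what it's worth, a workaround I've seen suggested is to pass only the driver's session_id and the command executor URL through the queue (plain strings pickle fine) and re-attach to the running browser in the other process. This relies on the private command_executor._url attribute and on overriding session_id, so it is an unofficial, version-dependent hack; a rough, untested sketch:

from multiprocessing import Manager, Process
import time
from selenium import webdriver

def find(q):
    driver = webdriver.Chrome()
    driver.get('https://querycourse.ntust.edu.tw/querycourse/api/courses')
    # strings are picklable, unlike the driver object itself
    q.put((driver.command_executor._url, driver.session_id))
    while True:
        time.sleep(60)  # keep this process (and its browser) alive

def refresh(q):
    executor_url, session_id = q.get()
    # Remote() opens a throwaway session first; we then point the
    # driver at the session created in the other process
    driver = webdriver.Remote(command_executor=executor_url, options=webdriver.ChromeOptions())
    driver.close()  # discard the extra session Remote just opened
    driver.session_id = session_id
    while True:
        time.sleep(9)
        driver.refresh()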
I'm following an example from the official Python documentation.
I'm trying to spin up a BaseManager at localhost:50000 that registers a queue, and then a bunch of workers that read from that queue. I can get it to work using the approach from the official Python docs with three files (one server, one put client, one get client), but I can't get it to work all in one file where I spawn the clients via multiprocessing.Process(target=...).
Here is my full code. The issue is that when the clients attempt to connect, they get a ConnectionRefusedError (stack trace below):
from typing import Dict, Optional, Any, List
from multiprocessing.managers import BaseManager, SyncManager
import time
import multiprocessing as mp
import argparse
import queue

q = queue.Queue()

def parse_args() -> argparse.Namespace:
    a = argparse.ArgumentParser()
    a.add_argument("--n-workers", type=int, default=2)
    return a.parse_args()

def run_queue_server(args: argparse.Namespace) -> None:
    class QueueManager(BaseManager): pass
    QueueManager.register("get_queue", lambda: q)
    m = QueueManager(address=('', 50000), authkey=b'abracadabra')
    m.start()

def _worker_process(worker_uid: str) -> None:
    class QueueManager(BaseManager): pass
    QueueManager.register("get_queue")
    m = QueueManager(address=('', 50000), authkey=b'abracadabra')
    # <-- This line fails with ConnectionRefused -->
    m.connect()
    queue: queue.Queue = m.get_queue()

def spawn_workers(args: argparse.Namespace) -> None:
    time.sleep(2)
    worker_procs = dict()
    for i in range(args.n_workers):
        print(f"Spawning worker process {i}..")
        p = mp.Process(target=_worker_process, args=[str(i)])
        p.start()
        worker_procs[str(i)] = p

def main():
    args = parse_args()
    run_queue_server(args)
    spawn_workers(args)
    while True:
        time.sleep(1)

if __name__ == '__main__':
    main()
The error is here:
$ python minimal.py
Spawning worker process 0..
Spawning worker process 1..
Process Process-2:
Process Process-3:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 313, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "minimal.py", line 26, in _worker_process
m.connect()
File "/usr/lib/python3.8/multiprocessing/managers.py", line 548, in connect
conn = Client(self._address, authkey=self._authkey)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 502, in Client
c = SocketClient(address)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 629, in SocketClient
s.connect(address)
ConnectionRefusedError: [Errno 111] Connection refused
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 313, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "minimal.py", line 26, in _worker_process
m.connect()
File "/usr/lib/python3.8/multiprocessing/managers.py", line 548, in connect
conn = Client(self._address, authkey=self._authkey)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 502, in Client
c = SocketClient(address)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 629, in SocketClient
s.connect(address)
ConnectionRefusedError: [Errno 111] Connection refused
However, if I spawn another process that targets the manager creation step and runs m.get_server().serve_forever(), then I do not get the connection-refused error. See the code below, which works:
from typing import Dict, Optional, Any, List
from multiprocessing.managers import BaseManager, SyncManager
import time
import multiprocessing as mp
import argparse
import queue

q = queue.Queue()

def parse_args() -> argparse.Namespace:
    a = argparse.ArgumentParser()
    a.add_argument("--n-workers", type=int, default=2)
    return a.parse_args()

def run_queue_server(args: argparse.Namespace) -> None:
    class QueueManager(BaseManager): pass
    QueueManager.register("get_queue", lambda: q)
    m = QueueManager(address=('', 50000), authkey=b'abracadabra')
    #m.start()
    # This works!!
    m.get_server().serve_forever()

def _worker_process(worker_uid: str) -> None:
    class QueueManager(BaseManager): pass
    QueueManager.register("get_queue")
    m = QueueManager(address=('', 50000), authkey=b'abracadabra')
    m.connect()
    queue: queue.Queue = m.get_queue()
    print(f"Gotten queue: {queue}")

def spawn_workers(args: argparse.Namespace) -> None:
    time.sleep(2)
    worker_procs = dict()
    for i in range(args.n_workers):
        print(f"Spawning worker process {i}..")
        p = mp.Process(target=_worker_process, args=[str(i)])
        p.start()
        worker_procs[str(i)] = p

def main():
    args = parse_args()
    #run_queue_server(args)
    # I don't want to run this in another process?
    mp.Process(target=run_queue_server, args=(args,)).start()
    spawn_workers(args)
    while True:
        time.sleep(1)

if __name__ == '__main__':
    main()
The thing is, I don't want to have to start another process to be the manager... why can't it just be this process?
Edit: I'm an idiot who was programming too late into the night. The issue is that run_queue_server, after calling m.start() and returning, was losing the reference to the QueueManager, which I'm sure caused it to be garbage collected.
All I did was change
def run_queue_server(args: argparse.Namespace) -> BaseManager:
    class QueueManager(BaseManager): pass
    QueueManager.register("get_queue", lambda: q)
    m = QueueManager(address=('', 50000), authkey=b'abracadabra')
    m.start()
    return m
and changed the caller to keep the return value, and everything works.
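For completeness, the caller side after the change looks roughly like this; the only point is that main now holds a reference to the manager, so it can no longer be garbage collected:

def main():
    args = parse_args()
    m = run_queue_server(args)  # keep the returned manager alive
    spawn_workers(args)
    while True:
        time.sleep(1)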
I am using the following code to search Google and click on the first search result.
from selenium import webdriver
import urllib.parse
import time
from selenium.webdriver.firefox.options import Options

options = Options()
options.set_preference("dom.popup_maximum", 100)
options.add_argument("-profile")
options.add_argument("/home/blueray/.mozilla/firefox/5ertyoox.default-release")
options.page_load_strategy = 'eager'
# options.add_extension('fhnegjjodccfaliddboelcleikbmapik.crx')
browser = webdriver.Firefox(options=options)

with open("google-search-terms.adoc") as fin:
    for line_no, line in enumerate(fin):
        line = line.strip()
        query = urllib.parse.urlencode({'q': line + " site:amazon.com"})
        browser.execute_script(f"window.open('https://www.google.com/search?{query}');")
        time.sleep(5)

for x in range(1, len(browser.window_handles)):
    browser.switch_to.window(browser.window_handles[x])
    try:
        elm = browser.find_elements_by_xpath(
            '/html/body/div[7]/div/div[9]/div[1]/div/div[2]/div[2]/div/div/div[1]/div/div/div[1]/a/h3')
        if not elm:
            elm = browser.find_elements_by_xpath(
                '/html/body/div[7]/div/div[9]/div[1]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/a/h3')
        elm[0].click()
    except Exception as e:
        print("Error", str(e))
However, if one instance of Firefox is open and I run the script, it gives the message:
Firefox is already running, but is not responding. To use Firefox, you
must first close the existing Firefox process, restart your device, or
use a different profile.
And the program is terminated with the following error:
Traceback (most recent call last):
File "google-search-amazon-less-captcha.py", line 13, in <module>
browser = webdriver.Firefox(options=options)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/firefox/webdriver.py", line 170, in __init__
RemoteWebDriver.__init__(
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status 0
What should I do so that there is no error even if an instance of Firefox is already open?
I'm having the same issue, but only if the open Firefox instance uses the same profile that I'm loading in the script. If you remove the profile from the script, it should run. It should also work if your code uses a different profile than the currently open window is using.
You can also use the deprecated Selenium 3 way of loading a profile, which avoids the error for me:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
ffOptions = Options()
ffProfile = FirefoxProfile(r'C:\Users\Tyler\AppData\Roaming\Mozilla\Firefox\Profiles\0753x1pz.default')
ffOptions.profile = ffProfile
driver = webdriver.Firefox(options=ffOptions)
driver.get("http://www.google.com")
I'm still looking for a viable solution using the Selenium 4 way of setting a profile.
Sometimes Selenium scripts leave web drivers open instead of closing them properly.
A good practice is a try/except/finally block:
driver = webdriver.Firefox()
try:
    pass  # your code here
except Exception as e:
    print(e)  # log or print the error
finally:
    driver.close()
    driver.quit()
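Depending on your Selenium version, the driver can also be used as a context manager (Selenium 4 adds __enter__/__exit__ that call quit() on exit, if I remember correctly), which gives the same cleanup with less code. A sketch, assuming a recent Selenium 4 release:

from selenium import webdriver

# assuming Selenium 4: leaving the with block quits the driver,
# even when the body raises
with webdriver.Firefox() as driver:
    driver.get("http://www.google.com")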
You could also try killing any Firefox processes running on your system as part of the Python script, using something like this:
.....
.....
import os
os.system("taskkill /im geckodriver.exe /f")
os.system("taskkill /im firefox.exe /f")
I'm trying to prepare a bot for a platform using Python and Selenium. My code is below. When I ran it, I saw an error like this:
Traceback (most recent call last):
File "C:\Users\doguk\Desktop\Python\eksi_01.py", line 76, in <module>
WebDriverWait(driver,30).until(recaptchaSolved.is_displayed())
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\support\wait.py", line 71, in until
value = method(self._driver)
TypeError: 'bool' object is not callable
## necessary import codes
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
import time
import xlrd
import random

## random waiting time
random_Get_Time = random.randint(1, 5)

## importing data file
data_File_Path = "C:/Users/doguk/Desktop/Python/DataFile.xls"

## targeting the data
dataFile = xlrd.open_workbook(data_File_Path)
accountInfo = dataFile.sheet_by_name("account_Ids_And_Passwords")
usernameCount = accountInfo.nrows
passwordCount = accountInfo.ncols

for curr_row in range(1, usernameCount):
    username = accountInfo.cell_value(1, 0)
    password = accountInfo.cell_value(1, 1)

    ## setting up recaptcha solver plugin to browser
    recaptcha_Solver_Plugin_Path = "C:/Users/doguk/Desktop/Python/plugin.zip"

    ## adding recaptcha solver plugin to browser
    recaptchaSolverAddOptions = webdriver.ChromeOptions()
    recaptchaSolverAddOptions.add_extension(recaptcha_Solver_Plugin_Path)

    ## browser driver code
    chrome_Driver_Path = "C:/Users/doguk/Desktop/Python/chromedriver"
    driver = webdriver.Chrome(chrome_Driver_Path, options=recaptchaSolverAddOptions)

    ## enter the browser
    driver.get("**URL**")
    time.sleep(random_Get_Time)

    ## fullscreen code
    driver.maximize_window()

    ## filling account infos
    login_Form_Username = driver.find_element_by_id('username')
    login_Form_Username.send_keys(username)
    time.sleep(random_Get_Time)
    login_Form_Password = driver.find_element_by_id('password')
    login_Form_Password.send_keys(password)

    ## waiting for captcha confirmation
    recaptchaDisplayed = driver.find_element_by_class_name('g-recaptcha')
    if recaptchaDisplayed.is_displayed():
        recaptchaSolved = driver.find_element_by_partial_link_text('Solved')
        WebDriverWait(driver, 30).until(recaptchaSolved.is_displayed())
    else:
        time.sleep(random_Get_Time)

    ## log in to platform
    login_Button = driver.find_element_by_class_name('btn')
    login_Button.click()
    time.sleep(random_Get_Time)

    ## logout from driver
    driver.quit()
How can I fix that?
What I'm trying to do: there is a Google reCAPTCHA confirmation on the login page. If there isn't one, I don't want to wait long; if there is, I want to wait until the recaptcha solver plugin handles it.
If I remove the "## waiting for captcha confirmation" section and add time.sleep(110), the problem is solved, but I need to make the behavior more human-like. I'm waiting for your help.
Instead of
WebDriverWait(driver,30).until(recaptchaSolved.is_displayed())
I tried this:
WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.XPATH, "//*[contains(text(),'Solved')]")))
But it gave an error again, like this:
Traceback (most recent call last):
File "C:\Users\doguk\Desktop\Python\eksi_01.py", line 75, in <module>
recaptchaSolved = driver.find_element_by_partial_link_text('Solved')
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 462, in find_element_by_partial_link_text
return self.find_element(by=By.PARTIAL_LINK_TEXT, value=link_text)
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 976, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"partial link text","selector":"Solved"}
(Session info: chrome=92.0.4515.159)
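Note that this second traceback points at the find_element_by_partial_link_text('Solved') call on the line above the wait (line 75), not at the new wait itself. From what I can tell, until() needs a callable that it re-invokes with the driver until it returns something truthy, so a rough, untested sketch of the lambda form would be:

# until() calls the given condition repeatedly with the driver,
# so it must receive a callable, not the result of is_displayed()
recaptchaSolved = driver.find_element_by_partial_link_text('Solved')
WebDriverWait(driver, 30).until(lambda d: recaptchaSolved.is_displayed())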
How to terminate loop.run_in_executor with ProcessPoolExecutor gracefully? Shortly after starting the program, SIGINT (ctrl + c) is sent.
import asyncio
import concurrent.futures
from time import sleep

def blocking_task():
    sleep(3)

async def main():
    exe = concurrent.futures.ProcessPoolExecutor(max_workers=4)
    loop = asyncio.get_event_loop()
    tasks = [loop.run_in_executor(exe, blocking_task) for i in range(3)]
    await asyncio.gather(*tasks)

if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('ctrl + c')
With max_workers equal to or less than the number of tasks, everything works. But if max_workers is greater, the output of the above code is as follows:
Process ForkProcess-4:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/concurrent/futures/process.py", line 233, in _process_worker
call_item = call_queue.get(block=True)
File "/usr/lib/python3.8/multiprocessing/queues.py", line 97, in get
res = self._recv_bytes()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
ctrl + c
I would like to catch the exception (KeyboardInterrupt) only once and ignore or mute the other exception(s) in the process pool, but how?
Update, extra credit:
Can you explain the reason for the multiple exceptions?
Does adding a signal handler work on Windows?
If not, is there a solution that works without a signal handler?
You can use the initializer parameter of ProcessPoolExecutor to install a handler for SIGINT in each process.
Update:
On Unix, when a process is created, it becomes a member of its parent's process group. If you are generating the SIGINT with Ctrl+C, then the signal is sent to the entire process group, which is why each worker process raises its own KeyboardInterrupt.
import asyncio
import concurrent.futures
import os
import signal
import sys
from time import sleep

def handler(signum, frame):
    print('SIGINT for PID=', os.getpid())
    sys.exit(0)

def init():
    signal.signal(signal.SIGINT, handler)

def blocking_task():
    sleep(15)

async def main():
    exe = concurrent.futures.ProcessPoolExecutor(max_workers=5, initializer=init)
    loop = asyncio.get_event_loop()
    tasks = [loop.run_in_executor(exe, blocking_task) for i in range(2)]
    await asyncio.gather(*tasks)

if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print('ctrl + c')
Ctrl-C shortly after start:
^CSIGINT for PID= 59942
SIGINT for PID= 59943
SIGINT for PID= 59941
SIGINT for PID= 59945
SIGINT for PID= 59944
ctrl + c
I am trying to implement a generic "timeout" function which allows me to send a function to be run, and if it doesn't complete after a certain amount of time, kill it. Here is the current implementation:
from multiprocessing import Process, Queue
import multiprocessing as mp
mp.allow_connection_pickling()
from fn.monad import Full, Empty
import traceback

def timeout(timeout, func, args=()):
    '''
    Calls function, and if it times out returns an Empty()
    :param timeout: int | The amount of time to wait for the function
    :param func: () => Any | The function to call (must take no arguments)
    :param queue: Queue | The multiprocessing queue to put the result into
    :return Option | Full(Result) if we get the result, Empty() if it times out
    '''
    queue = Queue()
    p = Process(target=_helper_func, args=(func, queue, args,))
    p.daemon = True
    p.start()
    p.join(timeout)
    if p.is_alive() or queue.empty():
        p.terminate()
        return Empty()
    else:
        out = queue.get()
        # if 'rebuild_handle' in dir(out):
        #     out.rebuild_handle()
        return Full(out)

def _helper_func(func, queue, args):
    try:
        func(*args, queue)
    except Exception as e:
        pass
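A hypothetical usage sketch (slow_add is invented for illustration and not part of my real code); the helper appends the queue as the function's final argument, and the function writes its result there:

def slow_add(a, b, queue):
    # invented worker: computes a result and hands it back via the queue
    queue.put(a + b)

out = timeout(5, slow_add, args=(1, 2))
# out is Full(3) if slow_add finishes in time, Empty() otherwise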
The function must put the "return value" into the multiprocessing queue. However, when timeout is run and a socket is put into the queue, I get the following error:
Traceback (most recent call last):
File "socket_test.py", line 27, in <module>
print(q.get())
File "/usr/lib/python3.6/multiprocessing/queues.py", line 113, in get
return _ForkingPickler.loads(res)
File "/usr/lib/python3.6/multiprocessing/reduction.py", line 239, in _rebuild_socket
fd = df.detach()
File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 57, in detach
with _resource_sharer.get_connection(self._id) as conn:
File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 87, in get_connection
c = Client(address, authkey=process.current_process().authkey)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 487, in Client
c = SocketClient(address)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 614, in SocketClient
s.connect(address)
ConnectionRefusedError: [Errno 111] Connection refused
I have tried the approaches from various previous Stack Overflow posts, such as: Python3 Windows multiprocessing passing socket to process
Please let me know if you know of a solution to this issue, as it is throwing a giant wrench into my code. Thanks!