How to check if Pyppeteer browser has closed? - python

I can't seem to find any information regarding Python's version
of Puppeteer on how to check if my browser has closed properly, following browser.close().
I have limited knowledge of JavaScript, so I can't properly follow the answer to "puppeteer : how check if browser is still open and working".
Calling print(browser.on('disconnected')) seems to return a function object, which when called requires something called f.
What is the proper way to check if the browser has closed properly?
import asyncio

from pyppeteer import launch

async def get_browser():
    return await launch({"headless": False})

async def get_page():
    browser = await get_browser()
    url = 'https://www.wikipedia.org/'
    page = await browser.newPage()
    await page.goto(url)
    content = await page.content()
    await browser.close()
    print(browser.on('disconnected'))
    #assert browser is None
    #assert print(html)

loop = asyncio.get_event_loop()
result = loop.run_until_complete(get_page())
print(result)

The .on method registers a callback to be fired on a particular event. For example:
import asyncio
from pyppeteer import launch

async def get_page():
    browser = await launch({"headless": True})
    browser.on("disconnected", lambda: print("disconnected"))
    url = "https://www.wikipedia.org/"
    page, = await browser.pages()
    await page.goto(url)
    content = await page.content()
    print("disconnecting...")
    await browser.disconnect()
    await browser.close()
    return content

loop = asyncio.get_event_loop()
result = loop.run_until_complete(get_page())
Output:
disconnecting...
disconnected
From the callback, you could flip a flag to indicate closure or (better yet) take whatever other action you want to take directly.
There's also browser.process.returncode (browser.process is a subprocess.Popen instance). It's 1 after the browser has been closed, but still None after a disconnect alone.
Here's an example of the above:
import asyncio
from pyppeteer import launch

async def get_page():
    browser = await launch({"headless": True})
    connected = True

    async def handle_disconnected():
        nonlocal connected
        connected = False

    browser.on(
        "disconnected",
        lambda: asyncio.ensure_future(handle_disconnected())
    )
    print("connected?", connected)
    print("return code?", browser.process.returncode)
    print("disconnecting...")
    await browser.disconnect()
    print("connected?", connected)
    print("return code?", browser.process.returncode)
    print("closing...")
    await browser.close()
    print("return code?", browser.process.returncode)

asyncio.get_event_loop().run_until_complete(get_page())
Output:
connected? True
return code? None
disconnecting...
connected? False
return code? None
closing...
return code? 1

You can use browser.on('disconnected') to listen for when the browser is closed or crashes, or when the browser.disconnect() method is called. From there you can automatically relaunch the browser and continue with your program.
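As a rough sketch of that relaunch-on-disconnect idea (the BrowserKeeper wrapper and its closing flag are illustrative assumptions, not something from the original answer):

import asyncio
from pyppeteer import launch

class BrowserKeeper:
    """Illustrative wrapper: relaunch the browser if it disconnects unexpectedly."""

    def __init__(self):
        self.browser = None
        self.closing = False

    async def start(self):
        self.browser = await launch({"headless": True})
        self.browser.on(
            "disconnected",
            lambda: asyncio.ensure_future(self._on_disconnected())
        )

    async def _on_disconnected(self):
        if not self.closing:
            print("browser went away, relaunching...")
            await self.start()

    async def close(self):
        self.closing = True  # suppress the relaunch for an intentional close
        await self.browser.close()

async def main():
    keeper = BrowserKeeper()
    await keeper.start()
    page = await keeper.browser.newPage()
    await page.goto("https://www.wikipedia.org/")
    await keeper.close()
    await asyncio.sleep(0.1)  # give the disconnect handler a chance to run before the loop stops

asyncio.get_event_loop().run_until_complete(main())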

Related

Pyppeteer connection closed after a minute

Good day everyone. I ran this code and it works perfectly well. The main purpose is to capture websocket traffic, but the problem is that it closes after a minute or thereabouts. How can I fix this? I want it to stay alive forever.
import asyncio
from pyppeteer import launch

async def main():
    browser = await launch(
        headless=True,
        args=['--no-sandbox'],
        autoClose=False
    )
    page = await browser.newPage()
    await page.goto('https://www.tradingview.com/symbols/BTCUSD/')
    cdp = await page.target.createCDPSession()
    await cdp.send('Network.enable')
    await cdp.send('Page.enable')

    def printResponse(response):
        print(response)

    cdp.on('Network.webSocketFrameReceived', printResponse)  # Calls printResponse when a websocket frame is received
    cdp.on('Network.webSocketFrameSent', printResponse)  # Calls printResponse when a websocket frame is sent
    await asyncio.sleep(100)

asyncio.get_event_loop().run_until_complete(main())
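One plausible cause, given the code above, is the fixed await asyncio.sleep(100) at the end of main(): the coroutine simply returns after 100 seconds. A hedged sketch of keeping the listener alive indefinitely instead (this replacement loop is an assumption, not part of the original post) would end main() like this:

    cdp.on('Network.webSocketFrameReceived', printResponse)
    cdp.on('Network.webSocketFrameSent', printResponse)
    # Assumed change: block indefinitely instead of sleeping for a fixed 100 seconds,
    # so the CDP listeners keep firing for the lifetime of the process.
    while True:
        await asyncio.sleep(3600)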

intercepting response with substring in url using playwright

I have been learning Playwright on Python, but I cannot get it to find a response whose URL contains a given substring, while in Node I am able to do so. Is there anything I am doing wrong?
async with page.expect_response("*") as response:
    if "getVerify" in response.url:
        print("found")
I have also tried using getVerify with the in operator, to no avail.
Node code:
page.on('response', response => {
    if (response.url().includes('getVerify')) {
        console.log(response.url())
    }
})
In the node case it's a bit different, as you're passively subscribing to an event. In the Python snippet you are basically doing the equivalent of page.waitForResponse; typically you'd use that in conjunction with some action that triggers the response (such as submitting a form).
Try using the Python page.on API, like this:
import asyncio
from playwright.async_api import async_playwright

def check_response(response):
    print(response.url)
    if 'getVerify' in response.url:
        print("Response URL: ", response.url)

async def main():
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        page.on("response", check_response)
        await page.goto("http://playwright.dev")
        print(await page.title())
        await browser.close()

asyncio.run(main())
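If you do want to block until a matching response arrives (closer to what the question attempted), expect_response also accepts a predicate; a short sketch, where the page.goto URL is just an assumed trigger for the request:

import asyncio
from playwright.async_api import async_playwright

async def main():
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        # Wait for any response whose URL contains the substring, while the
        # navigation (the assumed trigger) runs inside the context manager.
        async with page.expect_response(lambda r: "getVerify" in r.url) as response_info:
            await page.goto("https://example.com/")
        response = await response_info.value
        print("matched:", response.url)
        await browser.close()

asyncio.run(main())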

python coroutine, perform tasks periodically and cancel

Every 10 minutes, do the following tasks:
- generate list of image urls to download
- (if previous download is not finished, we have to cancel the download)
- download images concurrently
I'm relatively new to coroutines. Can I structure the above with coroutines?
I think a coroutine is essentially a sequential flow, so I'm having trouble thinking about it.
Actually, come to think of it, would the following work?
import asyncio

async def generate_urls():
    await asyncio.sleep(10)
    result = _generate_urls()
    return result

async def download_image(url):
    # download images
    image = await _download_image()
    return image

async def main():
    while True:
        urls = await generate_urls()
        for url in urls:
            download_task = asyncio.create_task(download_image(url))
            await download_task

asyncio.run(main())
Your current code is quite close. Below are some modifications to make it more closely align with your original spec:
import asyncio

def generate_urls():
    return _generate_urls()  # no need to sleep in the URL-generation function

async def download_image(url):
    image = await _download_image()
    return image

async def main():
    tasks = []
    while True:
        # schedule this batch of downloads; as tasks they run concurrently in the background
        tasks.extend(asyncio.create_task(download_image(url)) for url in generate_urls())
        await asyncio.sleep(10)  # sleep after creating the tasks
        for task in tasks:  # after 10 seconds, check if any of the downloads are still running
            if not task.done():
                task.cancel()  # cancel if the task is not complete

asyncio.run(main())

Python: keep open browser in pyppeteer and create CDPSession

I've got two issues that I can't solve at the moment.
1. I would like to keep the browser running so I can just re-connect using the pyppeteer.launcher.connect() function, but it seems to be closed immediately even if I don't call pyppeteer.browser.Browser.close().
test01.py:
import asyncio
from pyppeteer import launch, connect

async def fetch():
    browser = await launch(
        headless=False,
        args=['--no-sandbox']
    )
    print(f'Endpoint: {browser.wsEndpoint}')
    await browser.disconnect()

loop = asyncio.get_event_loop()
loop.run_until_complete(fetch())
$ python test01.py
Endpoint: ws://127.0.0.1:51757/devtools/browser/00e917a9-c031-499a-a8ee-ca4090ebd3fe
$ curl -i -N -H "Connection: Upgrade" -H "Upgrade: websocket" http://127.0.0.1:51757
curl: (7) Failed to connect to 127.0.0.1 port 51757: Connection refused
2. How do I create a CDP session? This code should open another browser window, but it doesn't work as expected:
test02.py
import asyncio
import time
from pyppeteer import launch, connect

async def fetch():
    browser = await launch(
        headless=False,
        args=['--no-sandbox']
    )
    page = await browser.newPage()
    cdp = await page.target.createCDPSession()
    await cdp.send('Target.createBrowserContext')
    time.sleep(5)
    await browser.disconnect()

loop = asyncio.get_event_loop()
loop.run_until_complete(fetch())
$ python test02.py
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error Target.sendMessageToTarget: Target closed.',)>
pyppeteer.errors.NetworkError: Protocol error Target.sendMessageToTarget: Target closed.
How to keep the browser running
You just need to use the autoClose flag; here are the docs:
autoClose (bool): Automatically close browser process when script
completed. Defaults to True.
In this case your test01.py would look as follows:
import asyncio
from pyppeteer import launch, connect

async def fetch():
    browser = await launch(
        headless=False,
        args=['--no-sandbox'],
        autoClose=False
    )
    print(f'Endpoint: {browser.wsEndpoint}')
    await browser.disconnect()

loop = asyncio.get_event_loop()
loop.run_until_complete(fetch())
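With the process left alive, reconnecting from a second script should then work through connect() and the endpoint that test01.py printed (the endpoint value below is just a placeholder):

import asyncio
from pyppeteer import connect

async def reattach():
    # Paste in whatever wsEndpoint test01.py printed; this value is a placeholder.
    browser = await connect(browserWSEndpoint='ws://127.0.0.1:51757/devtools/browser/...')
    page = await browser.newPage()
    await page.goto('https://www.wikipedia.org/')
    print(await page.title())
    await browser.disconnect()  # detach again and leave the browser process running

asyncio.get_event_loop().run_until_complete(reattach())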
CDP session
Here it is:
import asyncio
import time
from pprint import pprint

from pyppeteer import launch, connect
from pyppeteer.browser import BrowserContext

async def fetch():
    browser = await launch(
        headless=False,
        args=['--no-sandbox'],
        autoClose=False
    )
    page = await browser.newPage()
    cdp = await page.target.createCDPSession()
    raw_context = await cdp.send('Target.createBrowserContext')
    pprint(raw_context)
    context = BrowserContext(browser, raw_context['browserContextId'])
    new_page = await context.newPage()
    await cdp.detach()
    await browser.disconnect()

loop = asyncio.get_event_loop()
loop.run_until_complete(fetch())
Inspired by Browser.createIncognitoBrowserContext from pyppeteer itself.
Notice that creating additional browser contexts via raw CDP doesn't seem like a great idea, because browser._contexts won't be updated and will become inconsistent. It's also likely that Browser.createIncognitoBrowserContext fits your needs without resorting to CDP at all.
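For completeness, a minimal sketch of that CDP-free route using the built-in createIncognitoBrowserContext:

import asyncio
from pyppeteer import launch

async def fetch():
    browser = await launch(headless=False, args=['--no-sandbox'], autoClose=False)
    # The built-in API keeps pyppeteer's own bookkeeping of contexts consistent.
    context = await browser.createIncognitoBrowserContext()
    new_page = await context.newPage()
    await new_page.goto('https://www.wikipedia.org/')
    await browser.disconnect()

asyncio.get_event_loop().run_until_complete(fetch())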

Discord.py background task breaks but task is still pending (using websocket)

I'm very inexperienced with asyncio and asynchronous programming in general, so I've been having a hard time trying to use a synchronous websocket module with the async discord.py module. I am trying to set up a bot which constantly listens for websocket messages and, if it receives data, performs some calculations and sends a message in Discord. This should run indefinitely, and the websocket changes its origin routinely. Here is some commented code of what I'm trying to accomplish:
import requests
import websocket
import discord
import asyncio
import json
from time import sleep

client = discord.Client()  # Initialize the discord client

class Wrapper:  # This just holds some variables so that I can use them without worrying about scope
    data = None
    first_time = True
    is_running = False
    socket_url = None

async def init_bot():  # The problem begins here
    def on_message(ws, message):
        if is_valid(message['data']):  # is_valid is just a name for a special comparison I do with the message
            Wrapper.data = message['data']
            print('Received valid data')

    def on_error(ws, error):
        print('There was an error connecting to the websocket.')

    ws = websocket.WebSocketApp(Wrapper.socket_url,
                                on_message=on_message,
                                on_error=on_error)
    while True:
        ws.run_forever()  # I believe this is connected to the discord event loop?
        # Using ws.close() here breaks the program when it receives data for the second time

async def start():
    await client.wait_until_ready()

    def get_response(hq_id, headers):
        response = requests.get('my_data_source')
        try:
            return json.loads(response.text)
        except:
            return None

    print('Starting websocket')
    await init_bot()
    while True:  # This should run forever, but the outer coroutine gets task pending errors
        print('Running main loop')
        if Wrapper.first_time:
            Wrapper.first_time = False
            on_login = await client.send_message(Config.public_channel, embed=Config.waiting_embed())  # Sends an embed to a channel
            while Wrapper.socket_url is None:
                response = get_response(ID, HEADERS)
                try:
                    Wrapper.socket_url = response['socket_url']  # Assume this sets the socket url to a websocket in the form ws://anyhost.com
                    await client.edit_message(on_login, embed=Config.connect_embed())
                    Wrapper.is_running = True
                    await asyncio.sleep(3)
                except:
                    await asyncio.sleep(60)  # The response will only sometimes include a proper socket_url, so we wait for one
        if Wrapper.is_running:
            while Wrapper.data is None:  # Is this blocking? I essentially want this while loop to end when we have data from on_message
                await asyncio.sleep(1)
            if Wrapper.data is not None:
                data_message = await client.send_message(Config.public_channel, embed=Wrapper.data[0])
                await client.add_reaction(data_message, '👍')
                await client.add_reaction(data_message, '👎')
                if Wrapper.data[1] == True:  # Marks the last message, so we want to reset the bot to its initial state
                    Wrapper.is_running = False
                    Wrapper.first_time = True
                    Wrapper.socket_url = None
                    await asyncio.sleep(100)  # Sleep for a period of time in order to make sure the socket url is closed
                Wrapper.data = None
        await asyncio.sleep(3)
    print('While loop ended?')

@client.event
async def on_ready():
    print(f'My Bot\n')
    client.loop.create_task(start())

client.run('<TOKEN>')
I've tried several variations of the above, but the error I typically get is something along these lines:
File "mybot.py", line 247, in <module>
client.loop.create_task(start())
task: <Task pending coro=<start() running at mybot.py> wait_for=<Future pending cb=[BaseSelectorEventLoop._sock_connect_done(1016)(), <TaskWakeupMethWrapper object at 0x000002545FFFC8B8>()]
You cannot just mix asyncio-aware code, such as discord.py, with synchronous websocket code. Since nothing is ever awaited inside init_bot (ws.run_forever() blocks), calling await init_bot() completely stops the event loop.
Instead, you need to run the websocket code (init_bot function in your case) in a separate thread and await an appropriate event. For example:
def init_bot(loop, w):
    def on_message(ws, message):
        w.data = message['data']
        loop.call_soon_threadsafe(w.event.set)
    # ...

async def start():
    # ...
    loop = asyncio.get_event_loop()
    w = Wrapper()
    w.event = asyncio.Event()
    threading.Thread(target=lambda: init_bot(loop, w)).start()
    # ...
    # instead of while Wrapper.data is None ...
    await w.event.wait()
    # ... process data
    w.event.clear()
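To make the pattern concrete outside of discord.py, here is a small self-contained sketch of the same idea: a worker thread produces data and signals an asyncio.Event through call_soon_threadsafe, while a coroutine awaits it. The fake producer loop merely stands in for the websocket callback and is purely illustrative.

import asyncio
import threading
import time

class Wrapper:
    def __init__(self):
        self.data = None
        self.event = None  # set up on the asyncio side

def worker(loop, w):
    # Stand-in for the synchronous websocket callback thread.
    for i in range(3):
        time.sleep(1)  # pretend we're waiting for a frame
        w.data = f"frame {i}"
        loop.call_soon_threadsafe(w.event.set)  # wake the coroutine safely from another thread

async def main():
    loop = asyncio.get_running_loop()
    w = Wrapper()
    w.event = asyncio.Event()
    threading.Thread(target=worker, args=(loop, w), daemon=True).start()
    for _ in range(3):
        await w.event.wait()  # instead of polling Wrapper.data
        print("got", w.data)  # ... process data ...
        w.event.clear()

asyncio.run(main())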
