Updated script that can be controlled by a Node.js web app
This commit is contained in:
509
lib/python3.13/site-packages/selenium/webdriver/common/bidi/cdp.py
Executable file
509
lib/python3.13/site-packages/selenium/webdriver/common/bidi/cdp.py
Executable file
@ -0,0 +1,509 @@
|
||||
# The MIT License(MIT)
|
||||
#
|
||||
# Copyright(c) 2018 Hyperion Gray
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files(the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
#
|
||||
# This code comes from https://github.com/HyperionGray/trio-chrome-devtools-protocol/tree/master/trio_cdp
|
||||
|
||||
# flake8: noqa
|
||||
|
||||
import contextvars
|
||||
import importlib
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import pathlib
|
||||
import typing
|
||||
from collections import defaultdict
|
||||
from contextlib import asynccontextmanager
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
|
||||
import trio
|
||||
from trio_websocket import ConnectionClosed as WsConnectionClosed
|
||||
from trio_websocket import connect_websocket_url
|
||||
|
||||
# Module logger; it keeps the upstream "trio_cdp" name rather than __name__.
logger = logging.getLogger("trio_cdp")

# Generic placeholder used by the annotations in this module.
T = typing.TypeVar("T")

# Passed to the WebSocket client as ``max_message_size`` (2**24 = 16 MiB).
MAX_WS_MESSAGE_SIZE = 1 << 24

# Both names are rebound by import_devtools(); until then they are None.
devtools = version = None
|
||||
|
||||
|
||||
def import_devtools(ver):
    """Load the devtools bindings for CDP version ``ver`` into the module cache.

    The module ``selenium.webdriver.common.devtools.v<ver>`` is imported and
    stored in the module-level ``devtools`` global; ``version`` is set to
    ``ver`` regardless of which module ends up being loaded.

    If the requested version is not available, the newest ``v<N>`` package
    found in the sibling ``devtools`` directory is loaded instead.

    :param ver: the requested CDP version number
    :returns: the imported devtools module
    :raises ModuleNotFoundError: if the requested version is missing and no
        ``v<N>`` directory exists to fall back to
    """
    global devtools
    global version
    version = ver
    base = "selenium.webdriver.common.devtools.v"
    try:
        devtools = importlib.import_module(f"{base}{ver}")
        return devtools
    except ModuleNotFoundError:
        # Attempt to parse and load the 'most recent' devtools module. This is likely
        # because cdp has been updated but selenium python has not been released yet.
        devtools_path = pathlib.Path(__file__).parents[1].joinpath("devtools")
        # Only consider directories named "v<digits>": entries such as
        # ``__pycache__`` would otherwise make ``int()`` raise ValueError.
        available = [
            int(entry.name[1:])
            for entry in devtools_path.iterdir()
            if entry.is_dir() and entry.name.startswith("v") and entry.name[1:].isdecimal()
        ]
        if not available:
            # Nothing to fall back to: surface the original import failure
            # instead of an opaque "max() arg is an empty sequence".
            raise
        latest = max(available)
        selenium_logger = logging.getLogger(__name__)
        selenium_logger.debug("Falling back to loading `devtools`: v%s", latest)
        devtools = importlib.import_module(f"{base}{latest}")
        return devtools
|
||||
|
||||
|
||||
# Context variables holding the "current" connection and session. Neither has
# a default here, so ``.get()`` raises LookupError until a value is installed.
_connection_context: contextvars.ContextVar = contextvars.ContextVar(
    "connection_context",
)
_session_context: contextvars.ContextVar = contextvars.ContextVar(
    "session_context",
)
|
||||
|
||||
|
||||
def get_connection_context(fn_name):
    """Return the connection installed for the current context.

    :param fn_name: name of the calling function, used only to build the
        error message
    :raises RuntimeError: if no connection has been installed
    """
    try:
        current = _connection_context.get()
    except LookupError:
        raise RuntimeError(f"{fn_name}() must be called in a connection context.")
    return current
|
||||
|
||||
|
||||
def get_session_context(fn_name):
    """Return the session installed for the current context.

    :param fn_name: name of the calling function, used only to build the
        error message
    :raises RuntimeError: if no session has been installed
    """
    try:
        current = _session_context.get()
    except LookupError:
        raise RuntimeError(f"{fn_name}() must be called in a session context.")
    return current
|
||||
|
||||
|
||||
@contextmanager
def connection_context(connection):
    """Install ``connection`` as the connection context for the duration of
    the ``with`` block, restoring the previous value on exit.

    :param connection: the connection to make current
    """
    reset_token = _connection_context.set(connection)
    try:
        yield
    finally:
        # Restore whatever was current before, even if the block raised.
        _connection_context.reset(reset_token)
|
||||
|
||||
|
||||
@contextmanager
def session_context(session):
    """Install ``session`` as the session context for the duration of the
    ``with`` block, restoring the previous value on exit.

    :param session: the session to make current
    """
    reset_token = _session_context.set(session)
    try:
        yield
    finally:
        # Restore whatever was current before, even if the block raised.
        _session_context.reset(reset_token)
|
||||
|
||||
|
||||
def set_global_connection(connection):
    """Make ``connection`` the default connection for every task.

    This rebinds the module-level context variable to a new one whose
    default value is ``connection``.

    This is generally not recommended, except it may be necessary in
    certain use cases such as running inside Jupyter notebook.

    :param connection: the connection to use as the default
    """
    global _connection_context
    replacement = contextvars.ContextVar("_connection_context", default=connection)
    _connection_context = replacement
|
||||
|
||||
|
||||
def set_global_session(session):
    """Make ``session`` the default session for every task.

    This rebinds the module-level context variable to a new one whose
    default value is ``session``.

    This is generally not recommended, except it may be necessary in
    certain use cases such as running inside Jupyter notebook.

    :param session: the session to use as the default
    """
    global _session_context
    replacement = contextvars.ContextVar("_session_context", default=session)
    _session_context = replacement
|
||||
|
||||
|
||||
class BrowserError(Exception):
    """Raised when the browser answers a command with an error object."""

    def __init__(self, obj):
        """Copy the error fields out of ``obj``.

        :param dict obj: error object; the ``code``, ``message`` and ``data``
            keys are read, and a missing key yields ``None``
        """
        self.code, self.message, self.detail = (
            obj.get(key) for key in ("code", "message", "data")
        )

    def __str__(self):
        """Return a one-line summary containing code, message and detail."""
        return f"BrowserError<code={self.code} message={self.message}> {self.detail}"
|
||||
|
||||
|
||||
class CdpConnectionClosed(WsConnectionClosed):
    """Raised when a public method is called on a closed CDP connection."""

    def __init__(self, reason):
        """Store the close reason.

        :param reason: why the underlying WebSocket was closed
        :type reason: wsproto.frame_protocol.CloseReason
        """
        self.reason = reason

    def __repr__(self):
        """Return the class name followed by the close reason."""
        return f"{self.__class__.__name__}<{self.reason}>"
|
||||
|
||||
|
||||
class InternalError(Exception):
    """Signals a logic fault inside TrioCDP or in its integration with PyCDP.

    It is never raised in response to ordinary browser or network errors.
    """
|
||||
|
||||
|
||||
@dataclass
class CmEventProxy:
    """Placeholder handed out by :meth:`CdpBase.wait_for`.

    ``value`` starts out as ``None``; once the ``async with`` block has
    finished it holds the event that was received.
    """

    # The received event, or None while no event has been stored yet.
    value: typing.Any = None
|
||||
|
||||
|
||||
class CdpBase:
    """Command and event plumbing shared by the root connection and sessions.

    Commands are sent with :meth:`execute`; the matching responses and all
    events are fed in through :meth:`_handle_data` by whoever reads the
    WebSocket.
    """

    def __init__(self, ws, session_id, target_id):
        """Constructor.

        :param ws: WebSocket connection used to send commands
        :param session_id: session ID added to outgoing requests; a falsy
            value means no ``sessionId`` field is sent
        :param target_id: ID of the target this object talks to
        """
        self.ws = ws
        self.session_id = session_id
        self.target_id = target_id
        # Event type -> set of memory-channel send ends subscribed to it.
        self.channels = defaultdict(set)
        # Source of command IDs that are unique within this object.
        self.id_iter = itertools.count()
        # Command ID -> (command generator, trio.Event) while awaiting a reply.
        self.inflight_cmd = {}
        # Command ID -> parsed result, or the exception to raise in execute().
        self.inflight_result = {}

    async def execute(self, cmd: typing.Generator[dict, T, typing.Any]) -> T:
        """Execute a command on the server and wait for the result.

        :param cmd: any CDP command
        :returns: a CDP result
        :raises CdpConnectionClosed: if the WebSocket is closed while sending
        :raises Exception: whatever exception was stored as the result of the
            command (for instance :class:`BrowserError`)
        """
        cmd_id = next(self.id_iter)
        cmd_event = trio.Event()
        request = next(cmd)
        request["id"] = cmd_id
        if self.session_id:
            request["sessionId"] = self.session_id
        request_str = json.dumps(request)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"Sending CDP message: {cmd_id} {cmd_event}: {request_str}")
        # Register the command only once the request has been built, so a
        # failure while building it cannot leave a stale entry behind.
        self.inflight_cmd[cmd_id] = cmd, cmd_event
        try:
            try:
                await self.ws.send_message(request_str)
            except WsConnectionClosed as wcc:
                raise CdpConnectionClosed(wcc.reason) from None
            await cmd_event.wait()
        except BaseException:
            # The send failed or this task was cancelled: nobody will collect
            # the reply any more, so drop the bookkeeping for this command.
            self.inflight_cmd.pop(cmd_id, None)
            self.inflight_result.pop(cmd_id, None)
            raise
        response = self.inflight_result.pop(cmd_id)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"Received CDP message: {response}")
        if isinstance(response, Exception):
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Exception raised by {cmd_event} message: {type(response).__name__}")
            raise response
        return response

    def listen(self, *event_types, buffer_size=10):
        """Return an async iterator that iterates over events matching the
        indicated types.

        :param event_types: event classes to subscribe to
        :param buffer_size: capacity of the memory channel; events that arrive
            while it is full are logged and dropped
        :returns: the receive end of the memory channel
        """
        sender, receiver = trio.open_memory_channel(buffer_size)
        for event_type in event_types:
            self.channels[event_type].add(sender)
        return receiver

    @asynccontextmanager
    async def wait_for(self, event_type: typing.Type[T], buffer_size=10) -> typing.AsyncGenerator[CmEventProxy, None]:
        """Wait for an event of the given type and return it.

        This is an async context manager, so you should open it inside
        an async with block. The block will not exit until the indicated
        event is received.

        :param event_type: the event class to wait for
        :param buffer_size: capacity of the memory channel used internally
        :returns: a :class:`CmEventProxy` whose ``value`` is set to the event
            once the block has exited
        """
        sender: trio.MemorySendChannel
        receiver: trio.MemoryReceiveChannel
        sender, receiver = trio.open_memory_channel(buffer_size)
        self.channels[event_type].add(sender)
        proxy = CmEventProxy()
        try:
            yield proxy
            async with receiver:
                proxy.value = await receiver.receive()
        finally:
            # Unsubscribe right away, also when the block raised, instead of
            # leaving a dead sender registered until the next matching event.
            self.channels[event_type].discard(sender)

    def _handle_data(self, data):
        """Handle incoming WebSocket data.

        Messages carrying an ``id`` are command responses; everything else is
        treated as an event.

        :param dict data: a JSON dictionary
        """
        if "id" in data:
            self._handle_cmd_response(data)
        else:
            self._handle_event(data)

    def _handle_cmd_response(self, data):
        """Handle a response to a command. This will set an event flag that
        will return control to the task that called the command.

        :param dict data: response as a JSON dictionary
        :raises InternalError: if the command generator yields again instead
            of finishing after it has been sent the result
        """
        cmd_id = data["id"]
        try:
            cmd, event = self.inflight_cmd.pop(cmd_id)
        except KeyError:
            logger.warning("Got a message with a command ID that does not exist: %s", data)
            return
        if "error" in data:
            # If the server reported an error, convert it to an exception and do
            # not process the response any further.
            self.inflight_result[cmd_id] = BrowserError(data["error"])
        else:
            # Otherwise, continue the generator to parse the JSON result
            # into a CDP object.
            try:
                _ = cmd.send(data["result"])
                raise InternalError("The command's generator function did not exit when expected!")
            except StopIteration as stop:
                # The generator's return value is the parsed result.
                self.inflight_result[cmd_id] = stop.value
        event.set()

    def _handle_event(self, data):
        """Handle an event.

        The event is parsed with the loaded devtools module and offered,
        without blocking, to every channel subscribed to its type.

        :param dict data: event as a JSON dictionary
        """
        event = devtools.util.parse_json_event(data)
        logger.debug("Received event: %s", event)
        to_remove = set()
        for sender in self.channels[type(event)]:
            try:
                sender.send_nowait(event)
            except trio.WouldBlock:
                logger.error('Unable to send event "%r" due to full channel %s', event, sender)
            except trio.BrokenResourceError:
                # The receiving side has been closed; forget this subscriber.
                to_remove.add(sender)
        if to_remove:
            self.channels[type(event)] -= to_remove
|
||||
|
||||
|
||||
class CdpSession(CdpBase):
    """Contains the state for a CDP session.

    Generally you should not instantiate this object yourself; you should call
    :meth:`CdpConnection.open_session`.
    """

    def __init__(self, ws, session_id, target_id):
        """Constructor.

        :param trio_websocket.WebSocketConnection ws:
        :param devtools.target.SessionID session_id:
        :param devtools.target.TargetID target_id:
        """
        super().__init__(ws, session_id, target_id)

        # Number of active dom_enable()/page_enable() blocks, each guarded by
        # its own lock so enable/disable commands are not interleaved.
        self._dom_enable_count = 0
        self._dom_enable_lock = trio.Lock()
        self._page_enable_count = 0
        self._page_enable_lock = trio.Lock()

    @asynccontextmanager
    async def dom_enable(self):
        """A context manager that executes ``dom.enable()`` when it enters and
        then calls ``dom.disable()`` when it exits.

        This keeps track of concurrent callers and only disables DOM
        events when all callers have exited.
        """
        async with self._dom_enable_lock:
            if self._dom_enable_count == 0:
                await self.execute(devtools.dom.enable())
            # Count this caller only after enable() succeeded, so a failed
            # enable does not leave the counter permanently off by one.
            self._dom_enable_count += 1

        try:
            yield
        finally:
            # Runs even when the block raised, so the counter stays balanced
            # and the last caller still disables the domain.
            async with self._dom_enable_lock:
                self._dom_enable_count -= 1
                if self._dom_enable_count == 0:
                    await self.execute(devtools.dom.disable())

    @asynccontextmanager
    async def page_enable(self):
        """A context manager that executes ``page.enable()`` when it enters and
        then calls ``page.disable()`` when it exits.

        This keeps track of concurrent callers and only disables page
        events when all callers have exited.
        """
        async with self._page_enable_lock:
            if self._page_enable_count == 0:
                await self.execute(devtools.page.enable())
            # Count this caller only after enable() succeeded, so a failed
            # enable does not leave the counter permanently off by one.
            self._page_enable_count += 1

        try:
            yield
        finally:
            # Runs even when the block raised, so the counter stays balanced
            # and the last caller still disables the domain.
            async with self._page_enable_lock:
                self._page_enable_count -= 1
                if self._page_enable_count == 0:
                    await self.execute(devtools.page.disable())
|
||||
|
||||
|
||||
class CdpConnection(CdpBase, trio.abc.AsyncResource):
    """Contains the connection state for a Chrome DevTools Protocol server.

    CDP can multiplex multiple "sessions" over a single connection. This
    class corresponds to the "root" session, i.e. the implicitly created
    session that has no session ID. This class is responsible for
    reading incoming WebSocket messages and forwarding them to the
    corresponding session, as well as handling messages targeted at the
    root session itself. You should generally call the
    :func:`open_cdp()` instead of instantiating this class directly.
    """

    def __init__(self, ws):
        """Constructor.

        :param trio_websocket.WebSocketConnection ws:
        """
        super().__init__(ws, session_id=None, target_id=None)
        # Session ID -> CdpSession, used to route incoming messages.
        self.sessions = {}

    async def aclose(self):
        """Close the underlying WebSocket connection.

        This will cause the reader task to gracefully exit when it tries
        to read the next message from the WebSocket. All of the public
        APIs (``execute()``, ``listen()``, etc.) will raise
        ``CdpConnectionClosed`` after the CDP connection is closed. It
        is safe to call this multiple times.
        """
        await self.ws.aclose()

    @asynccontextmanager
    async def open_session(self, target_id) -> typing.AsyncIterator[CdpSession]:
        """This context manager opens a session and enables the "simple" style
        of calling CDP APIs.

        For example, inside a session context, you can call ``await
        dom.get_document()`` and it will execute on the current session
        automatically.

        :param target_id: the target to attach to
        """
        session = await self.connect_session(target_id)
        with session_context(session):
            yield session

    async def connect_session(self, target_id) -> "CdpSession":
        """Returns a new :class:`CdpSession` connected to the specified
        target.

        The session is also recorded in ``self.sessions`` so that the reader
        task can route messages to it.

        :param target_id: the target to attach to
        """
        session_id = await self.execute(devtools.target.attach_to_target(target_id, True))
        session = CdpSession(self.ws, session_id, target_id)
        self.sessions[session_id] = session
        return session

    def _release_waiters(self, reason):
        """Wake everything still waiting on this connection or its sessions.

        Event channels are closed, and every command that is still awaiting
        a reply is completed with :class:`CdpConnectionClosed` so that the
        task blocked in ``execute()`` raises instead of waiting forever.

        :param reason: close reason reported by the WebSocket
        """
        for endpoint in (self, *self.sessions.values()):
            for senders in endpoint.channels.values():
                for sender in senders:
                    sender.close()
            # Snapshot and clear first, then hand each waiter its exception.
            pending = list(endpoint.inflight_cmd.items())
            endpoint.inflight_cmd.clear()
            for cmd_id, (_cmd, event) in pending:
                endpoint.inflight_result[cmd_id] = CdpConnectionClosed(reason)
                event.set()

    async def _reader_task(self):
        """Runs in the background and handles incoming messages: dispatching
        responses to commands and events to listeners.

        :raises BrowserError: if a message is not valid JSON or names a
            session this connection does not know
        """
        closed_reason = None
        while True:
            try:
                message = await self.ws.get_message()
            except WsConnectionClosed as closed:
                # If the WebSocket is closed, we don't want to throw an
                # exception from the reader task. Instead we will throw
                # exceptions from the public API methods, and we can quietly
                # exit the reader task here.
                closed_reason = closed.reason
                break
            try:
                data = json.loads(message)
            except json.JSONDecodeError:
                raise BrowserError({"code": -32700, "message": "Client received invalid JSON", "data": message})
            logger.debug("Received message %r", data)
            if "sessionId" in data:
                session_id = devtools.target.SessionID(data["sessionId"])
                try:
                    session = self.sessions[session_id]
                except KeyError:
                    raise BrowserError(
                        {
                            "code": -32700,
                            "message": "Browser sent a message for an invalid session",
                            "data": f"{session_id!r}",
                        }
                    )
                session._handle_data(data)
            else:
                self._handle_data(data)

        # The socket is gone: release listeners and pending commands on the
        # root connection as well as on every session.
        self._release_waiters(closed_reason)
|
||||
|
||||
|
||||
@asynccontextmanager
async def open_cdp(url) -> typing.AsyncIterator[CdpConnection]:
    """Open a CDP connection to the browser at ``url`` for the duration of an
    ``async with`` block, and close it when the block exits.

    While the block runs, the connection is also installed as the current
    connection context, so that commands like ``await target.get_targets()``
    will run on this connection automatically. If you want to use multiple
    connections concurrently, it is recommended to open each one in a
    separate task.

    :param url: WebSocket URL of the browser
    """
    async with trio.open_nursery() as nursery:
        conn = await connect_cdp(nursery, url)
        # Leaving this block awaits conn.aclose(), whether or not it raised.
        async with conn:
            with connection_context(conn):
                yield conn
|
||||
|
||||
|
||||
async def connect_cdp(nursery, url) -> CdpConnection:
    """Connect to the browser at ``url`` and start the reader task in
    ``nursery``.

    The ``open_cdp()`` context manager is preferred in most situations.
    You should only use this function if you need to specify a custom
    nursery. This connection is not automatically closed! You can either
    use the connection object as a context manager (``async with
    conn:``) or else call ``await conn.aclose()`` on it when you are
    done with it.

    :param nursery: nursery that runs the WebSocket and the reader task
    :param url: WebSocket URL of the browser
    :returns: the new connection
    """
    websocket = await connect_websocket_url(
        nursery,
        url,
        max_message_size=MAX_WS_MESSAGE_SIZE,
    )
    connection = CdpConnection(websocket)
    # The reader runs in the background until the WebSocket is closed.
    nursery.start_soon(connection._reader_task)
    return connection
|
Reference in New Issue
Block a user