Skip to content

Commit 7bb16ec

Browse files
update chore:browser-primitive-actions
1 parent 2998f32 commit 7bb16ec

File tree

4 files changed

+576
-23
lines changed

4 files changed

+576
-23
lines changed

camel/toolkits/async_browser_toolkit.py

Lines changed: 242 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
WEB_AGENT_SYSTEM_PROMPT,
6565
InteractiveRegion,
6666
VisualViewport,
67+
_normalize_identifier,
6768
_parse_json_output,
6869
_reload_image,
6970
add_set_of_mark,
@@ -547,8 +548,7 @@ async def async_click_id(self, identifier: Union[str, int]) -> None:
547548
Args:
548549
identifier (Union[str, int]): The ID of the element to click.
549550
"""
550-
if isinstance(identifier, int):
551-
identifier = str(identifier)
551+
identifier = _normalize_identifier(identifier)
552552
target = self.page.locator(f"[__elementId='{identifier}']")
553553

554554
try:
@@ -609,8 +609,7 @@ async def async_download_file_id(self, identifier: Union[str, int]) -> str:
609609
str: The path to the downloaded file.
610610
"""
611611

612-
if isinstance(identifier, int):
613-
identifier = str(identifier)
612+
identifier = _normalize_identifier(identifier)
614613
try:
615614
target = self.page.locator(f"[__elementId='{identifier}']")
616615
except (TimeoutError, Exception) as e: # type: ignore[misc]
@@ -661,8 +660,7 @@ async def async_fill_input_id(
661660
Returns:
662661
str: The result of the action.
663662
"""
664-
if isinstance(identifier, int):
665-
identifier = str(identifier)
663+
identifier = _normalize_identifier(identifier)
666664

667665
try:
668666
target = self.page.locator(f"[__elementId='{identifier}']")
@@ -726,8 +724,7 @@ async def async_hover_id(self, identifier: Union[str, int]) -> str:
726724
Returns:
727725
str: The result of the action.
728726
"""
729-
if isinstance(identifier, int):
730-
identifier = str(identifier)
727+
identifier = _normalize_identifier(identifier)
731728
try:
732729
target = self.page.locator(f"[__elementId='{identifier}']")
733730
except (TimeoutError, Exception) as e: # type: ignore[misc]
@@ -980,6 +977,7 @@ def __init__(
980977
self.planning_agent_model = planning_agent_model
981978
self.output_language = output_language
982979
self.browser.web_agent_model = web_agent_model
980+
self.browser_initialized = False
983981

984982
self.history: list[Any] = []
985983
self.web_agent, self.planning_agent = self._initialize_agent()
@@ -1343,7 +1341,7 @@ async def browse_url(
13431341
The task is not completed within the round limit. Please check
13441342
the last round {self.history_window} information to see if
13451343
there is any useful information:
1346-
<history>{self.history[-self.history_window :]}</history>
1344+
<history>{self.history[-self.history_window:]}</history>
13471345
"""
13481346

13491347
else:
@@ -1353,4 +1351,238 @@ async def browse_url(
13531351
return simulation_result
13541352

13551353
def get_tools(self) -> List[FunctionTool]:
    r"""Returns the tools exposed by this toolkit.

    Returns:
        List[FunctionTool]: ``browse_url`` followed by the individual
            browser actions defined on this class, each wrapped in a
            :obj:`FunctionTool`.
    """
    return [
        FunctionTool(self.browse_url),
        FunctionTool(self.setup_browser),
        FunctionTool(self.shutdown_browser),
        FunctionTool(self.visit_page),
        FunctionTool(self.get_current_url),
        FunctionTool(self.scroll_page),
        FunctionTool(self.click_element),
        FunctionTool(self.fill_input_element),
        FunctionTool(self.hover_element),
        FunctionTool(self.find_text_on_page),
        FunctionTool(self.navigate_back),
        FunctionTool(self.get_page_content),
        FunctionTool(self.download_file_by_element_id),
        FunctionTool(self.capture_screenshot),
        FunctionTool(self.ask_question_about_video),
    ]
1371+
1372+
async def _ensure_browser_initialized(self) -> None:
    r"""Starts the browser session if it has not been started yet.

    Awaits ``self.browser.async_init()`` only while
    ``self.browser_initialized`` is False, then sets the flag so later
    calls return immediately.
    """
    if self.browser_initialized:
        return
    await self.browser.async_init()
    self.browser_initialized = True
1376+
1377+
async def setup_browser(self) -> str:
    r"""Initializes the browser session asynchronously.

    Returns:
        str: "Browser session initialized." when this call started the
            session, or "Browser session already initialized." when
            the session was already marked as initialized.
    """
    if self.browser_initialized:
        return "Browser session already initialized."
    await self.browser.async_init()
    self.browser_initialized = True
    return "Browser session initialized."
1384+
1385+
async def shutdown_browser(self) -> str:
    r"""Closes the browser session asynchronously.

    Returns:
        str: "Browser session closed." when this call closed the
            session, or a message stating that the session was not
            initialized or was already closed.
    """
    if not self.browser_initialized:
        return "Browser session was not initialized or already closed."
    await self.browser.async_close()
    self.browser_initialized = False
    return "Browser session closed."
1392+
1393+
async def visit_page(self, url: str) -> str:
    r"""Navigates the browser to the specified URL asynchronously.

    Args:
        url (str): The URL to visit.

    Returns:
        str: Confirmation message containing the URL.
    """
    await self._ensure_browser_initialized()
    await self.browser.async_visit_page(url)
    return f"Navigated to URL: {url}"
1405+
1406+
async def get_current_url(self) -> str:
    r"""Returns the current URL of the browser asynchronously.

    Returns:
        str: The current URL.
    """
    await self._ensure_browser_initialized()
    # ``get_url`` is called without ``await``; only the initialization
    # step above is asynchronous.
    return self.browser.get_url()
1414+
1415+
async def scroll_page(
    self, direction: Literal["up", "down", "top", "bottom"]
) -> str:
    r"""Scrolls the current page asynchronously.

    Args:
        direction (Literal["up", "down", "top", "bottom"]): The
            direction to scroll.

    Returns:
        str: "Scrolled up." or "Scrolled down." for the step scrolls,
            the browser's own result for "top" and "bottom", or
            "Invalid scroll direction." for any other value.
    """
    await self._ensure_browser_initialized()
    if direction == "up":
        await self.browser.async_scroll_up()
        return "Scrolled up."
    if direction == "down":
        await self.browser.async_scroll_down()
        return "Scrolled down."
    if direction == "top":
        return await self.browser.async_scroll_to_top()
    if direction == "bottom":
        return await self.browser.async_scroll_to_bottom()
    # ``Literal`` is only a static hint, so other strings can still
    # arrive at runtime.
    return "Invalid scroll direction."
1439+
1440+
async def click_element(self, element_id: Union[str, int]) -> str:
    r"""Clicks an element with the given ID asynchronously.

    Args:
        element_id (Union[str, int]): The ID of the element to click.

    Returns:
        str: Confirmation message on success. A :obj:`ValueError` from
            the browser is returned as its message text; any other
            exception is returned as an "Error clicking element ..."
            message instead of being raised.
    """
    await self._ensure_browser_initialized()
    try:
        await self.browser.async_click_id(element_id)
    except ValueError as e:
        return str(e)
    except Exception as e:
        return f"Error clicking element {element_id}: {e}"
    return f"Clicked element with ID: {element_id}"
1457+
1458+
async def fill_input_element(
    self, element_id: Union[str, int], text: str
) -> str:
    r"""Fills an input field with text and presses Enter, asynchronously.

    Args:
        element_id (Union[str, int]): The ID of the input field.
        text (str): The text to fill.

    Returns:
        str: The result returned by the browser's
            ``async_fill_input_id`` (confirmation message or error).
    """
    await self._ensure_browser_initialized()
    return await self.browser.async_fill_input_id(element_id, text)
1472+
1473+
async def hover_element(self, element_id: Union[str, int]) -> str:
    r"""Hovers over an element with the given ID asynchronously.

    Args:
        element_id (Union[str, int]): The ID of the element to hover.

    Returns:
        str: The result returned by the browser's ``async_hover_id``
            (confirmation message or error).
    """
    await self._ensure_browser_initialized()
    return await self.browser.async_hover_id(element_id)
1484+
1485+
async def find_text_on_page(self, search_text: str) -> str:
    r"""Finds text on the page and scrolls to it, asynchronously.

    Args:
        search_text (str): The text to find.

    Returns:
        str: The result returned by the browser's
            ``async_find_text_on_page`` (confirmation message or
            error).
    """
    await self._ensure_browser_initialized()
    return await self.browser.async_find_text_on_page(search_text)
1496+
1497+
async def navigate_back(self) -> str:
    r"""Navigates back to the previous page, asynchronously.

    Returns:
        str: Confirmation message.
    """
    await self._ensure_browser_initialized()
    await self.browser.async_back()
    return "Navigated back to the previous page."
1506+
1507+
async def get_page_content(
    self, format: Literal["markdown", "html"] = "markdown"
) -> str:
    r"""Extracts page content asynchronously.

    Args:
        format (Literal["markdown", "html"]): Content format.
            "markdown" is served by the browser's
            ``async_get_webpage_content`` and "html" by
            ``async_extract_url_content``. Defaults to "markdown".

    Returns:
        str: Page content, or "Invalid format. Choose 'markdown' or
            'html'." for any other value.
    """
    await self._ensure_browser_initialized()
    if format == "markdown":
        return await self.browser.async_get_webpage_content()
    if format == "html":
        return await self.browser.async_extract_url_content()
    # ``Literal`` is only a static hint, so other strings can still
    # arrive at runtime.
    return "Invalid format. Choose 'markdown' or 'html'."
1525+
1526+
async def download_file_by_element_id(
    self, element_id: Union[str, int]
) -> str:
    r"""Downloads a file by element ID, asynchronously.

    Args:
        element_id (Union[str, int]): Element ID for download.

    Returns:
        str: The result returned by the browser's
            ``async_download_file_id`` (path to the downloaded file or
            error).
    """
    await self._ensure_browser_initialized()
    return await self.browser.async_download_file_id(element_id)
1539+
1540+
async def capture_screenshot(
    self, mark_interactive_elements: bool = False
) -> str:
    r"""Captures a screenshot asynchronously.

    Args:
        mark_interactive_elements (bool): When True, the screenshot is
            taken with the browser's ``async_get_som_screenshot``;
            otherwise with ``async_get_screenshot``. Defaults to False.

    Returns:
        str: "Screenshot saved to: <path>" on success, "Failed to save
            screenshot." when the browser reports no file path, or an
            "Error capturing screenshot: ..." message when the capture
            raises.
    """
    await self._ensure_browser_initialized()
    try:
        if mark_interactive_elements:
            capture = self.browser.async_get_som_screenshot
        else:
            capture = self.browser.async_get_screenshot
        # Both capture calls return a pair; only the second item (the
        # saved file path) is used here.
        _, file_path = await capture(save_image=True)
    except Exception as e:
        return f"Error capturing screenshot: {e}"

    if file_path:
        return f"Screenshot saved to: {file_path}"
    return "Failed to save screenshot."
1568+
1569+
def ask_question_about_video(self, question: str) -> str:
    r"""Asks a question about a video on the current page.

    Note: This is an interactive function that may require user input.
    This function itself is synchronous due to the input() call.

    Args:
        question (str): The question to ask about the video.

    Returns:
        str: The answer to the question or an error/cancellation message.
    """
    # This method is synchronous, so it cannot await
    # ``self._ensure_browser_initialized()``; the browser state is
    # assumed to be managed by the caller.
    return self.browser.ask_question_about_video(question)

0 commit comments

Comments
 (0)