64
64
WEB_AGENT_SYSTEM_PROMPT ,
65
65
InteractiveRegion ,
66
66
VisualViewport ,
67
+ _normalize_identifier ,
67
68
_parse_json_output ,
68
69
_reload_image ,
69
70
add_set_of_mark ,
@@ -547,8 +548,7 @@ async def async_click_id(self, identifier: Union[str, int]) -> None:
547
548
Args:
548
549
identifier (Union[str, int]): The ID of the element to click.
549
550
"""
550
- if isinstance (identifier , int ):
551
- identifier = str (identifier )
551
+ identifier = _normalize_identifier (identifier )
552
552
target = self .page .locator (f"[__elementId='{ identifier } ']" )
553
553
554
554
try :
@@ -609,8 +609,7 @@ async def async_download_file_id(self, identifier: Union[str, int]) -> str:
609
609
str: The path to the downloaded file.
610
610
"""
611
611
612
- if isinstance (identifier , int ):
613
- identifier = str (identifier )
612
+ identifier = _normalize_identifier (identifier )
614
613
try :
615
614
target = self .page .locator (f"[__elementId='{ identifier } ']" )
616
615
except (TimeoutError , Exception ) as e : # type: ignore[misc]
@@ -661,8 +660,7 @@ async def async_fill_input_id(
661
660
Returns:
662
661
str: The result of the action.
663
662
"""
664
- if isinstance (identifier , int ):
665
- identifier = str (identifier )
663
+ identifier = _normalize_identifier (identifier )
666
664
667
665
try :
668
666
target = self .page .locator (f"[__elementId='{ identifier } ']" )
@@ -726,8 +724,7 @@ async def async_hover_id(self, identifier: Union[str, int]) -> str:
726
724
Returns:
727
725
str: The result of the action.
728
726
"""
729
- if isinstance (identifier , int ):
730
- identifier = str (identifier )
727
+ identifier = _normalize_identifier (identifier )
731
728
try :
732
729
target = self .page .locator (f"[__elementId='{ identifier } ']" )
733
730
except (TimeoutError , Exception ) as e : # type: ignore[misc]
@@ -980,6 +977,7 @@ def __init__(
980
977
self .planning_agent_model = planning_agent_model
981
978
self .output_language = output_language
982
979
self .browser .web_agent_model = web_agent_model
980
+ self .browser_initialized = False
983
981
984
982
self .history : list [Any ] = []
985
983
self .web_agent , self .planning_agent = self ._initialize_agent ()
@@ -1343,7 +1341,7 @@ async def browse_url(
1343
1341
The task is not completed within the round limit. Please check
1344
1342
the last round { self .history_window } information to see if
1345
1343
there is any useful information:
1346
- <history>{ self .history [- self .history_window :]} </history>
1344
+ <history>{ self .history [- self .history_window :]} </history>
1347
1345
"""
1348
1346
1349
1347
else :
@@ -1353,4 +1351,238 @@ async def browse_url(
1353
1351
return simulation_result
1354
1352
1355
1353
def get_tools(self) -> List[FunctionTool]:
    r"""Returns the toolkit's public methods wrapped as function tools.

    Returns:
        List[FunctionTool]: One `FunctionTool` per exposed method, in
            the order they are listed here.
    """
    # Order matters to callers that index the returned list, so keep
    # `browse_url` first and the rest in their declared sequence.
    exposed_methods = (
        self.browse_url,
        self.setup_browser,
        self.shutdown_browser,
        self.visit_page,
        self.get_current_url,
        self.scroll_page,
        self.click_element,
        self.fill_input_element,
        self.hover_element,
        self.find_text_on_page,
        self.navigate_back,
        self.get_page_content,
        self.download_file_by_element_id,
        self.capture_screenshot,
        self.ask_question_about_video,
    )
    return [FunctionTool(method) for method in exposed_methods]
1371
+
1372
+ async def _ensure_browser_initialized (self ):
1373
+ if not self .browser_initialized :
1374
+ await self .browser .async_init ()
1375
+ self .browser_initialized = True
1376
+
1377
async def setup_browser(self) -> str:
    r"""Initializes the browser session asynchronously.

    Returns:
        str: A message stating whether this call started the session or
            the session was already flagged as initialized.
    """
    if self.browser_initialized:
        return "Browser session already initialized."

    await self.browser.async_init()
    self.browser_initialized = True
    return "Browser session initialized."
1384
+
1385
async def shutdown_browser(self) -> str:
    r"""Closes the browser session asynchronously.

    Returns:
        str: A message stating whether the session was closed by this
            call or there was no initialized session to close.
    """
    if not self.browser_initialized:
        return "Browser session was not initialized or already closed."

    await self.browser.async_close()
    self.browser_initialized = False
    return "Browser session closed."
1392
+
1393
async def visit_page(self, url: str) -> str:
    r"""Opens the given URL in the browser, starting the browser first
    if needed.

    Args:
        url (str): The URL to visit.

    Returns:
        str: Confirmation message containing the visited URL.
    """
    await self._ensure_browser_initialized()
    browser = self.browser
    await browser.async_visit_page(url)
    confirmation = f"Navigated to URL: {url}"
    return confirmation
1405
+
1406
async def get_current_url(self) -> str:
    r"""Reports the URL the browser is currently on, starting the
    browser first if needed.

    Returns:
        str: The value returned by the browser's `get_url`.
    """
    await self._ensure_browser_initialized()
    current_url = self.browser.get_url()
    return current_url
1414
+
1415
async def scroll_page(
    self, direction: Literal["up", "down", "top", "bottom"]
) -> str:
    r"""Scrolls the current page in the requested direction.

    Args:
        direction (Literal["up", "down", "top", "bottom"]): Where to
            scroll.

    Returns:
        str: A fixed confirmation for "up"/"down", whatever the browser
            returns for "top"/"bottom", or an error message for any
            other value.
    """
    await self._ensure_browser_initialized()
    browser = self.browser

    # "top"/"bottom" forward the browser's own result string.
    if direction == "top":
        return await browser.async_scroll_to_top()
    if direction == "bottom":
        return await browser.async_scroll_to_bottom()

    # "up"/"down" discard the browser result and report a fixed message.
    if direction == "up":
        await browser.async_scroll_up()
        return "Scrolled up."
    if direction == "down":
        await browser.async_scroll_down()
        return "Scrolled down."

    return "Invalid scroll direction."
1439
+
1440
async def click_element(self, element_id: Union[str, int]) -> str:
    r"""Clicks the element carrying the given ID.

    Args:
        element_id (Union[str, int]): The ID of the element to click.

    Returns:
        str: Confirmation message on success. A `ValueError` from the
            browser is returned as its bare message; any other
            exception is returned with an "Error clicking element"
            prefix.
    """
    await self._ensure_browser_initialized()
    try:
        await self.browser.async_click_id(element_id)
        return f"Clicked element with ID: {element_id}"
    except Exception as err:
        # ValueError text is passed through unchanged; everything else
        # gets wrapped so the caller can see which element failed.
        if isinstance(err, ValueError):
            return str(err)
        return f"Error clicking element {element_id}: {err}"
1457
+
1458
async def fill_input_element(
    self, element_id: Union[str, int], text: str
) -> str:
    r"""Fills the input field with the given ID by delegating to the
    browser's `async_fill_input_id`.

    Args:
        element_id (Union[str, int]): The ID of the input field.
        text (str): The text to fill.

    Returns:
        str: The result string returned by the browser.
    """
    await self._ensure_browser_initialized()
    outcome = await self.browser.async_fill_input_id(element_id, text)
    return outcome
1472
+
1473
async def hover_element(self, element_id: Union[str, int]) -> str:
    r"""Hovers over the element with the given ID by delegating to the
    browser's `async_hover_id`.

    Args:
        element_id (Union[str, int]): The ID of the element to hover.

    Returns:
        str: The result string returned by the browser.
    """
    await self._ensure_browser_initialized()
    outcome = await self.browser.async_hover_id(element_id)
    return outcome
1484
+
1485
async def find_text_on_page(self, search_text: str) -> str:
    r"""Searches the current page for text by delegating to the
    browser's `async_find_text_on_page`.

    Args:
        search_text (str): The text to find.

    Returns:
        str: The result string returned by the browser.
    """
    await self._ensure_browser_initialized()
    outcome = await self.browser.async_find_text_on_page(search_text)
    return outcome
1496
+
1497
async def navigate_back(self) -> str:
    r"""Sends the browser back one step in its history, starting the
    browser first if needed.

    Returns:
        str: Fixed confirmation message.
    """
    await self._ensure_browser_initialized()
    browser = self.browser
    await browser.async_back()
    return "Navigated back to the previous page."
1506
+
1507
async def get_page_content(
    self, format: Literal["markdown", "html"] = "markdown"
) -> str:
    r"""Extracts the content of the current page.

    Args:
        format (Literal["markdown", "html"]): Content format.
            Defaults to "markdown".

    Returns:
        str: The browser's `async_get_webpage_content` result for
            "markdown", its `async_extract_url_content` result for
            "html", or an error message for any other value.
    """
    await self._ensure_browser_initialized()
    browser = self.browser

    if format == "html":
        return await browser.async_extract_url_content()
    if format == "markdown":
        return await browser.async_get_webpage_content()
    return "Invalid format. Choose 'markdown' or 'html'."
1525
+
1526
async def download_file_by_element_id(
    self, element_id: Union[str, int]
) -> str:
    r"""Downloads a file via the element with the given ID by
    delegating to the browser's `async_download_file_id`.

    Args:
        element_id (Union[str, int]): Element ID for download.

    Returns:
        str: The string returned by the browser (path to the
            downloaded file or an error, per the original notes).
    """
    await self._ensure_browser_initialized()
    outcome = await self.browser.async_download_file_id(element_id)
    return outcome
1539
+
1540
async def capture_screenshot(
    self, mark_interactive_elements: bool = False
) -> str:
    r"""Takes a screenshot of the current page and saves it.

    Args:
        mark_interactive_elements (bool): When true, use the browser's
            set-of-marks screenshot call instead of the plain one.
            Defaults to False.

    Returns:
        str: The saved file path wrapped in a confirmation message, a
            failure message when no path came back, or an error message
            when the browser call raised.
    """
    await self._ensure_browser_initialized()
    try:
        # Pick the coroutine first so both variants share one await.
        if mark_interactive_elements:
            take_shot = self.browser.async_get_som_screenshot
        else:
            take_shot = self.browser.async_get_screenshot
        _, saved_path = await take_shot(save_image=True)

        if not saved_path:
            return "Failed to save screenshot."
        return f"Screenshot saved to: {saved_path}"
    except Exception as err:
        return f"Error capturing screenshot: {err}"
1568
+
1569
def ask_question_about_video(self, question: str) -> str:
    r"""Asks a question about a video on the current page by delegating
    to the browser's synchronous `ask_question_about_video`.

    Note: Per the original author's notes, the underlying call is
    interactive and may block on user input, which is why this wrapper
    is a plain function rather than a coroutine.

    Args:
        question (str): The question to ask about the video.

    Returns:
        str: The answer to the question or an error/cancellation message.
    """
    # The browser is deliberately NOT initialized here: the delegated
    # call is synchronous and, per the original notes, does not depend
    # on the async init state. Callers are assumed to manage the browser
    # state around this call; revisit if this tool is used standalone.
    answer = self.browser.ask_question_about_video(question)
    return answer
0 commit comments