Skip to content

Commit 8241a7c

Browse files
committed
chore: iterate on perform
1 parent 9fbe3d8 commit 8241a7c

File tree

24 files changed

+475
-249
lines changed

24 files changed

+475
-249
lines changed

docs/src/api/class-browser.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ await browser.CloseAsync();
267267
### option: Browser.newContext.storageStatePath = %%-csharp-java-context-option-storage-state-path-%%
268268
* since: v1.9
269269

270+
### option: Browser.newContext.agent = %%-js-context-option-agent-%%
271+
* since: v1.58
272+
270273
## async method: Browser.newPage
271274
* since: v1.8
272275
- returns: <[Page]>

docs/src/api/class-page.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2024,6 +2024,38 @@ Name of the function on the window object
20242024

20252025
Callback function which will be called in Playwright's context.
20262026

2027+
## async method: Page.extract
2028+
* since: v1.58
2029+
* langs: js
2030+
- returns: <[any]>
2031+
2032+
Extract information from the page using the agentic loop and return it in the given Zod schema format.
2033+
2034+
**Usage**
2035+
2036+
```js
2037+
await page.extract('List of items in the cart', z.object({
2038+
title: z.string().describe('Item title to extract'),
2039+
price: z.string().describe('Item price to extract'),
2040+
}).array());
2041+
```
2042+
2043+
### param: Page.extract.query
2044+
* since: v1.58
2045+
- `query` <[string]>
2046+
2047+
Query describing the information to extract using the agentic loop.
2048+
2049+
### param: Page.extract.schema
2050+
* since: v1.58
2051+
- `schema` <[z.ZodSchema]>
2052+
2053+
### option: Page.extract.maxTurns
2054+
* since: v1.58
2055+
- `maxTurns` <[int]>
2056+
2057+
Maximum number of agentic steps to take while extracting the information.
2058+
20272059
## async method: Page.fill
20282060
* since: v1.8
20292061
* discouraged: Use locator-based [`method: Locator.fill`] instead. Read more about [locators](../locators.md).
@@ -2997,6 +3029,38 @@ Whether or not to generate tagged (accessible) PDF. Defaults to `false`.
29973029

29983030
Whether or not to embed the document outline into the PDF. Defaults to `false`.
29993031

3032+
## async method: Page.perform
3033+
* since: v1.58
3034+
* langs: js
3035+
3036+
Perform an action using the agentic loop.
3037+
3038+
**Usage**
3039+
3040+
```js
3041+
await page.perform('Click submit button');
3042+
```
3043+
3044+
### param: Page.perform.task
3045+
* since: v1.58
3046+
- `task` <[string]>
3047+
3048+
Task to perform using agentic loop.
3049+
3050+
### option: Page.perform.key
3051+
* since: v1.58
3052+
- `key` <[string]>
3053+
3054+
All agentic actions are converted to Playwright calls and cached.
3055+
By default, they are cached globally with the `task` as a key. This option allows controlling the cache key explicitly.
3056+
3057+
### option: Page.perform.maxTurns
3058+
* since: v1.58
3059+
- `maxTurns` <[int]>
3060+
3061+
Maximum number of agentic steps to take while performing this action.
3062+
3063+
30003064
## async method: Page.press
30013065
* since: v1.8
30023066
* discouraged: Use locator-based [`method: Locator.press`] instead. Read more about [locators](../locators.md).

docs/src/api/params.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,16 @@ It makes the execution of the tests non-deterministic.
370370
Emulates consistent window screen size available inside web page via `window.screen`. Is only used when the
371371
[`option: viewport`] is set.
372372

373+
## js-context-option-agent
374+
* langs: js
375+
- `agent` <[Object]>
376+
- `provider` <[string]> LLM provider to use
377+
- `model` <[string]> Model identifier within provider
378+
- `cacheDir` ?<[string]> Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
379+
- `cacheMode` ?<['force'|'ignore'|'auto']> Cache control, defaults to 'auto'
380+
381+
Agent settings for [`method: Page.perform`] and [`method: Page.extract`].
382+
373383
## fetch-param-url
374384
- `url` <[string]>
375385

docs/src/test-api/class-testoptions.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ export default defineConfig({
4646
});
4747
```
4848

49+
## property: TestOptions.agent = %%-js-context-option-agent-%%
50+
* since: v1.58
51+
52+
4953
## property: TestOptions.baseURL = %%-context-option-baseURL-%%
5054
* since: v1.10
5155

package-lock.json

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
"@eslint/compat": "^1.3.2",
5858
"@eslint/eslintrc": "^3.3.1",
5959
"@eslint/js": "^9.34.0",
60-
"@lowire/loop": "^0.0.4",
60+
"@lowire/loop": "^0.0.6",
6161
"@modelcontextprotocol/sdk": "^1.17.5",
6262
"@octokit/graphql-schema": "^15.26.0",
6363
"@stylistic/eslint-plugin": "^5.2.3",

packages/playwright-client/types/types.d.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ type ElementHandleWaitForSelectorOptionsNotHidden = ElementHandleWaitForSelector
2727
state?: 'visible'|'attached';
2828
};
2929

30+
// @ts-ignore this will be any if zod is not installed
31+
type ZodTypeAny = import('zod').ZodTypeAny;
32+
// @ts-ignore this will be any if zod is not installed
33+
type ZodInfer<T extends ZodTypeAny> = import('zod').infer<T>;
34+
3035
/**
3136
* Page provides methods to interact with a single tab in a [Browser](https://playwright.dev/docs/api/class-browser),
3237
* or an [extension background page](https://developer.chrome.com/extensions/background_pages) in Chromium. One
@@ -1013,6 +1018,24 @@ export interface Page {
10131018
*/
10141019
behavior?: 'wait'|'ignoreErrors'|'default'
10151020
}): Promise<void>;
1021+
1022+
/**
1023+
* Extract information from the page using the agentic loop, return it in a given Zod format.
1024+
*
1025+
* **Usage**
1026+
*
1027+
* ```js
1028+
* await page.extract('List of items in the cart', z.object({
1029+
* title: z.string().describe('Item title to extract'),
1030+
* price: z.string().describe('Item price to extract'),
1031+
* }).array());
1032+
* ```
1033+
*
1034+
* @param query Query describing the information to extract using the agentic loop.
1035+
* @param schema
1036+
* @param options
1037+
*/
1038+
extract<Schema extends ZodTypeAny>(query: string, schema: Schema): Promise<ZodInfer<Schema>>;
10161039
/**
10171040
* Emitted when the page closes.
10181041
*/
@@ -3796,6 +3819,31 @@ export interface Page {
37963819
width?: string|number;
37973820
}): Promise<Buffer>;
37983821

3822+
/**
3823+
* Perform action using agentic loop.
3824+
*
3825+
* **Usage**
3826+
*
3827+
* ```js
3828+
* await page.perform('Click submit button');
3829+
* ```
3830+
*
3831+
* @param task Task to perform using agentic loop.
3832+
* @param options
3833+
*/
3834+
perform(task: string, options?: {
3835+
/**
3836+
* All the agentic actions are converted to the Playwright calls and are cached. By default, they are cached globally
3837+
* with the `task` as a key. This option allows controlling the cache key explicitly.
3838+
*/
3839+
key?: string;
3840+
3841+
/**
3842+
* Maximum number of agentic steps to take while performing this action.
3843+
*/
3844+
maxTurns?: number;
3845+
}): Promise<void>;
3846+
37993847
/**
38003848
* **NOTE** Use locator-based [locator.press(key[, options])](https://playwright.dev/docs/api/class-locator#locator-press)
38013849
* instead. Read more about [locators](https://playwright.dev/docs/locators).
@@ -22033,6 +22081,32 @@ export interface BrowserContextOptions {
2203322081
*/
2203422082
acceptDownloads?: boolean;
2203522083

22084+
/**
22085+
* Agent settings for [page.perform(task[, options])](https://playwright.dev/docs/api/class-page#page-perform) and
22086+
* [page.extract(query, schema[, options])](https://playwright.dev/docs/api/class-page#page-extract).
22087+
*/
22088+
agent?: {
22089+
/**
22090+
* LLM provider to use
22091+
*/
22092+
provider: string;
22093+
22094+
/**
22095+
* Model identifier within provider
22096+
*/
22097+
model: string;
22098+
22099+
/**
22100+
* Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
22101+
*/
22102+
cacheDir?: string;
22103+
22104+
/**
22105+
* Cache control, defaults to 'auto'
22106+
*/
22107+
cacheMode?: 'force'|'ignore'|'auto';
22108+
};
22109+
2203622110
/**
2203722111
* When using [page.goto(url[, options])](https://playwright.dev/docs/api/class-page#page-goto),
2203822112
* [page.route(url, handler[, options])](https://playwright.dev/docs/api/class-page#page-route),

packages/playwright-core/src/client/clientInstrumentation.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import type { BrowserContext } from './browserContext';
1818
import type { APIRequestContext } from './fetch';
1919
import type { StackFrame } from '@protocol/channels';
20+
import type { Page } from './page';
2021

2122
// Instrumentation can mutate the data, for example change apiName or stepId.
2223
export interface ApiCallData {
@@ -35,6 +36,7 @@ export interface ClientInstrumentation {
3536
onApiCallBegin(apiCall: ApiCallData, channel: { type: string, method: string, params?: Record<string, any> }): void;
3637
onApiCallEnd(apiCall: ApiCallData): void;
3738
onWillPause(options: { keepTestTimeout: boolean }): void;
39+
onPage(page: Page): void;
3840

3941
runAfterCreateBrowserContext(context: BrowserContext): Promise<void>;
4042
runAfterCreateRequestContext(context: APIRequestContext): Promise<void>;
@@ -46,7 +48,7 @@ export interface ClientInstrumentationListener {
4648
onApiCallBegin?(apiCall: ApiCallData, channel: { type: string, method: string, params?: Record<string, any> }): void;
4749
onApiCallEnd?(apiCall: ApiCallData): void;
4850
onWillPause?(options: { keepTestTimeout: boolean }): void;
49-
51+
onPage?(page: Page): void;
5052
runAfterCreateBrowserContext?(context: BrowserContext): Promise<void>;
5153
runAfterCreateRequestContext?(context: APIRequestContext): Promise<void>;
5254
runBeforeCloseBrowserContext?(context: BrowserContext): Promise<void>;

packages/playwright-core/src/client/page.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import type * as api from '../../types/types';
5454
import type { ByRoleOptions } from '../utils/isomorphic/locatorUtils';
5555
import type { URLMatch } from '../utils/isomorphic/urlMatch';
5656
import type * as channels from '@protocol/channels';
57+
import type z from 'zod';
5758

5859
type PDFOptions = Omit<channels.PagePdfParams, 'width' | 'height' | 'margin'> & {
5960
width?: string | number,
@@ -116,6 +117,7 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
116117

117118
constructor(parent: ChannelOwner, type: string, guid: string, initializer: channels.PageInitializer) {
118119
super(parent, type, guid, initializer);
120+
this._instrumentation.onPage(this);
119121
this._browserContext = parent as unknown as BrowserContext;
120122
this._timeoutSettings = new TimeoutSettings(this._platform, this._browserContext._timeoutSettings);
121123

@@ -844,6 +846,14 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
844846
return result.pdf;
845847
}
846848

849+
async perform(task: string, options: { key?: string, maxTurns?: number } = {}): Promise<void> {
850+
throw new Error('Not implemented in playwright-core');
851+
}
852+
853+
extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
854+
throw new Error('Not implemented in playwright-core');
855+
}
856+
847857
async _snapshotForAI(options: TimeoutOptions & { track?: string } = {}): Promise<{ full: string, incremental?: string }> {
848858
return await this._channel.snapshotForAI({ timeout: this._timeoutSettings.timeout(options), track: options.track });
849859
}

packages/playwright-core/src/protocol/validator.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,12 @@ scheme.BrowserTypeLaunchPersistentContextParams = tObject({
602602
serviceWorkers: tOptional(tEnum(['allow', 'block'])),
603603
selectorEngines: tOptional(tArray(tType('SelectorEngine'))),
604604
testIdAttributeName: tOptional(tString),
605+
agent: tOptional(tObject({
606+
provider: tString,
607+
model: tString,
608+
cacheDir: tOptional(tString),
609+
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
610+
})),
605611
userDataDir: tString,
606612
slowMo: tOptional(tFloat),
607613
});
@@ -694,6 +700,12 @@ scheme.BrowserNewContextParams = tObject({
694700
serviceWorkers: tOptional(tEnum(['allow', 'block'])),
695701
selectorEngines: tOptional(tArray(tType('SelectorEngine'))),
696702
testIdAttributeName: tOptional(tString),
703+
agent: tOptional(tObject({
704+
provider: tString,
705+
model: tString,
706+
cacheDir: tOptional(tString),
707+
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
708+
})),
697709
proxy: tOptional(tObject({
698710
server: tString,
699711
bypass: tOptional(tString),
@@ -765,6 +777,12 @@ scheme.BrowserNewContextForReuseParams = tObject({
765777
serviceWorkers: tOptional(tEnum(['allow', 'block'])),
766778
selectorEngines: tOptional(tArray(tType('SelectorEngine'))),
767779
testIdAttributeName: tOptional(tString),
780+
agent: tOptional(tObject({
781+
provider: tString,
782+
model: tString,
783+
cacheDir: tOptional(tString),
784+
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
785+
})),
768786
proxy: tOptional(tObject({
769787
server: tString,
770788
bypass: tOptional(tString),
@@ -881,6 +899,12 @@ scheme.BrowserContextInitializer = tObject({
881899
serviceWorkers: tOptional(tEnum(['allow', 'block'])),
882900
selectorEngines: tOptional(tArray(tType('SelectorEngine'))),
883901
testIdAttributeName: tOptional(tString),
902+
agent: tOptional(tObject({
903+
provider: tString,
904+
model: tString,
905+
cacheDir: tOptional(tString),
906+
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
907+
})),
884908
}),
885909
});
886910
scheme.BrowserContextBindingCallEvent = tObject({
@@ -2770,6 +2794,12 @@ scheme.AndroidDeviceLaunchBrowserParams = tObject({
27702794
serviceWorkers: tOptional(tEnum(['allow', 'block'])),
27712795
selectorEngines: tOptional(tArray(tType('SelectorEngine'))),
27722796
testIdAttributeName: tOptional(tString),
2797+
agent: tOptional(tObject({
2798+
provider: tString,
2799+
model: tString,
2800+
cacheDir: tOptional(tString),
2801+
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
2802+
})),
27732803
pkg: tOptional(tString),
27742804
args: tOptional(tArray(tString)),
27752805
proxy: tOptional(tObject({

0 commit comments

Comments
 (0)