Skip to content

Commit 788491e

Browse files
committed
chore: perform-specific tools
1 parent 1513578 commit 788491e

File tree

27 files changed

+817
-133
lines changed

27 files changed

+817
-133
lines changed

docs/src/api/params.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ Emulates consistent window screen size available inside web page via `window.scr
375375
- `agent` <[Object]>
376376
- `provider` <[string]> LLM provider to use
377377
- `model` <[string]> Model identifier within provider
378-
- `cacheDir` ?<[string]> Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
378+
- `cacheFile` ?<[string]> Cache file to use/generate code for performed actions into. Cache is not used if not specified (default).
379379
- `cacheMode` ?<['force'|'ignore'|'auto']> Cache control, defaults to 'auto'
380380

381381
Agent settings for [`method: Page.perform`] and [`method: Page.extract`].

packages/playwright-client/types/types.d.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22097,9 +22097,9 @@ export interface BrowserContextOptions {
2209722097
model: string;
2209822098

2209922099
/**
22100-
* Cache folder to use/generate code for performed actions into. Cache is not used if not specified (default).
22100+
* Cache file to use/generate code for performed actions into. Cache is not used if not specified (default).
2210122101
*/
22102-
cacheDir?: string;
22102+
cacheFile?: string;
2210322103

2210422104
/**
2210522105
* Cache control, defaults to 'auto'

packages/playwright-core/src/client/page.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -847,11 +847,12 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
847847
}
848848

849849
async perform(task: string, options: { key?: string, maxTurns?: number } = {}): Promise<void> {
850-
throw new Error('Not implemented in playwright-core');
850+
await this._channel.perform({ task, ...options });
851851
}
852852

853-
extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
854-
throw new Error('Not implemented in playwright-core');
853+
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
854+
const { result } = await this._channel.extract({ query, schema: this._platform.zodToJsonSchema(schema), ...options });
855+
return result;
855856
}
856857

857858
async _snapshotForAI(options: TimeoutOptions & { track?: string } = {}): Promise<{ full: string, incremental?: string }> {

packages/playwright-core/src/client/platform.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ export type Platform = {
5959
streamFile: (path: string, writable: Writable) => Promise<void>,
6060
streamReadable: (channel: channels.StreamChannel) => Readable,
6161
streamWritable: (channel: channels.WritableStreamChannel) => Writable,
62+
zodToJsonSchema: (schema: any) => any,
6263
zones: { empty: Zone, current: () => Zone; };
6364
};
6465

@@ -119,5 +120,9 @@ export const emptyPlatform: Platform = {
119120
throw new Error('Streams are not available');
120121
},
121122

123+
zodToJsonSchema: (schema: any) => {
124+
throw new Error('Zod is not available');
125+
},
126+
122127
zones: { empty: noopZone, current: () => noopZone },
123128
};

packages/playwright-core/src/protocol/validator.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ scheme.BrowserTypeLaunchPersistentContextParams = tObject({
605605
agent: tOptional(tObject({
606606
provider: tString,
607607
model: tString,
608-
cacheDir: tOptional(tString),
608+
cacheFile: tOptional(tString),
609609
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
610610
})),
611611
userDataDir: tString,
@@ -703,7 +703,7 @@ scheme.BrowserNewContextParams = tObject({
703703
agent: tOptional(tObject({
704704
provider: tString,
705705
model: tString,
706-
cacheDir: tOptional(tString),
706+
cacheFile: tOptional(tString),
707707
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
708708
})),
709709
proxy: tOptional(tObject({
@@ -780,7 +780,7 @@ scheme.BrowserNewContextForReuseParams = tObject({
780780
agent: tOptional(tObject({
781781
provider: tString,
782782
model: tString,
783-
cacheDir: tOptional(tString),
783+
cacheFile: tOptional(tString),
784784
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
785785
})),
786786
proxy: tOptional(tObject({
@@ -902,7 +902,7 @@ scheme.BrowserContextInitializer = tObject({
902902
agent: tOptional(tObject({
903903
provider: tString,
904904
model: tString,
905-
cacheDir: tOptional(tString),
905+
cacheFile: tOptional(tString),
906906
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
907907
})),
908908
}),
@@ -1506,6 +1506,20 @@ scheme.PageUpdateSubscriptionParams = tObject({
15061506
enabled: tBoolean,
15071507
});
15081508
scheme.PageUpdateSubscriptionResult = tOptional(tObject({}));
1509+
scheme.PagePerformParams = tObject({
1510+
task: tString,
1511+
key: tOptional(tString),
1512+
maxTurns: tOptional(tInt),
1513+
});
1514+
scheme.PagePerformResult = tOptional(tObject({}));
1515+
scheme.PageExtractParams = tObject({
1516+
query: tString,
1517+
schema: tAny,
1518+
maxTurns: tOptional(tInt),
1519+
});
1520+
scheme.PageExtractResult = tObject({
1521+
result: tAny,
1522+
});
15091523
scheme.FrameInitializer = tObject({
15101524
url: tString,
15111525
name: tString,
@@ -2797,7 +2811,7 @@ scheme.AndroidDeviceLaunchBrowserParams = tObject({
27972811
agent: tOptional(tObject({
27982812
provider: tString,
27992813
model: tString,
2800-
cacheDir: tOptional(tString),
2814+
cacheFile: tOptional(tString),
28012815
cacheMode: tOptional(tEnum(['ignore', 'force', 'auto'])),
28022816
})),
28032817
pkg: tOptional(tString),
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[*]
2+
../browserContext.ts
3+
../page.ts
4+
../progress.ts
5+
../../mcpBundle.ts
6+
../../utilsBundle.ts
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import type * as actions from './actions';
18+
import type { Page } from '../page';
19+
import type { Progress } from '../progress';
20+
21+
/**
 * Replays a single recorded action against the page.
 *
 * Selector-based actions (`click`, `drag`, `hover`, `selectOption`,
 * `pressSequentially`, `fill`) are dispatched to the page's main frame with
 * `strict: true` merged into their options; `pressKey` goes through
 * `page.keyboard`. For `pressSequentially` and `fill`, a truthy `submit`
 * additionally presses `Enter` once the text has been entered.
 *
 * @param progress Progress object forwarded to every frame/keyboard call.
 * @param page Page whose main frame and keyboard receive the action.
 * @param action Action to replay, discriminated by `action.method`.
 * @throws Error when `action.method` is not one of the supported methods.
 */
export async function runAction(progress: Progress, page: Page, action: actions.Action) {
  const frame = page.mainFrame();
  switch (action.method) {
    case 'click':
      await frame.click(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'drag':
      await frame.dragAndDrop(progress, action.sourceSelector, action.targetSelector, { ...strictTrue });
      break;
    case 'hover':
      await frame.hover(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'selectOption':
      // No element handles are passed; each recorded string is matched as an option `value`.
      await frame.selectOption(progress, action.selector, [], action.values.map(a => ({ value: a })), { ...strictTrue });
      break;
    case 'pressKey':
      await page.keyboard.press(progress, action.key);
      break;
    case 'pressSequentially':
      await frame.type(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    case 'fill':
      await frame.fill(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    default: {
      // The static type is exhaustive, but callers may pass actions that were
      // deserialized without validation. Failing loudly avoids reporting a
      // successful replay for an action that was never executed.
      const unsupported: never = action;
      throw new Error(`Unsupported action: ${JSON.stringify(unsupported)}`);
    }
  }
}

// Shared `strict: true` options fragment merged into every selector-based call above.
const strictTrue = { strict: true };
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import type * as channels from '@protocol/channels';
18+
19+
/** Click on the element matched by `selector`. */
export type ClickAction = {
  method: 'click';
  selector: string;
  /** Subset of the frame click parameters that is recorded with the action. */
  options: Pick<channels.FrameClickParams, 'button' | 'clickCount' | 'modifiers'>;
};

/** Drag the element matched by `sourceSelector` onto the one matched by `targetSelector`. */
export type DragAction = {
  method: 'drag';
  sourceSelector: string;
  targetSelector: string;
};

/** Hover over the element matched by `selector`. */
export type HoverAction = {
  method: 'hover';
  selector: string;
  /** Subset of the frame hover parameters that is recorded with the action. */
  options: Pick<channels.FrameHoverParams, 'modifiers'>;
};

/** Select options, given as a list of strings, in the element matched by `selector`. */
export type SelectOptionAction = {
  method: 'selectOption';
  selector: string;
  values: string[];
};

/** Press a single key; carries no selector. */
export type PressAction = {
  method: 'pressKey';
  key: string;
};

/** Type `text` into the element matched by `selector`; `submit` is an optional flag. */
export type PressSequentiallyAction = {
  method: 'pressSequentially';
  selector: string;
  text: string;
  submit?: boolean;
};

/** Fill the element matched by `selector` with `text`; `submit` is an optional flag. */
export type FillAction = {
  method: 'fill';
  selector: string;
  text: string;
  submit?: boolean;
};

/** Any recorded action, discriminated by its `method` tag. */
export type Action =
  | ClickAction
  | DragAction
  | HoverAction
  | SelectOptionAction
  | PressAction
  | PressSequentiallyAction
  | FillAction;
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import fs from 'fs';
18+
19+
import { toolsForLoop } from './backend';
20+
import { debug } from '../../utilsBundle';
21+
import { Loop, z, zodToJsonSchema } from '../../mcpBundle';
22+
import { runAction } from './actionRunner';
23+
import { Context } from './context';
24+
25+
import type { Progress } from '../progress';
26+
import type * as channels from '@protocol/channels';
27+
import type { Page } from '../page';
28+
import type * as loopTypes from '@lowire/loop';
29+
import type * as actions from './actions';
30+
31+
/**
 * Performs `options.task` on the page.
 *
 * First tries to replay cached actions via `cachedPerform`; when that reports
 * a replay, nothing else happens. Otherwise the task is handed to the agent
 * loop via `perform` and the cache is updated afterwards via `updateCache`.
 *
 * @param progress Progress object used to build the agent context.
 * @param page Page the task is performed on.
 * @param options Task description plus optional cache `key` and `maxTurns`.
 */
export async function pagePerform(progress: Progress, page: Page, options: channels.PagePerformParams): Promise<void> {
  const context = new Context(progress, page);

  const replayed = await cachedPerform(context, options);
  if (replayed)
    return;

  const resultShape = z.object({
    error: z.string().optional().describe('An error message if the task could not be completed successfully'),
  });
  const resultSchema = zodToJsonSchema(resultShape) as loopTypes.Schema;

  // NOTE(review): the value resolved by perform() (including any `error`) is
  // discarded and the cache is updated unconditionally — confirm this is intended.
  await perform(context, options.task, resultSchema, options);
  await updateCache(context, options);
}
42+
43+
/**
 * Extracts information described by `options.query` from the page.
 *
 * Builds an instruction prompt around the query and runs it through `perform`
 * with `options.schema` as the result schema.
 *
 * @param progress Progress object used to build the agent context.
 * @param page Page to extract from.
 * @param options Query, result schema and optional `maxTurns`.
 * @returns Whatever `perform` resolves to for this prompt.
 */
export async function pageExtract(progress: Progress, page: Page, options: channels.PageExtractParams) {
  const context = new Context(progress, page);
  // The prompt intentionally starts with an empty line.
  const promptLines = [
    '',
    '### Instructions',
    'Extract the following information from the page. Do not perform any actions, just extract the information.',
    '',
    '### Query',
    `${options.query}`,
  ];
  return await perform(context, promptLines.join('\n'), options.schema, options);
}
53+
54+
/**
 * Runs the agent loop for a task against the context's page.
 *
 * Takes an AI snapshot of the page, appends it to the task text and runs a
 * `Loop` configured from the browser context's `agent` options together with
 * the tools returned by `toolsForLoop`.
 *
 * @param context Agent context providing `progress` and `page`.
 * @param userTask Task text placed in front of the page snapshot.
 * @param resultSchema Schema passed to `loop.run` as `resultSchema`.
 * @param options Extra loop options; spread last, so they override the defaults set here.
 * @returns The value resolved by `loop.run`.
 * @throws Error when the browser context has no `agent` options.
 */
async function perform(context: Context, userTask: string, resultSchema: loopTypes.Schema, options: { maxTurns?: number } = {}): Promise<any> {
  const { progress, page } = context;
  const agent = page.browserContext._options.agent;
  if (!agent)
    throw new Error(`page.perform() and page.extract() require the agent to be set on the browser context`);

  const snapshot = await page.snapshotForAI(progress);
  const loopTools = toolsForLoop(context);

  const loop = new Loop(agent.provider as any, {
    model: agent.model,
    summarize: true,
    debug,
    callTool: loopTools.callTool,
    tools: loopTools.tools,
    ...options,
  });

  const prompt = `${userTask}\n\n### Page snapshot\n${snapshot.full}\n`;
  return await loop.run(prompt, { resultSchema });
}
82+
83+
/** Recorded actions indexed by cache key (the `key` option, or the task text when no key is given). */
type CachedActions = { [cacheKey: string]: actions.Action[] };

/** In-memory copies of the cache files loaded so far, indexed by cache file path. */
const allCaches: Map<string, CachedActions> = new Map();
87+
/**
 * Replays cached actions for the task, if caching is enabled and an entry exists.
 *
 * The cache key is `options.key`, falling back to `options.task`.
 *
 * @param context Agent context providing `progress` and `page`.
 * @param options Perform parameters used to derive the cache key.
 * @returns `true` when cached actions were replayed; `false` when no cache
 *   file is configured, the cache mode is `'ignore'`, or there is no entry.
 * @throws Error when there is no entry and the cache mode is `'force'`.
 */
async function cachedPerform(context: Context, options: channels.PagePerformParams): Promise<boolean> {
  const { page, progress } = context;
  const agent = page.browserContext._options.agent;
  const cacheFile = agent?.cacheFile;
  if (!agent || !cacheFile || agent.cacheMode === 'ignore')
    return false;

  const entries = await cachedActions(cacheFile);
  const key = options.key ?? options.task;
  const recorded = entries[key];
  if (recorded) {
    // Replay strictly in recorded order, one action at a time.
    for (const step of recorded)
      await runAction(progress, page, step);
    return true;
  }

  if (agent.cacheMode === 'force')
    throw new Error(`No cached actions for key "${key}", but cache mode is set to "force"`);
  return false;
}
105+
106+
/**
 * Stores the actions collected on the context under the task's cache key and
 * writes the whole cache back to the configured cache file as indented JSON.
 *
 * Does nothing when no cache file is configured.
 *
 * @param context Agent context whose `actions` are stored.
 * @param options Perform parameters; the key is `options.key`, falling back to `options.task`.
 */
async function updateCache(context: Context, options: channels.PagePerformParams) {
  const target = context.page.browserContext._options.agent?.cacheFile;
  if (target) {
    const entries = await cachedActions(target);
    const key = options.key ?? options.task;
    entries[key] = context.actions;
    const serialized = JSON.stringify(entries, undefined, 2);
    await fs.promises.writeFile(target, serialized);
  }
}
115+
116+
/**
 * Returns the cached actions for a cache file, loading and memoizing them in
 * `allCaches` on first use.
 *
 * A file that cannot be read, or that is empty or whitespace-only, yields an
 * empty cache. The returned object is the memoized instance, so mutations by
 * callers are visible to later lookups of the same file.
 *
 * @param cacheFile Path of the JSON cache file.
 * @returns Mapping from cache key to recorded actions.
 * @throws SyntaxError when the file contains malformed JSON.
 * @throws Error when the file contains JSON that is not an object.
 */
async function cachedActions(cacheFile: string): Promise<CachedActions> {
  const known = allCaches.get(cacheFile);
  if (known)
    return known;

  // Best effort: a file that cannot be read is treated as an empty cache.
  const text = await fs.promises.readFile(cacheFile, 'utf-8').catch(() => '');

  let cache: CachedActions = {};
  // JSON.parse('') throws, so an empty (e.g. freshly created) file is handled explicitly.
  if (text.trim()) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(text);
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e);
      throw new SyntaxError(`Failed to parse agent cache file "${cacheFile}": ${reason}`);
    }
    // `null`, arrays and primitives would break keyed lookup or be lost on the next write.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed))
      throw new Error(`Agent cache file "${cacheFile}" must contain a JSON object`);
    cache = parsed as CachedActions;
  }

  allCaches.set(cacheFile, cache);
  return cache;
}

0 commit comments

Comments
 (0)