Skip to content

Commit dede985

Browse files
committed
chore: perform-specific tools
1 parent 1513578 commit dede985

File tree

19 files changed

+642
-103
lines changed

19 files changed

+642
-103
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
[*]
22
../protocol/
33
../utils/isomorphic
4+
../mcpBundle.ts

packages/playwright-core/src/client/page.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import { urlMatches, urlMatchesEqual } from '../utils/isomorphic/urlMatch';
4141
import { LongStandingScope } from '../utils/isomorphic/manualPromise';
4242
import { isObject, isRegExp, isString } from '../utils/isomorphic/rtti';
4343
import { ConsoleMessage } from './consoleMessage';
44+
import { zodToJsonSchema } from '../mcpBundle';
4445

4546
import type { BrowserContext } from './browserContext';
4647
import type { Clock } from './clock';
@@ -847,11 +848,12 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
847848
}
848849

849850
async perform(task: string, options: { key?: string, maxTurns?: number } = {}): Promise<void> {
850-
throw new Error('Not implemented in playwright-core');
851+
await this._channel.perform({ task, ...options });
851852
}
852853

853-
extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
854-
throw new Error('Not implemented in playwright-core');
854+
async extract<Schema extends z.ZodTypeAny>(query: string, schema: Schema, options: { maxTurns?: number } = {}): Promise<z.infer<Schema>> {
855+
const { result } = await this._channel.extract({ query, schema: zodToJsonSchema(schema), ...options });
856+
return result;
855857
}
856858

857859
async _snapshotForAI(options: TimeoutOptions & { track?: string } = {}): Promise<{ full: string, incremental?: string }> {

packages/playwright-core/src/protocol/validator.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,20 @@ scheme.PageUpdateSubscriptionParams = tObject({
15061506
enabled: tBoolean,
15071507
});
15081508
scheme.PageUpdateSubscriptionResult = tOptional(tObject({}));
1509+
// Validation scheme for the parameters of the page "perform" call:
// `task` is checked with tString; `key` and `maxTurns` are wrapped in tOptional.
scheme.PagePerformParams = tObject({
1510+
task: tString,
1511+
key: tOptional(tString),
1512+
maxTurns: tOptional(tInt),
1513+
});
1514+
// The "perform" call carries no result fields: an optional empty object.
scheme.PagePerformResult = tOptional(tObject({}));
1515+
// Validation scheme for the parameters of the page "extract" call.
// `schema` is declared as tAny, so its shape is not constrained at this layer.
scheme.PageExtractParams = tObject({
1516+
query: tString,
1517+
schema: tAny,
1518+
maxTurns: tOptional(tInt),
1519+
});
1520+
// Result of the "extract" call: a single `result` field declared as tAny.
scheme.PageExtractResult = tObject({
1521+
result: tAny,
1522+
});
15091523
scheme.FrameInitializer = tObject({
15101524
url: tString,
15111525
name: tString,
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[*]
2+
../browserContext.ts
3+
../page.ts
4+
../progress.ts
5+
../../mcpBundle.ts
6+
../../utilsBundle.ts
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { waitForCompletion, wrapResult } from './utils';
18+
19+
import type { Progress } from '../progress';
20+
import type { Page } from '../page';
21+
import type { Action } from './actions';
22+
23+
/**
 * Runs `action` on `page` from within a tool call.
 *
 * The action is executed through `waitForCompletion`, and once that has
 * finished the value produced by `wrapResult` for the same action is returned.
 */
export async function runActionFromTool(progress: Progress, page: Page, action: Action) {
  const execute = () => runAction(progress, page, action);
  await waitForCompletion(progress, page, execute);
  const wrapped = await wrapResult(progress, page, action);
  return wrapped;
}
27+
28+
/**
 * Executes a single action against the page.
 *
 * Selector-based actions are issued on the page's main frame and always pass
 * `strict: true`; key presses go through `page.keyboard`. For `fill` and
 * `pressSequentially`, Enter is pressed afterwards when `action.submit` is set.
 *
 * @param progress Progress object forwarded to every frame/keyboard call.
 * @param page Page whose main frame and keyboard receive the action.
 * @param action The action to execute, discriminated by `action.method`.
 * @throws Error when `action.method` is not one of the supported methods.
 */
export async function runAction(progress: Progress, page: Page, action: Action) {
  const frame = page.mainFrame();
  switch (action.method) {
    case 'click':
      await frame.click(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'drag':
      await frame.dragAndDrop(progress, action.sourceSelector, action.targetSelector, { ...strictTrue });
      break;
    case 'hover':
      await frame.hover(progress, action.selector, { ...action.options, ...strictTrue });
      break;
    case 'selectOption':
      // No element handles are passed; each requested value becomes a `{ value }` option descriptor.
      await frame.selectOption(progress, action.selector, [], action.values.map(a => ({ value: a })), { ...strictTrue });
      break;
    case 'pressKey':
      await page.keyboard.press(progress, action.key);
      break;
    case 'pressSequentially':
      await frame.type(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    case 'fill':
      await frame.fill(progress, action.selector, action.text, { ...strictTrue });
      if (action.submit)
        await page.keyboard.press(progress, 'Enter');
      break;
    default: {
      // Exhaustiveness guard: fails to compile if a new Action variant is added
      // without a case above, and fails loudly at runtime instead of silently
      // reporting success for an action that was never executed.
      const unhandled: never = action;
      throw new Error(`Unsupported action: ${JSON.stringify(unhandled)}`);
    }
  }
}

// Option bag spread into every selector-based call above so that each one passes `strict: true`.
const strictTrue = { strict: true };
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import type * as channels from '@protocol/channels';
18+
19+
/** Field shared by every action that addresses a single element by selector. */
type SelectorTarget = {
  selector: string;
};

/** Fields shared by the two text-entry actions. */
type TextEntry = SelectorTarget & {
  text: string;
  submit?: boolean;
};

/** Action with method `click`; `options` is a subset of the frame click parameters. */
export type ClickAction = SelectorTarget & {
  method: 'click';
  options: Pick<channels.FrameClickParams, 'button' | 'clickCount' | 'modifiers'>;
};

/** Action with method `drag`, described by a source and a target selector. */
export type DragAction = {
  method: 'drag';
  sourceSelector: string;
  targetSelector: string;
};

/** Action with method `hover`; `options` carries only the modifiers of the frame hover parameters. */
export type HoverAction = SelectorTarget & {
  method: 'hover';
  options: Pick<channels.FrameHoverParams, 'modifiers'>;
};

/** Action with method `selectOption`, carrying the list of values to select. */
export type SelectOptionAction = SelectorTarget & {
  method: 'selectOption';
  values: string[];
};

/** Action with method `pressKey`; it has no selector, only the key. */
export type PressAction = {
  method: 'pressKey';
  key: string;
};

/** Action with method `pressSequentially`. */
export type PressSequentiallyAction = TextEntry & {
  method: 'pressSequentially';
};

/** Action with method `fill`. */
export type FillAction = TextEntry & {
  method: 'fill';
};

/** Union of all supported actions, discriminated by `method`. */
export type Action =
  | ClickAction
  | DragAction
  | HoverAction
  | SelectOptionAction
  | PressAction
  | PressSequentiallyAction
  | FillAction;
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { toolsForLoop } from './backend';
18+
import { debug } from '../../utilsBundle';
19+
import { Loop } from '../../mcpBundle';
20+
21+
import type { Progress } from '../progress';
22+
import type * as channels from '@protocol/channels';
23+
import type { Page } from '../page';
24+
import type * as loopTypes from '@lowire/loop';
25+
26+
/**
 * Asks the agent loop to carry out `options.task` on the page.
 *
 * The prompt instructs the model to perform the task and return the performed
 * actions; the loop result is requested as an object with a required
 * `actions` array. The result is currently only logged to the console.
 */
export async function pagePerform(progress: Progress, page: Page, options: channels.PagePerformParams): Promise<void> {
  const prompt = [
    '',
    '### Instructions',
    '- Perform the following actions on the page.',
    '- Return performed actions.',
    '',
    '### Task',
    `${options.task}`,
    '',
  ].join('\n');

  const performed = await perform(progress, page, prompt, {
    type: 'object' as const,
    properties: {
      actions: { type: 'array' as const, items: { type: 'object' as const } },
    },
    required: ['actions']
  }, options);

  // eslint-disable-next-line no-console
  console.log(performed);
}
46+
47+
/**
 * Asks the agent loop to extract information from the page without acting on it.
 *
 * The prompt is built from `options.query`, and `options.schema` is passed
 * through as the result schema. Returns whatever `perform` resolves to.
 */
export async function pageExtract(progress: Progress, page: Page, options: channels.PageExtractParams) {
  const prompt = [
    '',
    '### Instructions',
    'Extract the following information from the page. Do not perform any actions, just extract the information.',
    '',
    '### Query',
    `${options.query}`,
  ].join('\n');

  const extracted = await perform(progress, page, prompt, options.schema, options);
  return extracted;
}
56+
57+
/**
 * Shared driver for `pagePerform` and `pageExtract`.
 *
 * Takes an AI snapshot of the page, builds a `Loop` from the agent configured
 * on the browser context, appends the snapshot to `userTask`, and runs the
 * loop with `resultSchema`.
 *
 * @throws Error when the browser context has no `agent` option.
 */
async function perform(progress: Progress, page: Page, userTask: string, resultSchema: loopTypes.Schema, options: { maxTurns?: number } = {}): Promise<any> {
  const agent = page.browserContext._options.agent;
  if (!agent)
    throw new Error(`page.perform() and page.extract() require the agent to be set on the browser context`);

  const snapshot = await page.snapshotForAI(progress);
  const loopTools = toolsForLoop(page);

  // NOTE(review): `options` is spread last, so every property of the caller's
  // object (not only `maxTurns`) reaches the Loop options and can override the
  // keys above — confirm this is intended.
  const loop = new Loop(agent.provider as any, {
    model: agent.model,
    summarize: true,
    debug,
    callTool: loopTools.callTool,
    tools: loopTools.tools,
    ...options
  });

  const prompt = `${userTask}\n\n### Page snapshot\n${snapshot.full}\n`;
  return await loop.run(prompt, { resultSchema });
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import toolDefinitions from './tools';
18+
import { ProgressController } from '../progress';
19+
import { zodToJsonSchema } from '../../mcpBundle';
20+
21+
import type * as loopTypes from '@lowire/loop';
22+
import type { Page } from '../page';
23+
24+
/**
 * Builds the tool list and the tool dispatcher handed to the agent loop.
 *
 * `tools` mirrors every entry of `toolDefinitions`, with its zod input schema
 * converted through `zodToJsonSchema`. `callTool` looks a tool up by name and
 * runs its handler against `page` under a fresh `ProgressController`; an
 * unknown name produces an error result listing the available tool names.
 */
export function toolsForLoop(page: Page): { tools: loopTypes.Tool[], callTool: loopTypes.ToolCallback } {
  const tools: loopTypes.Tool[] = toolDefinitions.map(({ schema }) => ({
    name: schema.name,
    description: schema.description,
    inputSchema: zodToJsonSchema(schema.inputSchema) as loopTypes.Schema,
  }));

  const callTool: loopTypes.ToolCallback = async params => {
    const match = toolDefinitions.find(candidate => candidate.schema.name === params.name);
    if (match) {
      const controller = new ProgressController();
      return await controller.run(async progress => {
        return await match.handle(progress, page, params.arguments);
      });
    }

    // Unknown tool: report it back to the loop as an error result rather than throwing.
    return {
      content: [{
        type: 'text',
        text: `Tool ${params.name} not found. Available tools: ${toolDefinitions.map(t => t.schema.name)}`
      }],
      isError: true,
    };
  };

  return { tools, callTool };
}

0 commit comments

Comments
 (0)