diff --git a/README.md b/README.md index 6f83fe56e..1cc5a2451 100644 --- a/README.md +++ b/README.md @@ -601,6 +601,23 @@ The Chrome DevTools MCP server supports the following configuration option: - **Type:** boolean - **Default:** `true` +- **`--screenshotFormat`/ `--screenshot-format`** + Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png"). + - **Type:** string + - **Choices:** `jpeg`, `png`, `webp` + +- **`--screenshotQuality`/ `--screenshot-quality`** + Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default. + - **Type:** number + +- **`--screenshotMaxWidth`/ `--screenshot-max-width`** + Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize. + - **Type:** number + +- **`--screenshotMaxHeight`/ `--screenshot-max-height`** + Maximum height in pixels for screenshots. If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. Unset means no resize. + - **Type:** number + - **`--slim`** Exposes a "slim" set of 3 tools covering navigation, script execution and screenshots only. Useful for basic browser tasks. 
- **Type:** boolean diff --git a/src/bin/chrome-devtools-mcp-cli-options.ts b/src/bin/chrome-devtools-mcp-cli-options.ts index 3cfbacac0..9c9576b6f 100644 --- a/src/bin/chrome-devtools-mcp-cli-options.ts +++ b/src/bin/chrome-devtools-mcp-cli-options.ts @@ -250,6 +250,27 @@ export const cliOptions = { hidden: true, describe: 'Include watchdog PID in Clearcut request headers (for testing).', }, + screenshotFormat: { + type: 'string', + description: + 'Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png").', + choices: ['jpeg', 'png', 'webp'] as const, + }, + screenshotQuality: { + type: 'number', + description: + 'Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default.', + }, + screenshotMaxWidth: { + type: 'number', + description: + 'Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize.', + }, + screenshotMaxHeight: { + type: 'number', + description: + 'Maximum height in pixels for screenshots. If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. 
Unset means no resize.', + }, slim: { type: 'boolean', describe: diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 2e648531c..57f6f9736 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -5,101 +5,236 @@ */ import {zod} from '../third_party/index.js'; -import type {ElementHandle, Page} from '../third_party/index.js'; +import type { + BoundingBox, + ElementHandle, + Page, + ScreenshotClip, +} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; import {definePageTool} from './ToolDefinition.js'; -export const screenshot = definePageTool({ - name: 'take_screenshot', - description: `Take a screenshot of the page or element.`, - annotations: { - category: ToolCategory.DEBUGGING, - // Not read-only due to filePath param. - readOnlyHint: false, - }, - schema: { - format: zod - .enum(['png', 'jpeg', 'webp']) - .default('png') - .describe('Type of format to save the screenshot as. Default is "png"'), - quality: zod - .number() - .min(0) - .max(100) - .optional() - .describe( - 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', - ), - uid: zod - .string() - .optional() - .describe( - 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', - ), - fullPage: zod - .boolean() - .optional() - .describe( - 'If set to true takes a screenshot of the full page instead of the currently visible viewport. 
Incompatible with uid.', +type ScreenshotFormat = 'png' | 'jpeg' | 'webp'; + +function isScreenshotFormat(value: unknown): value is ScreenshotFormat { + return value === 'png' || value === 'jpeg' || value === 'webp'; +} + +function isPositiveFiniteNumber(value: number | undefined): value is number { + return value !== undefined && Number.isFinite(value) && value > 0; +} + +async function getSourceBox( + page: Page, + element: ElementHandle | undefined, + fullPage: boolean, +): Promise<BoundingBox | undefined> { + if (element) { + const box = await element.boundingBox(); + return box ?? undefined; + } + if (fullPage) { + const dims = await page.evaluate(() => ({ + width: Math.max( + document.documentElement.scrollWidth, + document.body?.scrollWidth ?? 0, ), - filePath: zod - .string() - .optional() - .describe( - 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + height: Math.max( + document.documentElement.scrollHeight, + document.body?.scrollHeight ?? 0, ), - }, - handler: async (request, response, context) => { - if (request.params.uid && request.params.fullPage) { - throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + })); + if (dims.width <= 0 || dims.height <= 0) { + return undefined; } + return {x: 0, y: 0, width: dims.width, height: dims.height}; + } + const viewport = page.viewport(); + if (!viewport) { + return undefined; + } + return {x: 0, y: 0, width: viewport.width, height: viewport.height}; +} - let pageOrHandle: Page | ElementHandle; - if (request.params.uid) { - pageOrHandle = await request.page.getElementByUid(request.params.uid); - } else { - pageOrHandle = request.page.pptrPage; - } +function computeDownscaleClip( + box: BoundingBox, + maxWidth: number | undefined, + maxHeight: number | undefined, +): ScreenshotClip | undefined { + const widthScale = isPositiveFiniteNumber(maxWidth) + ? 
Math.min(1, maxWidth / box.width) + : 1; + const heightScale = isPositiveFiniteNumber(maxHeight) + ? Math.min(1, maxHeight / box.height) + : 1; + const scale = Math.min(widthScale, heightScale); + if (scale >= 1) { + return undefined; + } + // Skip degenerate sub-pixel results. + if (Math.round(box.width * scale) < 1 || Math.round(box.height * scale) < 1) { + return undefined; + } + return { + x: box.x, + y: box.y, + width: box.width, + height: box.height, + scale, + }; +} - const format = request.params.format; - const quality = format === 'png' ? undefined : request.params.quality; - - const screenshot = await pageOrHandle.screenshot({ - type: format, - fullPage: request.params.fullPage, - quality, - optimizeForSpeed: true, // Bonus: optimize encoding for speed - }); - - if (request.params.uid) { - response.appendResponseLine( - `Took a screenshot of node with uid "${request.params.uid}".`, - ); - } else if (request.params.fullPage) { - response.appendResponseLine( - 'Took a screenshot of the full current page.', - ); - } else { - response.appendResponseLine( - "Took a screenshot of the current page's viewport.", - ); - } +export const screenshot = definePageTool(args => { + const { + screenshotFormat, + screenshotQuality, + screenshotMaxWidth, + screenshotMaxHeight, + } = args ?? {}; - if (request.params.filePath) { - const file = await context.saveFile(screenshot, request.params.filePath); - response.appendResponseLine(`Saved screenshot to ${file.filename}.`); - } else if (screenshot.length >= 2_000_000) { - const {filepath} = await context.saveTemporaryFile( - screenshot, - `screenshot.${request.params.format}`, - ); - response.appendResponseLine(`Saved screenshot to ${filepath}.`); - } else { - response.attachImage({ - mimeType: `image/${request.params.format}`, - data: Buffer.from(screenshot).toString('base64'), - }); - } - }, + const defaultFormat: ScreenshotFormat = isScreenshotFormat(screenshotFormat) + ? 
screenshotFormat + : 'png'; + const defaultQuality = isPositiveFiniteNumber(screenshotQuality) + ? screenshotQuality + : undefined; + const maxWidth = isPositiveFiniteNumber(screenshotMaxWidth) + ? screenshotMaxWidth + : undefined; + const maxHeight = isPositiveFiniteNumber(screenshotMaxHeight) + ? screenshotMaxHeight + : undefined; + + return { + name: 'take_screenshot', + description: `Take a screenshot of the page or element.`, + annotations: { + category: ToolCategory.DEBUGGING, + // Not read-only due to filePath param. + readOnlyHint: false, + }, + schema: { + format: zod + .enum(['png', 'jpeg', 'webp']) + .default(defaultFormat) + .describe( + `Type of format to save the screenshot as. Default is "${defaultFormat}"`, + ), + quality: zod + .number() + .min(0) + .max(100) + .optional() + .describe( + 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', + ), + uid: zod + .string() + .optional() + .describe( + 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', + ), + fullPage: zod + .boolean() + .optional() + .describe( + 'If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.', + ), + filePath: zod + .string() + .optional() + .describe( + 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + ), + }, + handler: async (request, response, context) => { + if (request.params.uid && request.params.fullPage) { + throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + } + + const page = request.page.pptrPage; + const element = request.params.uid + ? await request.page.getElementByUid(request.params.uid) + : undefined; + + const format = request.params.format; + const quality = + format === 'png' + ? undefined + : (request.params.quality ?? 
defaultQuality); + const fullPage = request.params.fullPage ?? false; + + // Compute downscale clip when maxWidth/maxHeight is set and the source + // exceeds either bound. The smaller scale factor wins so both bounds + // are respected while preserving aspect ratio. + let clip: ScreenshotClip | undefined; + if (maxWidth !== undefined || maxHeight !== undefined) { + const box = await getSourceBox(page, element, fullPage); + if (box) { + clip = computeDownscaleClip(box, maxWidth, maxHeight); + } + } + + let screenshot: Uint8Array; + if (clip) { + // page.screenshot with clip lets the CDP scale param downscale the + // capture for viewport, full-page and element shots alike. We rely on + // Puppeteer's default of captureBeyondViewport=true when a clip is + // present so element/full-page captures below the fold still work. + screenshot = await page.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + clip, + }); + } else if (element) { + screenshot = await element.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + }); + } else { + screenshot = await page.screenshot({ + type: format, + fullPage, + quality, + optimizeForSpeed: true, + }); + } + + if (request.params.uid) { + response.appendResponseLine( + `Took a screenshot of node with uid "${request.params.uid}".`, + ); + } else if (fullPage) { + response.appendResponseLine( + 'Took a screenshot of the full current page.', + ); + } else { + response.appendResponseLine( + "Took a screenshot of the current page's viewport.", + ); + } + + if (request.params.filePath) { + const file = await context.saveFile( + screenshot, + request.params.filePath, + ); + response.appendResponseLine(`Saved screenshot to ${file.filename}.`); + } else if (screenshot.length >= 2_000_000) { + const {filepath} = await context.saveTemporaryFile( + screenshot, + `screenshot.${request.params.format}`, + ); + response.appendResponseLine(`Saved screenshot to ${filepath}.`); + } else { + response.attachImage({ + 
mimeType: `image/${request.params.format}`, + data: Buffer.from(screenshot).toString('base64'), + }); + } + }, + }; }); diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index 9d177f9bb..a24f3b2df 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -10,10 +10,27 @@ import {tmpdir} from 'node:os'; import {join} from 'node:path'; import {describe, it} from 'node:test'; +import type {ParsedArguments} from '../../src/bin/chrome-devtools-mcp-cli-options.js'; import {screenshot} from '../../src/tools/screenshot.js'; import {screenshots} from '../snapshot.js'; import {html, withMcpContext} from '../utils.js'; +const screenshotTool = screenshot({} as ParsedArguments); + +/** + * Reads the pixel width from a PNG buffer's IHDR chunk (bytes 16..19). + */ +function pngWidth(data: Buffer): number { + return data.readUInt32BE(16); +} + +/** + * Reads the pixel height from a PNG buffer's IHDR chunk (bytes 20..23). + */ +function pngHeight(data: Buffer): number { + return data.readUInt32BE(20); +} + describe('screenshot', () => { describe('browser_take_screenshot', () => { it('with default options', async () => { @@ -21,7 +38,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'png'}, page: context.getSelectedMcpPage()}, response, context, @@ -40,7 +57,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', quality: 0}, page: context.getSelectedMcpPage(), @@ -59,7 +76,7 @@ describe('screenshot', () => { }); it('with jpeg', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 
'jpeg'}, page: context.getSelectedMcpPage()}, response, context, @@ -75,7 +92,7 @@ describe('screenshot', () => { }); it('with webp', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'webp'}, page: context.getSelectedMcpPage()}, response, context, @@ -94,7 +111,7 @@ describe('screenshot', () => { const fixture = screenshots.viewportOverflow; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -129,7 +146,7 @@ describe('screenshot', () => { return el?.scrollIntoViewIfNeeded(); }); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -156,7 +173,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await context.createTextSnapshot(context.getSelectedMcpPage()); - await screenshot.handler( + await screenshotTool.handler( { params: { format: 'png', @@ -184,7 +201,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -228,7 +245,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -255,7 +272,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: 
context.getSelectedMcpPage(), @@ -272,6 +289,141 @@ describe('screenshot', () => { } }); + it('honors screenshotFormat default from CLI args', async () => { + const tool = screenshot({ + screenshotFormat: 'jpeg', + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const fixture = screenshots.basic; + const page = context.getSelectedPptrPage(); + await page.setContent(fixture.html); + // No explicit format passed: zod should apply the CLI-driven default. + await tool.handler( + { + params: {format: tool.schema.format.parse(undefined)}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + assert.equal(response.images.length, 1); + assert.equal(response.images[0].mimeType, 'image/jpeg'); + }); + }); + + it('keeps "png" as default format when no CLI override is set', async () => { + const tool = screenshot({} as ParsedArguments); + assert.equal(tool.schema.format.parse(undefined), 'png'); + }); + + it('downscales viewport screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 100, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + assert.equal(response.images.length, 1); + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 100); + // Aspect ratio preserved: 800x600 -> 100x75. + assert.equal(pngHeight(buf), 75); + }); + }); + + it('downscales using the smaller scale when both max-width and max-height are set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 400, + screenshotMaxHeight: 60, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // height bound dictates: 60/600 = 0.1 -> 80x60. + assert.equal(pngHeight(buf), 60); + assert.equal(pngWidth(buf), 80); + }); + }); + + it('does not resize when source is smaller than the max bounds', async () => { + const tool = screenshot({ + screenshotMaxWidth: 4000, + screenshotMaxHeight: 4000, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent(html`
`); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 800); + assert.equal(pngHeight(buf), 600); + }); + }); + + it('downscales full page screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 200, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`<div style="width: 1000px; height: 1500px;"></div>
`, + ); + + await tool.handler( + { + params: {format: 'png', fullPage: true}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // Source is at least 1000x1500; scale = 200/1000 = 0.2 -> ~200x300. + // Allow ±2px to absorb sub-pixel rasterization rounding by Chrome. + assert.equal(pngWidth(buf), 200); + assert.ok( + Math.abs(pngHeight(buf) - 300) <= 2, + `expected height near 300, got ${pngHeight(buf)}`, + ); + }); + }); + it('with malformed filePath', async () => { await withMcpContext(async (response, context) => { // Use a platform-specific invalid character. @@ -283,7 +435,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(),