From f09e1ea06a1dc7f97202c9dda9eeb267d300ca5b Mon Sep 17 00:00:00 2001 From: Antoine Kingue Date: Tue, 7 Apr 2026 10:55:21 +0200 Subject: [PATCH] feat(screenshot): add CLI options to cap screenshot size at the source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds opt-in CLI flags so operators can cap the size of screenshots returned by `take_screenshot` before they are embedded in the MCP response. Addresses two related symptoms reported when MCP clients display screenshots inline: 1. The hosted LLM API rejects images exceeding its per-image dimension limits (e.g. Anthropic's 8000x8000 px, or 2000x2000 px when more than 20 images are in the same request). 2. After many captures the cumulative base64 payload pushes the request over the per-call body size limit. Both can be mitigated at the source by choosing a more compact format, lowering the quality and downscaling the capture. New CLI flags (all opt-in, no behavior change when unset): - --screenshot-format <jpeg|png|webp>: override the default format used by take_screenshot when the caller does not specify one. - --screenshot-quality <0-100>: override the default JPEG/WebP quality when the caller does not specify one. Ignored for PNG. - --screenshot-max-width <px>: downscale screenshots wider than this before they are returned. - --screenshot-max-height <px>: downscale screenshots taller than this before they are returned. Combines with --screenshot-max-width; the smaller scale wins so both bounds are respected while preserving aspect ratio. Resizing leverages Puppeteer's clip.scale (CDP Page.captureScreenshot) so no new dependencies are introduced. Source dimensions are computed per capture mode: - viewport: page.viewport() - full page: the larger of the document.documentElement and document.body scrollWidth/scrollHeight, read via page.evaluate() - element (uid): elementHandle.boundingBox() For element and full-page captures with a downscale clip, the call is routed through page.screenshot({clip}) so the scale parameter applies. 
captureBeyondViewport is left to Puppeteer's default (true when a clip is set), which preserves correct behavior for elements below the fold and for full-page captures. Design notes: - Aligned with the "Reference over Value" principle in docs/design-principles.md: the existing 2 MB threshold still routes oversized screenshots to a temporary file. This change only reduces the size of the inline base64 fallback path, which the principles document calls out as an acceptable exception when MCP clients display images natively. - Fully opt-in: when no flags are set, take_screenshot returns the exact same bytes as before. No breaking change. - The MCP server hardcodes no LLM-specific size limits — operators pick the values that match their client/model combination. This keeps the maintenance surface minimal as model limits evolve and is intended as a complement to, not a replacement for, fixes in the MCP client itself. - The max-width/max-height bounds are compared against CSS pixels (page.viewport(), the document scroll size, or the element bounding box, depending on the capture mode), not raw bitmap pixels, so HiDPI emulation behaves predictably from the user's perspective. 
Tests added (6 new): - honors screenshotFormat default from CLI args - keeps "png" as default format when no CLI override is set - downscales viewport screenshot when screenshotMaxWidth is set - downscales using the smaller scale when both max-width and max-height are set - does not resize when source is smaller than the max bounds - downscales full page screenshot when screenshotMaxWidth is set Refs https://github.com/ChromeDevTools/chrome-devtools-mcp/issues/879 --- README.md | 17 ++ src/bin/chrome-devtools-mcp-cli-options.ts | 21 ++ src/tools/screenshot.ts | 309 +++++++++++++++------ tests/tools/screenshot.test.ts | 174 +++++++++++- 4 files changed, 423 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index 6f83fe56e..1cc5a2451 100644 --- a/README.md +++ b/README.md @@ -601,6 +601,23 @@ The Chrome DevTools MCP server supports the following configuration option: - **Type:** boolean - **Default:** `true` +- **`--screenshotFormat`/ `--screenshot-format`** + Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png"). + - **Type:** string + - **Choices:** `jpeg`, `png`, `webp` + +- **`--screenshotQuality`/ `--screenshot-quality`** + Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default. + - **Type:** number + +- **`--screenshotMaxWidth`/ `--screenshot-max-width`** + Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize. + - **Type:** number + +- **`--screenshotMaxHeight`/ `--screenshot-max-height`** + Maximum height in pixels for screenshots. 
If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. Unset means no resize. + - **Type:** number + - **`--slim`** Exposes a "slim" set of 3 tools covering navigation, script execution and screenshots only. Useful for basic browser tasks. - **Type:** boolean diff --git a/src/bin/chrome-devtools-mcp-cli-options.ts b/src/bin/chrome-devtools-mcp-cli-options.ts index 3cfbacac0..9c9576b6f 100644 --- a/src/bin/chrome-devtools-mcp-cli-options.ts +++ b/src/bin/chrome-devtools-mcp-cli-options.ts @@ -250,6 +250,27 @@ export const cliOptions = { hidden: true, describe: 'Include watchdog PID in Clearcut request headers (for testing).', }, + screenshotFormat: { + type: 'string', + description: + 'Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png").', + choices: ['jpeg', 'png', 'webp'] as const, + }, + screenshotQuality: { + type: 'number', + description: + 'Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default.', + }, + screenshotMaxWidth: { + type: 'number', + description: + 'Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize.', + }, + screenshotMaxHeight: { + type: 'number', + description: + 'Maximum height in pixels for screenshots. If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. 
Unset means no resize.', + }, slim: { type: 'boolean', describe: diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 2e648531c..57f6f9736 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -5,101 +5,236 @@ */ import {zod} from '../third_party/index.js'; -import type {ElementHandle, Page} from '../third_party/index.js'; +import type { + BoundingBox, + ElementHandle, + Page, + ScreenshotClip, +} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; import {definePageTool} from './ToolDefinition.js'; -export const screenshot = definePageTool({ - name: 'take_screenshot', - description: `Take a screenshot of the page or element.`, - annotations: { - category: ToolCategory.DEBUGGING, - // Not read-only due to filePath param. - readOnlyHint: false, - }, - schema: { - format: zod - .enum(['png', 'jpeg', 'webp']) - .default('png') - .describe('Type of format to save the screenshot as. Default is "png"'), - quality: zod - .number() - .min(0) - .max(100) - .optional() - .describe( - 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', - ), - uid: zod - .string() - .optional() - .describe( - 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', - ), - fullPage: zod - .boolean() - .optional() - .describe( - 'If set to true takes a screenshot of the full page instead of the currently visible viewport. 
Incompatible with uid.', +type ScreenshotFormat = 'png' | 'jpeg' | 'webp'; + +function isScreenshotFormat(value: unknown): value is ScreenshotFormat { + return value === 'png' || value === 'jpeg' || value === 'webp'; +} + +function isPositiveFiniteNumber(value: number | undefined): value is number { + return value !== undefined && Number.isFinite(value) && value > 0; +} + +async function getSourceBox( + page: Page, + element: ElementHandle | undefined, + fullPage: boolean, +): Promise { + if (element) { + const box = await element.boundingBox(); + return box ?? undefined; + } + if (fullPage) { + const dims = await page.evaluate(() => ({ + width: Math.max( + document.documentElement.scrollWidth, + document.body?.scrollWidth ?? 0, ), - filePath: zod - .string() - .optional() - .describe( - 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + height: Math.max( + document.documentElement.scrollHeight, + document.body?.scrollHeight ?? 0, ), - }, - handler: async (request, response, context) => { - if (request.params.uid && request.params.fullPage) { - throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + })); + if (dims.width <= 0 || dims.height <= 0) { + return undefined; } + return {x: 0, y: 0, width: dims.width, height: dims.height}; + } + const viewport = page.viewport(); + if (!viewport) { + return undefined; + } + return {x: 0, y: 0, width: viewport.width, height: viewport.height}; +} - let pageOrHandle: Page | ElementHandle; - if (request.params.uid) { - pageOrHandle = await request.page.getElementByUid(request.params.uid); - } else { - pageOrHandle = request.page.pptrPage; - } +function computeDownscaleClip( + box: BoundingBox, + maxWidth: number | undefined, + maxHeight: number | undefined, +): ScreenshotClip | undefined { + const widthScale = isPositiveFiniteNumber(maxWidth) + ? 
Math.min(1, maxWidth / box.width) + : 1; + const heightScale = isPositiveFiniteNumber(maxHeight) + ? Math.min(1, maxHeight / box.height) + : 1; + const scale = Math.min(widthScale, heightScale); + if (scale >= 1) { + return undefined; + } + // Skip degenerate sub-pixel results. + if (Math.round(box.width * scale) < 1 || Math.round(box.height * scale) < 1) { + return undefined; + } + return { + x: box.x, + y: box.y, + width: box.width, + height: box.height, + scale, + }; +} - const format = request.params.format; - const quality = format === 'png' ? undefined : request.params.quality; - - const screenshot = await pageOrHandle.screenshot({ - type: format, - fullPage: request.params.fullPage, - quality, - optimizeForSpeed: true, // Bonus: optimize encoding for speed - }); - - if (request.params.uid) { - response.appendResponseLine( - `Took a screenshot of node with uid "${request.params.uid}".`, - ); - } else if (request.params.fullPage) { - response.appendResponseLine( - 'Took a screenshot of the full current page.', - ); - } else { - response.appendResponseLine( - "Took a screenshot of the current page's viewport.", - ); - } +export const screenshot = definePageTool(args => { + const { + screenshotFormat, + screenshotQuality, + screenshotMaxWidth, + screenshotMaxHeight, + } = args ?? {}; - if (request.params.filePath) { - const file = await context.saveFile(screenshot, request.params.filePath); - response.appendResponseLine(`Saved screenshot to ${file.filename}.`); - } else if (screenshot.length >= 2_000_000) { - const {filepath} = await context.saveTemporaryFile( - screenshot, - `screenshot.${request.params.format}`, - ); - response.appendResponseLine(`Saved screenshot to ${filepath}.`); - } else { - response.attachImage({ - mimeType: `image/${request.params.format}`, - data: Buffer.from(screenshot).toString('base64'), - }); - } - }, + const defaultFormat: ScreenshotFormat = isScreenshotFormat(screenshotFormat) + ? 
screenshotFormat + : 'png'; + const defaultQuality = isPositiveFiniteNumber(screenshotQuality) + ? screenshotQuality + : undefined; + const maxWidth = isPositiveFiniteNumber(screenshotMaxWidth) + ? screenshotMaxWidth + : undefined; + const maxHeight = isPositiveFiniteNumber(screenshotMaxHeight) + ? screenshotMaxHeight + : undefined; + + return { + name: 'take_screenshot', + description: `Take a screenshot of the page or element.`, + annotations: { + category: ToolCategory.DEBUGGING, + // Not read-only due to filePath param. + readOnlyHint: false, + }, + schema: { + format: zod + .enum(['png', 'jpeg', 'webp']) + .default(defaultFormat) + .describe( + `Type of format to save the screenshot as. Default is "${defaultFormat}"`, + ), + quality: zod + .number() + .min(0) + .max(100) + .optional() + .describe( + 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', + ), + uid: zod + .string() + .optional() + .describe( + 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', + ), + fullPage: zod + .boolean() + .optional() + .describe( + 'If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.', + ), + filePath: zod + .string() + .optional() + .describe( + 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + ), + }, + handler: async (request, response, context) => { + if (request.params.uid && request.params.fullPage) { + throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + } + + const page = request.page.pptrPage; + const element = request.params.uid + ? await request.page.getElementByUid(request.params.uid) + : undefined; + + const format = request.params.format; + const quality = + format === 'png' + ? undefined + : (request.params.quality ?? 
defaultQuality); + const fullPage = request.params.fullPage ?? false; + + // Compute downscale clip when maxWidth/maxHeight is set and the source + // exceeds either bound. The smaller scale factor wins so both bounds + // are respected while preserving aspect ratio. + let clip: ScreenshotClip | undefined; + if (maxWidth !== undefined || maxHeight !== undefined) { + const box = await getSourceBox(page, element, fullPage); + if (box) { + clip = computeDownscaleClip(box, maxWidth, maxHeight); + } + } + + let screenshot: Uint8Array; + if (clip) { + // page.screenshot with clip lets the CDP scale param downscale the + // capture for viewport, full-page and element shots alike. We rely on + // Puppeteer's default of captureBeyondViewport=true when a clip is + // present so element/full-page captures below the fold still work. + screenshot = await page.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + clip, + }); + } else if (element) { + screenshot = await element.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + }); + } else { + screenshot = await page.screenshot({ + type: format, + fullPage, + quality, + optimizeForSpeed: true, + }); + } + + if (request.params.uid) { + response.appendResponseLine( + `Took a screenshot of node with uid "${request.params.uid}".`, + ); + } else if (fullPage) { + response.appendResponseLine( + 'Took a screenshot of the full current page.', + ); + } else { + response.appendResponseLine( + "Took a screenshot of the current page's viewport.", + ); + } + + if (request.params.filePath) { + const file = await context.saveFile( + screenshot, + request.params.filePath, + ); + response.appendResponseLine(`Saved screenshot to ${file.filename}.`); + } else if (screenshot.length >= 2_000_000) { + const {filepath} = await context.saveTemporaryFile( + screenshot, + `screenshot.${request.params.format}`, + ); + response.appendResponseLine(`Saved screenshot to ${filepath}.`); + } else { + response.attachImage({ + 
mimeType: `image/${request.params.format}`, + data: Buffer.from(screenshot).toString('base64'), + }); + } + }, + }; }); diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index 9d177f9bb..a24f3b2df 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -10,10 +10,27 @@ import {tmpdir} from 'node:os'; import {join} from 'node:path'; import {describe, it} from 'node:test'; +import type {ParsedArguments} from '../../src/bin/chrome-devtools-mcp-cli-options.js'; import {screenshot} from '../../src/tools/screenshot.js'; import {screenshots} from '../snapshot.js'; import {html, withMcpContext} from '../utils.js'; +const screenshotTool = screenshot({} as ParsedArguments); + +/** + * Reads the pixel width from a PNG buffer's IHDR chunk (bytes 16..19). + */ +function pngWidth(data: Buffer): number { + return data.readUInt32BE(16); +} + +/** + * Reads the pixel height from a PNG buffer's IHDR chunk (bytes 20..23). + */ +function pngHeight(data: Buffer): number { + return data.readUInt32BE(20); +} + describe('screenshot', () => { describe('browser_take_screenshot', () => { it('with default options', async () => { @@ -21,7 +38,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'png'}, page: context.getSelectedMcpPage()}, response, context, @@ -40,7 +57,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', quality: 0}, page: context.getSelectedMcpPage(), @@ -59,7 +76,7 @@ describe('screenshot', () => { }); it('with jpeg', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 
'jpeg'}, page: context.getSelectedMcpPage()}, response, context, @@ -75,7 +92,7 @@ describe('screenshot', () => { }); it('with webp', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'webp'}, page: context.getSelectedMcpPage()}, response, context, @@ -94,7 +111,7 @@ describe('screenshot', () => { const fixture = screenshots.viewportOverflow; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -129,7 +146,7 @@ describe('screenshot', () => { return el?.scrollIntoViewIfNeeded(); }); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -156,7 +173,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await context.createTextSnapshot(context.getSelectedMcpPage()); - await screenshot.handler( + await screenshotTool.handler( { params: { format: 'png', @@ -184,7 +201,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -228,7 +245,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -255,7 +272,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: 
context.getSelectedMcpPage(), @@ -272,6 +289,141 @@ describe('screenshot', () => { } }); + it('honors screenshotFormat default from CLI args', async () => { + const tool = screenshot({ + screenshotFormat: 'jpeg', + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const fixture = screenshots.basic; + const page = context.getSelectedPptrPage(); + await page.setContent(fixture.html); + // No explicit format passed: zod should apply the CLI-driven default. + await tool.handler( + { + params: {format: tool.schema.format.parse(undefined)}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + assert.equal(response.images.length, 1); + assert.equal(response.images[0].mimeType, 'image/jpeg'); + }); + }); + + it('keeps "png" as default format when no CLI override is set', async () => { + const tool = screenshot({} as ParsedArguments); + assert.equal(tool.schema.format.parse(undefined), 'png'); + }); + + it('downscales viewport screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 100, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + assert.equal(response.images.length, 1); + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 100); + // Aspect ratio preserved: 800x600 -> 100x75. + assert.equal(pngHeight(buf), 75); + }); + }); + + it('downscales using the smaller scale when both max-width and max-height are set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 400, + screenshotMaxHeight: 60, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // height bound dictates: 60/600 = 0.1 -> 80x60. + assert.equal(pngHeight(buf), 60); + assert.equal(pngWidth(buf), 80); + }); + }); + + it('does not resize when source is smaller than the max bounds', async () => { + const tool = screenshot({ + screenshotMaxWidth: 4000, + screenshotMaxHeight: 4000, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent(html`
`); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 800); + assert.equal(pngHeight(buf), 600); + }); + }); + + it('downscales full page screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 200, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + { + params: {format: 'png', fullPage: true}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // Source is at least 1000x1500; scale = 200/1000 = 0.2 -> ~200x300. + // Allow ±2px to absorb sub-pixel rasterization rounding by Chrome. + assert.equal(pngWidth(buf), 200); + assert.ok( + Math.abs(pngHeight(buf) - 300) <= 2, + `expected height near 300, got ${pngHeight(buf)}`, + ); + }); + }); + it('with malformed filePath', async () => { await withMcpContext(async (response, context) => { // Use a platform-specific invalid character. @@ -283,7 +435,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(),