From 0b8268a646f160f161f820a9783d9769fbcaa987 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 23 May 2025 20:03:05 +0100 Subject: [PATCH 01/27] =?UTF-8?q?=F0=9F=93=93=20Add=20`ipynb`=20as=20expor?= =?UTF-8?q?t=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .changeset/config.json | 1 + .changeset/witty-tigers-hunt.md | 7 + packages/myst-cli/package.json | 1 + packages/myst-cli/src/build/build.ts | 15 +- packages/myst-cli/src/build/ipynb/index.ts | 51 ++++ .../src/build/utils/collectExportOptions.ts | 1 + .../src/build/utils/localArticleExport.ts | 3 + packages/myst-cli/src/cli/build.ts | 2 + packages/myst-cli/src/cli/options.ts | 4 + .../myst-frontmatter/src/exports/types.ts | 1 + packages/myst-to-ipynb/.eslintrc.cjs | 4 + packages/myst-to-ipynb/CHANGELOG.md | 1 + packages/myst-to-ipynb/README.md | 3 + packages/myst-to-ipynb/package.json | 50 ++++ packages/myst-to-ipynb/src/index.ts | 59 ++++ packages/myst-to-ipynb/tests/basic.yml | 280 ++++++++++++++++++ packages/myst-to-ipynb/tests/example.ipynb | 31 ++ packages/myst-to-ipynb/tests/run.spec.ts | 91 ++++++ packages/myst-to-ipynb/tsconfig.json | 8 + 19 files changed, 608 insertions(+), 5 deletions(-) create mode 100644 .changeset/witty-tigers-hunt.md create mode 100644 packages/myst-cli/src/build/ipynb/index.ts create mode 100644 packages/myst-to-ipynb/.eslintrc.cjs create mode 100644 packages/myst-to-ipynb/CHANGELOG.md create mode 100644 packages/myst-to-ipynb/README.md create mode 100644 packages/myst-to-ipynb/package.json create mode 100644 packages/myst-to-ipynb/src/index.ts create mode 100644 packages/myst-to-ipynb/tests/basic.yml create mode 100644 packages/myst-to-ipynb/tests/example.ipynb create mode 100644 packages/myst-to-ipynb/tests/run.spec.ts create mode 100644 packages/myst-to-ipynb/tsconfig.json diff --git a/.changeset/config.json b/.changeset/config.json index 18d149766d..9a36f6a6c4 100644 --- a/.changeset/config.json +++ 
b/.changeset/config.json @@ -6,6 +6,7 @@ ["myst-common", "myst-config", "myst-frontmatter", "myst-spec-ext"], ["myst-to-jats", "jats-to-myst"], ["myst-to-tex", "tex-to-myst"], + ["myst-to-md", "myst-to-ipynb"], ["myst-parser", "myst-roles", "myst-directives", "myst-to-html"], ["mystmd", "myst-cli", "myst-migrate"] ], diff --git a/.changeset/witty-tigers-hunt.md b/.changeset/witty-tigers-hunt.md new file mode 100644 index 0000000000..c0b0a78a0d --- /dev/null +++ b/.changeset/witty-tigers-hunt.md @@ -0,0 +1,7 @@ +--- +"myst-frontmatter": patch +"myst-to-ipynb": patch +"myst-cli": patch +--- + +Add ipynb as export format diff --git a/packages/myst-cli/package.json b/packages/myst-cli/package.json index 2a851ae478..35d9189712 100644 --- a/packages/myst-cli/package.json +++ b/packages/myst-cli/package.json @@ -88,6 +88,7 @@ "myst-spec-ext": "^1.9.4", "myst-templates": "^1.0.27", "myst-to-docx": "^1.0.16", + "myst-to-ipynb": "^1.0.15", "myst-to-jats": "^1.0.35", "myst-to-md": "^1.0.16", "myst-to-tex": "^1.0.45", diff --git a/packages/myst-cli/src/build/build.ts b/packages/myst-cli/src/build/build.ts index d8a5154cbc..f699126a24 100644 --- a/packages/myst-cli/src/build/build.ts +++ b/packages/myst-cli/src/build/build.ts @@ -26,6 +26,7 @@ type FormatBuildOpts = { typst?: boolean; xml?: boolean; md?: boolean; + ipynb?: boolean; meca?: boolean; cff?: boolean; html?: boolean; @@ -37,8 +38,8 @@ type FormatBuildOpts = { export type BuildOpts = FormatBuildOpts & CollectionOptions & RunExportOptions & StartOptions; export function hasAnyExplicitExportFormat(opts: BuildOpts): boolean { - const { docx, pdf, tex, typst, xml, md, meca, cff } = opts; - return docx || pdf || tex || typst || xml || md || meca || cff || false; + const { docx, pdf, tex, typst, xml, md, ipynb, meca, cff } = opts; + return docx || pdf || tex || typst || xml || md || ipynb || meca || cff || false; } /** @@ -50,12 +51,13 @@ export function hasAnyExplicitExportFormat(opts: BuildOpts): boolean { * @param 
opts.typst * @param opts.xml * @param opts.md + * @param opts.ipynb * @param opts.meca * @param opts.all all exports requested with --all option * @param opts.explicit explicit input file was provided */ export function getAllowedExportFormats(opts: FormatBuildOpts & { explicit?: boolean }) { - const { docx, pdf, tex, typst, xml, md, meca, cff, all, explicit } = opts; + const { docx, pdf, tex, typst, xml, md, ipynb, meca, cff, all, explicit } = opts; const formats = []; const any = hasAnyExplicitExportFormat(opts); const override = all || (!any && explicit); @@ -69,6 +71,7 @@ export function getAllowedExportFormats(opts: FormatBuildOpts & { explicit?: boo if (typst || override) formats.push(ExportFormats.typst); if (xml || override) formats.push(ExportFormats.xml); if (md || override) formats.push(ExportFormats.md); + if (ipynb || override) formats.push(ExportFormats.ipynb); if (meca || override) formats.push(ExportFormats.meca); if (cff || override) formats.push(ExportFormats.cff); return [...new Set(formats)]; @@ -78,7 +81,7 @@ export function getAllowedExportFormats(opts: FormatBuildOpts & { explicit?: boo * Return requested formats from CLI options */ export function getRequestedExportFormats(opts: FormatBuildOpts) { - const { docx, pdf, tex, typst, xml, md, meca, cff } = opts; + const { docx, pdf, tex, typst, xml, md, ipynb, meca, cff } = opts; const formats = []; if (docx) formats.push(ExportFormats.docx); if (pdf) formats.push(ExportFormats.pdf); @@ -86,6 +89,7 @@ export function getRequestedExportFormats(opts: FormatBuildOpts) { if (typst) formats.push(ExportFormats.typst); if (xml) formats.push(ExportFormats.xml); if (md) formats.push(ExportFormats.md); + if (ipynb) formats.push(ExportFormats.ipynb); if (meca) formats.push(ExportFormats.meca); if (cff) formats.push(ExportFormats.cff); return formats; @@ -239,7 +243,8 @@ export async function build(session: ISession, files: string[], opts: BuildOpts) // Print out the kinds that are filtered const kinds = 
Object.entries(opts) .filter( - ([k, v]) => ['docx', 'pdf', 'tex', 'typst', 'xml', 'md', 'meca', 'cff'].includes(k) && v, + ([k, v]) => + ['docx', 'pdf', 'tex', 'typst', 'xml', 'md', 'ipynb', 'meca', 'cff'].includes(k) && v, ) .map(([k]) => k); session.log.info( diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts new file mode 100644 index 0000000000..27f2896a0d --- /dev/null +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -0,0 +1,51 @@ +import path from 'node:path'; +import { tic, writeFileToFolder } from 'myst-cli-utils'; +import { FRONTMATTER_ALIASES, PAGE_FRONTMATTER_KEYS } from 'myst-frontmatter'; +import { writeIpynb } from 'myst-to-ipynb'; +import { filterKeys } from 'simple-validators'; +import { VFile } from 'vfile'; +import { finalizeMdast } from '../../process/mdast.js'; +import type { ISession } from '../../session/types.js'; +import { logMessagesFromVFile } from '../../utils/logging.js'; +import { KNOWN_IMAGE_EXTENSIONS } from '../../utils/resolveExtension.js'; +import type { ExportWithOutput, ExportFnOptions } from '../types.js'; +import { cleanOutput } from '../utils/cleanOutput.js'; +import { getFileContent } from '../utils/getFileContent.js'; + +export async function runIpynbExport( + session: ISession, + sourceFile: string, + exportOptions: ExportWithOutput, + opts?: ExportFnOptions, +) { + const toc = tic(); + const { output, articles } = exportOptions; + const { clean, projectPath, extraLinkTransformers, execute } = opts ?? 
{}; + // At this point, export options are resolved to contain one-and-only-one article + const article = articles[0]; + if (!article?.file) return { tempFolders: [] }; + if (clean) cleanOutput(session, output); + const [{ mdast, frontmatter }] = await getFileContent(session, [article.file], { + projectPath, + imageExtensions: KNOWN_IMAGE_EXTENSIONS, + extraLinkTransformers, + preFrontmatters: [ + filterKeys(article, [...PAGE_FRONTMATTER_KEYS, ...Object.keys(FRONTMATTER_ALIASES)]), + ], + execute, + }); + await finalizeMdast(session, mdast, frontmatter, article.file, { + imageWriteFolder: path.join(path.dirname(output), 'files'), + imageAltOutputFolder: 'files/', + imageExtensions: KNOWN_IMAGE_EXTENSIONS, + simplifyFigures: false, + useExistingImages: true, + }); + const vfile = new VFile(); + vfile.path = output; + const mdOut = writeIpynb(vfile, mdast as any, frontmatter); + logMessagesFromVFile(session, mdOut); + session.log.info(toc(`📑 Exported MD in %s, copying to ${output}`)); + writeFileToFolder(output, mdOut.result as string); + return { tempFolders: [] }; +} diff --git a/packages/myst-cli/src/build/utils/collectExportOptions.ts b/packages/myst-cli/src/build/utils/collectExportOptions.ts index da0393fcd6..596d2baefe 100644 --- a/packages/myst-cli/src/build/utils/collectExportOptions.ts +++ b/packages/myst-cli/src/build/utils/collectExportOptions.ts @@ -271,6 +271,7 @@ export function resolveArticles( export const ALLOWED_EXTENSIONS: Record = { [ExportFormats.docx]: ['.doc', '.docx'], [ExportFormats.md]: ['.md'], + [ExportFormats.ipynb]: ['.ipynb'], [ExportFormats.meca]: ['.zip', '.meca'], [ExportFormats.pdf]: ['.pdf'], [ExportFormats.pdftex]: ['.pdf', '.tex', '.zip'], diff --git a/packages/myst-cli/src/build/utils/localArticleExport.ts b/packages/myst-cli/src/build/utils/localArticleExport.ts index ebdd25996c..d062c6465b 100644 --- a/packages/myst-cli/src/build/utils/localArticleExport.ts +++ b/packages/myst-cli/src/build/utils/localArticleExport.ts @@ 
-20,6 +20,7 @@ import { texExportOptionsFromPdf } from '../pdf/single.js'; import { createPdfGivenTexExport } from '../pdf/create.js'; import { runMecaExport } from '../meca/index.js'; import { runMdExport } from '../md/index.js'; +import { runIpynbExport } from '../ipynb/index.js'; import { selectors, watch as watchReducer } from '../../store/index.js'; import { runCffExport } from '../cff.js'; @@ -113,6 +114,8 @@ async function _localArticleExport( exportFn = runJatsExport; } else if (format === ExportFormats.md) { exportFn = runMdExport; + } else if (format === ExportFormats.ipynb) { + exportFn = runIpynbExport; } else if (format === ExportFormats.meca) { exportFn = runMecaExport; } else if (format === ExportFormats.cff) { diff --git a/packages/myst-cli/src/cli/build.ts b/packages/myst-cli/src/cli/build.ts index d2e2295b7d..720c6fd41a 100644 --- a/packages/myst-cli/src/cli/build.ts +++ b/packages/myst-cli/src/cli/build.ts @@ -23,6 +23,7 @@ import { makeCffOption, makeKeepHostOption, makePortOption, + makeIpynbOption, } from './options.js'; import { readableName } from '../utils/whiteLabelling.js'; @@ -37,6 +38,7 @@ export function makeBuildCommand() { .addOption(makeTypstOption('Build Typst outputs')) .addOption(makeDocxOption('Build Docx output')) .addOption(makeMdOption('Build MD output')) + .addOption(makeIpynbOption('Build IPYNB output')) .addOption(makeJatsOption('Build JATS xml output')) .addOption(makeMecaOptions('Build MECA zip output')) .addOption(makeCffOption('Build CFF output')) diff --git a/packages/myst-cli/src/cli/options.ts b/packages/myst-cli/src/cli/options.ts index abc45716a0..2c1e8d4a8c 100644 --- a/packages/myst-cli/src/cli/options.ts +++ b/packages/myst-cli/src/cli/options.ts @@ -29,6 +29,10 @@ export function makeMdOption(description: string) { return new Option('--md', description).default(false); } +export function makeIpynbOption(description: string) { + return new Option('--ipynb', description).default(false); +} + export function 
makeJatsOption(description: string) { return new Option('--jats, --xml', description).default(false); } diff --git a/packages/myst-frontmatter/src/exports/types.ts b/packages/myst-frontmatter/src/exports/types.ts index 0cd118a79d..b562abb4dd 100644 --- a/packages/myst-frontmatter/src/exports/types.ts +++ b/packages/myst-frontmatter/src/exports/types.ts @@ -8,6 +8,7 @@ export enum ExportFormats { docx = 'docx', xml = 'xml', md = 'md', + ipynb = 'ipynb', meca = 'meca', cff = 'cff', } diff --git a/packages/myst-to-ipynb/.eslintrc.cjs b/packages/myst-to-ipynb/.eslintrc.cjs new file mode 100644 index 0000000000..76787609ad --- /dev/null +++ b/packages/myst-to-ipynb/.eslintrc.cjs @@ -0,0 +1,4 @@ +module.exports = { + root: true, + extends: ['curvenote'], +}; diff --git a/packages/myst-to-ipynb/CHANGELOG.md b/packages/myst-to-ipynb/CHANGELOG.md new file mode 100644 index 0000000000..5ec5958361 --- /dev/null +++ b/packages/myst-to-ipynb/CHANGELOG.md @@ -0,0 +1 @@ +# myst-to-ipynb diff --git a/packages/myst-to-ipynb/README.md b/packages/myst-to-ipynb/README.md new file mode 100644 index 0000000000..516b66e962 --- /dev/null +++ b/packages/myst-to-ipynb/README.md @@ -0,0 +1,3 @@ +# myst-to-ipynb + +Convert a MyST AST to ipynb notebook. 
diff --git a/packages/myst-to-ipynb/package.json b/packages/myst-to-ipynb/package.json new file mode 100644 index 0000000000..85064ec617 --- /dev/null +++ b/packages/myst-to-ipynb/package.json @@ -0,0 +1,50 @@ +{ + "name": "myst-to-ipynb", + "version": "1.0.15", + "description": "Export from MyST mdast to ipynb", + "author": "Rowan Cockett ", + "homepage": "https://github.com/jupyter-book/mystmd/tree/main/packages/myst-to-md", + "license": "MIT", + "type": "module", + "exports": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": [ + "src", + "dist" + ], + "keywords": [ + "myst-plugin", + "markdown" + ], + "publishConfig": { + "access": "public" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/jupyter-book/mystmd.git" + }, + "scripts": { + "clean": "rimraf dist", + "lint": "eslint \"src/**/*.ts\" -c .eslintrc.cjs --max-warnings 1", + "lint:format": "prettier --check src/*.ts src/**/*.ts", + "test": "vitest run", + "test:watch": "vitest watch", + "build:esm": "tsc", + "build": "npm-run-all -l clean -p build:esm" + }, + "bugs": { + "url": "https://github.com/jupyter-book/mystmd/issues" + }, + "dependencies": { + "js-yaml": "^4.1.0", + "mdast-util-gfm-footnote": "^1.0.2", + "mdast-util-gfm-table": "^1.0.7", + "mdast-util-to-markdown": "^1.5.0", + "myst-common": "^1.7.6", + "myst-frontmatter": "^1.7.6", + "myst-to-md": "^1.0.15", + "unist-util-select": "^4.0.3", + "vfile": "^5.3.7", + "vfile-reporter": "^7.0.4" + } +} diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts new file mode 100644 index 0000000000..4f490c7c6c --- /dev/null +++ b/packages/myst-to-ipynb/src/index.ts @@ -0,0 +1,59 @@ +import type { Root } from 'myst-spec'; +import type { Block, Code } from 'myst-spec-ext'; +import type { Plugin } from 'unified'; +import type { VFile } from 'vfile'; +import type { PageFrontmatter } from 'myst-frontmatter'; +import { writeMd } from 'myst-to-md'; +import { select } from 'unist-util-select'; + 
+function sourceToStringList(src: string): string[] { + const lines = src.split('\n').map((s) => `${s}\n`); + lines[lines.length - 1] = lines[lines.length - 1].trimEnd(); + return lines; +} + +export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatter) { + const cells = (node.children as Block[]).map((block: Block) => { + if (block.type === 'block' && block.kind === 'notebook-code') { + const code = select('code', block) as Code; + return { + cell_type: 'code', + execution_count: null, + metadata: {}, + outputs: [], + source: sourceToStringList(code.value), + }; + } + const md = writeMd(file, { type: 'root', children: block.children as any }).result as string; + return { + cell_type: 'markdown', + metadata: {}, + source: sourceToStringList(md), + }; + }); + const ipynb = { + cells, + metadata: { + language_info: { + name: 'python', + }, + }, + nbformat: 4, + nbformat_minor: 2, + }; + file.result = JSON.stringify(ipynb, null, 2); + return file; +} + +const plugin: Plugin<[PageFrontmatter?], Root, VFile> = function (frontmatter?) 
{ + this.Compiler = (node, file) => { + return writeIpynb(file, node, frontmatter); + }; + + return (node: Root) => { + // Preprocess + return node; + }; +}; + +export default plugin; diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml new file mode 100644 index 0000000000..48c124b3c8 --- /dev/null +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -0,0 +1,280 @@ +title: myst-to-md basic features +cases: + - title: styles in paragraph + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + - type: text + value: ' with ' + - type: strong + children: + - type: text + value: different + - type: text + value: ' ' + - type: inlineCode + value: style`s + markdown: |- + Some % *markdown* with **different** ``style`s`` + - title: headings + mdast: + type: root + children: + - type: heading + depth: 1 + children: + - type: text + value: first + - type: paragraph + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + - type: heading + depth: 4 + children: + - type: text + value: fourth + markdown: |- + # first + + Some % *markdown* + + #### fourth + - title: thematic break + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: thematicBreak + - type: paragraph + children: + - type: text + value: Some more markdown + markdown: |- + Some markdown + + --- + + Some more markdown + - title: block quote + mdast: + type: root + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + markdown: |- + > Some % *markdown* + - title: unordered list + mdast: + type: root + children: + - type: list + ordered: false + children: + - type: listItem + children: + - type: paragraph + children: + - type: text + value: 
Some markdown + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some more markdown + markdown: |- + * Some markdown + + * Some more markdown + - title: ordered list + mdast: + type: root + children: + - type: list + ordered: true + start: 5 + children: + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some more markdown + markdown: |- + 5. Some markdown + + 6. Some more markdown + - title: html + mdast: + type: root + children: + - type: html + value:
*Not markdown*<\div> + markdown: |- +
*Not markdown*<\div> + - title: code - plain + mdast: + type: root + children: + - type: code + value: |- + 5+5 + print("hello world") + markdown: |- + ``` + 5+5 + print("hello world") + ``` + - title: code - nested backticks + mdast: + type: root + children: + - type: code + value: |- + 5+5 + ````{abc} + ```` + print("hello world") + markdown: |- + ````` + 5+5 + ````{abc} + ```` + print("hello world") + ````` + - title: code - with language + mdast: + type: root + children: + - type: code + lang: python + value: |- + 5+5 + print("hello world") + markdown: |- + ```python + 5+5 + print("hello world") + ``` + - title: code - with metadata + mdast: + type: root + children: + - type: code + lang: python + meta: highlight-line="2" + value: |- + 5+5 + print("hello world") + markdown: |- + ```python highlight-line="2" + 5+5 + print("hello world") + ``` + - title: definition + mdast: + type: root + children: + - type: definition + identifier: my-def + label: My-Def + url: https://example.com + title: Example + markdown: |- + [My-Def]: https://example.com "Example" + - title: break + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: break + - type: text + value: Some more markdown + markdown: |- + Some markdown\ + Some more markdown + - title: link + mdast: + type: root + children: + - type: link + url: https://example.com + title: my link + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + markdown: |- + [Some % *markdown*](https://example.com "my link") + - title: image + mdast: + type: root + children: + - type: image + url: https://example.com + title: my image + alt: Some text + markdown: |- + ![Some text](https://example.com "my image") + - title: link reference + mdast: + type: root + children: + - type: linkReference + identifier: my-link + label: My-Link + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text 
+ value: markdown + markdown: |- + [Some % *markdown*][My-Link] + - title: image reference + mdast: + type: root + children: + - type: imageReference + identifier: my-image + label: My-Image + alt: Some text + markdown: |- + ![Some text][My-Image] diff --git a/packages/myst-to-ipynb/tests/example.ipynb b/packages/myst-to-ipynb/tests/example.ipynb new file mode 100644 index 0000000000..8e16dcd584 --- /dev/null +++ b/packages/myst-to-ipynb/tests/example.ipynb @@ -0,0 +1,31 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "hello\n", + "\n", + "world" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = 1\n", + "\n", + "hello = 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts new file mode 100644 index 0000000000..dc707e2b18 --- /dev/null +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -0,0 +1,91 @@ +import { describe, expect, test } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import yaml from 'js-yaml'; +import { unified } from 'unified'; +import mystToMd from '../src'; + +type TestCase = { + title: string; + markdown: string; + mdast: Record; +}; + +type TestCases = { + title: string; + cases: TestCase[]; +}; + +const casesList: TestCases[] = fs + .readdirSync(__dirname) + .filter((file) => file.endsWith('.yml')) + .map((file) => { + const content = fs.readFileSync(path.join(__dirname, file), { encoding: 'utf-8' }); + return yaml.load(content) as TestCases; + }); + +casesList.forEach(({ title, cases }) => { + describe(title, () => { + test.each(cases.map((c): [string, TestCase] => [c.title, c]))( + '%s', + (_, { markdown, mdast }) => { + const pipe = unified().use(mystToMd); + pipe.runSync(mdast as any); + const file = pipe.stringify(mdast as any); + 
expect(file.result).toEqual(markdown); + }, + ); + }); +}); + +describe('myst-to-md frontmatter', () => { + test('empty frontmatter passes', () => { + const pipe = unified().use(mystToMd, {}); + const mdast = { + type: 'root', + children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], + }; + pipe.runSync(mdast as any); + const file = pipe.stringify(mdast as any); + expect(file.result).toEqual('Hello world!'); + }); + test('simple frontmatter passes', () => { + const pipe = unified().use(mystToMd, { title: 'My Title' }); + const mdast = { + type: 'root', + children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], + }; + pipe.runSync(mdast as any); + const file = pipe.stringify(mdast as any); + expect(file.result).toEqual('---\ntitle: My Title\n---\nHello world!'); + }); + test('frontmatter with licenses passes', () => { + const pipe = unified().use(mystToMd, { + title: 'My Title', + license: { + content: { + id: 'Apache-2.0', + name: 'Apache License 2.0', + url: 'https://opensource.org/licenses/Apache-2.0', + free: true, + osi: true, + }, + code: { + name: 'Creative Commons Attribution 3.0 Unported', + id: 'CC-BY-3.0', + CC: true, + url: 'https://creativecommons.org/licenses/by/3.0/', + }, + }, + }); + const mdast = { + type: 'root', + children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' 
}] }], + }; + pipe.runSync(mdast as any); + const file = pipe.stringify(mdast as any); + expect(file.result).toEqual( + '---\ntitle: My Title\nlicense:\n content: Apache-2.0\n code: CC-BY-3.0\n---\nHello world!', + ); + }); +}); diff --git a/packages/myst-to-ipynb/tsconfig.json b/packages/myst-to-ipynb/tsconfig.json new file mode 100644 index 0000000000..1c5c0f1c4b --- /dev/null +++ b/packages/myst-to-ipynb/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../tsconfig/base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["."], + "exclude": ["dist", "build", "node_modules", "src/**/*.spec.ts", "tests"] +} From 3d208281758d4139f3dbd136154713d97e03901d Mon Sep 17 00:00:00 2001 From: kp992 Date: Tue, 11 Mar 2025 18:22:08 -0700 Subject: [PATCH 02/27] fix some tests and comment the failing ones to fix in upcoming commits --- packages/myst-to-ipynb/src/index.ts | 42 +- packages/myst-to-ipynb/tests/basic.yml | 478 ++++++++++++----------- packages/myst-to-ipynb/tests/run.spec.ts | 115 +++--- 3 files changed, 355 insertions(+), 280 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 4f490c7c6c..c5566de2e4 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -6,31 +6,43 @@ import type { PageFrontmatter } from 'myst-frontmatter'; import { writeMd } from 'myst-to-md'; import { select } from 'unist-util-select'; -function sourceToStringList(src: string): string[] { - const lines = src.split('\n').map((s) => `${s}\n`); - lines[lines.length - 1] = lines[lines.length - 1].trimEnd(); - return lines; +function markdownString(file: VFile, md_cells: Block[]) { + const md = writeMd(file, { type: 'root', children: md_cells }).result as string; + return { + cell_type: 'markdown', + metadata: {}, + source: md, + }; } export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatter) { - const cells = (node.children as Block[]).map((block: Block) => { + const cells 
= []; + const md_cells: Block[] = []; + + for (const block of node.children as Block[]) { if (block.type === 'block' && block.kind === 'notebook-code') { + if (md_cells.length != 0) { + cells.push(markdownString(file, md_cells)); + md_cells.length = 0; + } const code = select('code', block) as Code; - return { + cells.push({ cell_type: 'code', execution_count: null, metadata: {}, outputs: [], - source: sourceToStringList(code.value), - }; + source: code.value, + }); + } else { + md_cells.push(block); } - const md = writeMd(file, { type: 'root', children: block.children as any }).result as string; - return { - cell_type: 'markdown', - metadata: {}, - source: sourceToStringList(md), - }; - }); + } + + if (md_cells.length != 0) { + cells.push(markdownString(file, md_cells)); + md_cells.length = 0; + } + const ipynb = { cells, metadata: { diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml index 48c124b3c8..536467062f 100644 --- a/packages/myst-to-ipynb/tests/basic.yml +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -1,4 +1,5 @@ -title: myst-to-md basic features +# TODO: Uncomment and make all the tests pass +title: myst-to-ipynb basic features cases: - title: styles in paragraph mdast: @@ -22,8 +23,24 @@ cases: value: ' ' - type: inlineCode value: style`s - markdown: |- - Some % *markdown* with **different** ``style`s`` + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "Some % *markdown* with **different** ``style`s``" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: headings mdast: type: root @@ -46,12 +63,24 @@ cases: children: - type: text value: fourth - markdown: |- - # first - - Some % *markdown* + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# first\n\nSome % *markdown*\n\n#### fourth" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + 
"nbformat": 4, + "nbformat_minor": 2 + } - #### fourth - title: thematic break mdast: type: root @@ -65,216 +94,227 @@ cases: children: - type: text value: Some more markdown - markdown: |- - Some markdown - - --- + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "Some markdown\n\n---\n\nSome more markdown" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + # - title: block quote + # mdast: + # type: root + # children: + # - type: blockquote + # children: + # - type: paragraph + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # > Some % *markdown* + # - title: unordered list + # mdast: + # type: root + # children: + # - type: list + # ordered: false + # children: + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some more markdown + # ipynb: |- + # * Some markdown - Some more markdown - - title: block quote - mdast: - type: root - children: - - type: blockquote - children: - - type: paragraph - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text - value: markdown - markdown: |- - > Some % *markdown* - - title: unordered list - mdast: - type: root - children: - - type: list - ordered: false - children: - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some more markdown - markdown: |- - * Some markdown - - * Some more markdown - - title: ordered list - mdast: - type: root - children: - - type: list - ordered: true - start: 5 - children: - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some markdown - - 
type: listItem - children: - - type: paragraph - children: - - type: text - value: Some more markdown - markdown: |- - 5. Some markdown + # * Some more markdown + # - title: ordered list + # mdast: + # type: root + # children: + # - type: list + # ordered: true + # start: 5 + # children: + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some more markdown + # ipynb: |- + # 5. Some markdown - 6. Some more markdown - - title: html - mdast: - type: root - children: - - type: html - value:
*Not markdown*<\div> - markdown: |- -
*Not markdown*<\div> - - title: code - plain - mdast: - type: root - children: - - type: code - value: |- - 5+5 - print("hello world") - markdown: |- - ``` - 5+5 - print("hello world") - ``` - - title: code - nested backticks - mdast: - type: root - children: - - type: code - value: |- - 5+5 - ````{abc} - ```` - print("hello world") - markdown: |- - ````` - 5+5 - ````{abc} - ```` - print("hello world") - ````` - - title: code - with language - mdast: - type: root - children: - - type: code - lang: python - value: |- - 5+5 - print("hello world") - markdown: |- - ```python - 5+5 - print("hello world") - ``` - - title: code - with metadata - mdast: - type: root - children: - - type: code - lang: python - meta: highlight-line="2" - value: |- - 5+5 - print("hello world") - markdown: |- - ```python highlight-line="2" - 5+5 - print("hello world") - ``` - - title: definition - mdast: - type: root - children: - - type: definition - identifier: my-def - label: My-Def - url: https://example.com - title: Example - markdown: |- - [My-Def]: https://example.com "Example" - - title: break - mdast: - type: root - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: break - - type: text - value: Some more markdown - markdown: |- - Some markdown\ - Some more markdown - - title: link - mdast: - type: root - children: - - type: link - url: https://example.com - title: my link - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text - value: markdown - markdown: |- - [Some % *markdown*](https://example.com "my link") - - title: image - mdast: - type: root - children: - - type: image - url: https://example.com - title: my image - alt: Some text - markdown: |- - ![Some text](https://example.com "my image") - - title: link reference - mdast: - type: root - children: - - type: linkReference - identifier: my-link - label: My-Link - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text 
- value: markdown - markdown: |- - [Some % *markdown*][My-Link] - - title: image reference - mdast: - type: root - children: - - type: imageReference - identifier: my-image - label: My-Image - alt: Some text - markdown: |- - ![Some text][My-Image] + # 6. Some more markdown + # - title: html + # mdast: + # type: root + # children: + # - type: html + # value:
*Not markdown*<\div> + # ipynb: |- + #
*Not markdown*<\div> + # - title: code - plain + # mdast: + # type: root + # children: + # - type: code + # value: |- + # 5+5 + # print("hello world") + # ipynb: |- + # ``` + # 5+5 + # print("hello world") + # ``` + # - title: code - nested backticks + # mdast: + # type: root + # children: + # - type: code + # value: |- + # 5+5 + # ````{abc} + # ```` + # print("hello world") + # ipynb: |- + # ````` + # 5+5 + # ````{abc} + # ```` + # print("hello world") + # ````` + # - title: code - with language + # mdast: + # type: root + # children: + # - type: code + # lang: python + # value: |- + # 5+5 + # print("hello world") + # ipynb: |- + # ```python + # 5+5 + # print("hello world") + # ``` + # - title: code - with metadata + # mdast: + # type: root + # children: + # - type: code + # lang: python + # meta: highlight-line="2" + # value: |- + # 5+5 + # print("hello world") + # ipynb: |- + # ```python highlight-line="2" + # 5+5 + # print("hello world") + # ``` + # - title: definition + # mdast: + # type: root + # children: + # - type: definition + # identifier: my-def + # label: My-Def + # url: https://example.com + # title: Example + # ipynb: |- + # [My-Def]: https://example.com "Example" + # - title: break + # mdast: + # type: root + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: break + # - type: text + # value: Some more markdown + # ipynb: |- + # Some markdown\ + # Some more markdown + # - title: link + # mdast: + # type: root + # children: + # - type: link + # url: https://example.com + # title: my link + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # [Some % *markdown*](https://example.com "my link") + # - title: image + # mdast: + # type: root + # children: + # - type: image + # url: https://example.com + # title: my image + # alt: Some text + # ipynb: |- + # ![Some text](https://example.com "my image") + # - title: link 
reference + # mdast: + # type: root + # children: + # - type: linkReference + # identifier: my-link + # label: My-Link + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # [Some % *markdown*][My-Link] + # - title: image reference + # mdast: + # type: root + # children: + # - type: imageReference + # identifier: my-image + # label: My-Image + # alt: Some text + # ipynb: |- + # ![Some text][My-Image] diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts index dc707e2b18..cd1aba873a 100644 --- a/packages/myst-to-ipynb/tests/run.spec.ts +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -3,11 +3,11 @@ import fs from 'node:fs'; import path from 'node:path'; import yaml from 'js-yaml'; import { unified } from 'unified'; -import mystToMd from '../src'; +import writeIpynb from '../src'; type TestCase = { title: string; - markdown: string; + ipynb: string; mdast: Record; }; @@ -28,64 +28,87 @@ casesList.forEach(({ title, cases }) => { describe(title, () => { test.each(cases.map((c): [string, TestCase] => [c.title, c]))( '%s', - (_, { markdown, mdast }) => { - const pipe = unified().use(mystToMd); + (_, { ipynb, mdast }) => { + const pipe = unified().use(writeIpynb); pipe.runSync(mdast as any); const file = pipe.stringify(mdast as any); - expect(file.result).toEqual(markdown); + console.log(file.result); + expect(file.result).toEqual(ipynb); }, ); }); }); -describe('myst-to-md frontmatter', () => { +describe('myst-to-ipynb frontmatter', () => { test('empty frontmatter passes', () => { - const pipe = unified().use(mystToMd, {}); + const pipe = unified().use(writeIpynb, {}); const mdast = { type: 'root', children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' 
}] }], }; pipe.runSync(mdast as any); const file = pipe.stringify(mdast as any); - expect(file.result).toEqual('Hello world!'); - }); - test('simple frontmatter passes', () => { - const pipe = unified().use(mystToMd, { title: 'My Title' }); - const mdast = { - type: 'root', - children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], - }; - pipe.runSync(mdast as any); - const file = pipe.stringify(mdast as any); - expect(file.result).toEqual('---\ntitle: My Title\n---\nHello world!'); - }); - test('frontmatter with licenses passes', () => { - const pipe = unified().use(mystToMd, { - title: 'My Title', - license: { - content: { - id: 'Apache-2.0', - name: 'Apache License 2.0', - url: 'https://opensource.org/licenses/Apache-2.0', - free: true, - osi: true, - }, - code: { - name: 'Creative Commons Attribution 3.0 Unported', - id: 'CC-BY-3.0', - CC: true, - url: 'https://creativecommons.org/licenses/by/3.0/', - }, - }, - }); - const mdast = { - type: 'root', - children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], - }; - pipe.runSync(mdast as any); - const file = pipe.stringify(mdast as any); - expect(file.result).toEqual( - '---\ntitle: My Title\nlicense:\n content: Apache-2.0\n code: CC-BY-3.0\n---\nHello world!', + console.log(file.result); + expect(file.result).toEqual(`{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "Hello world!" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}` ); }); + + + // test('simple frontmatter passes', () => { + // const pipe = unified().use(writeIpynb, { title: 'My Title' }); + // const mdast = { + // type: 'root', + // children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' 
}] }], + // }; + // pipe.runSync(mdast as any); + // const file = pipe.stringify(mdast as any); + // console.log(file.result); + // expect(file.result).toEqual('---\ntitle: My Title\n---\nHello world!'); + // }); + + + // test('frontmatter with licenses passes', () => { + // const pipe = unified().use(writeIpynb, { + // title: 'My Title', + // license: { + // content: { + // id: 'Apache-2.0', + // name: 'Apache License 2.0', + // url: 'https://opensource.org/licenses/Apache-2.0', + // free: true, + // osi: true, + // }, + // code: { + // name: 'Creative Commons Attribution 3.0 Unported', + // id: 'CC-BY-3.0', + // CC: true, + // url: 'https://creativecommons.org/licenses/by/3.0/', + // }, + // }, + // }); + // const mdast = { + // type: 'root', + // children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], + // }; + // pipe.runSync(mdast as any); + // const file = pipe.stringify(mdast as any); + // expect(file.result).toEqual( + // '---\ntitle: My Title\nlicense:\n content: Apache-2.0\n code: CC-BY-3.0\n---\nHello world!', + // ); + // }); }); From 09f1a541b3443424aac5414da22cb9c572cc24b1 Mon Sep 17 00:00:00 2001 From: kp992 Date: Thu, 13 Mar 2025 20:14:18 -0700 Subject: [PATCH 03/27] fix tests --- packages/myst-to-ipynb/tests/basic.yml | 607 +++++++++++++++-------- packages/myst-to-ipynb/tests/run.spec.ts | 45 -- 2 files changed, 399 insertions(+), 253 deletions(-) diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml index 536467062f..853f6ad34f 100644 --- a/packages/myst-to-ipynb/tests/basic.yml +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -1,4 +1,3 @@ -# TODO: Uncomment and make all the tests pass title: myst-to-ipynb basic features cases: - title: styles in paragraph @@ -111,210 +110,402 @@ cases: "nbformat": 4, "nbformat_minor": 2 } - # - title: block quote - # mdast: - # type: root - # children: - # - type: blockquote - # children: - # - type: paragraph - # children: - # - type: text - # 
value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # > Some % *markdown* - # - title: unordered list - # mdast: - # type: root - # children: - # - type: list - # ordered: false - # children: - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some more markdown - # ipynb: |- - # * Some markdown - - # * Some more markdown - # - title: ordered list - # mdast: - # type: root - # children: - # - type: list - # ordered: true - # start: 5 - # children: - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some more markdown - # ipynb: |- - # 5. Some markdown - - # 6. Some more markdown - # - title: html - # mdast: - # type: root - # children: - # - type: html - # value:
*Not markdown*<\div> - # ipynb: |- - #
*Not markdown*<\div> - # - title: code - plain - # mdast: - # type: root - # children: - # - type: code - # value: |- - # 5+5 - # print("hello world") - # ipynb: |- - # ``` - # 5+5 - # print("hello world") - # ``` - # - title: code - nested backticks - # mdast: - # type: root - # children: - # - type: code - # value: |- - # 5+5 - # ````{abc} - # ```` - # print("hello world") - # ipynb: |- - # ````` - # 5+5 - # ````{abc} - # ```` - # print("hello world") - # ````` - # - title: code - with language - # mdast: - # type: root - # children: - # - type: code - # lang: python - # value: |- - # 5+5 - # print("hello world") - # ipynb: |- - # ```python - # 5+5 - # print("hello world") - # ``` - # - title: code - with metadata - # mdast: - # type: root - # children: - # - type: code - # lang: python - # meta: highlight-line="2" - # value: |- - # 5+5 - # print("hello world") - # ipynb: |- - # ```python highlight-line="2" - # 5+5 - # print("hello world") - # ``` - # - title: definition - # mdast: - # type: root - # children: - # - type: definition - # identifier: my-def - # label: My-Def - # url: https://example.com - # title: Example - # ipynb: |- - # [My-Def]: https://example.com "Example" - # - title: break - # mdast: - # type: root - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: break - # - type: text - # value: Some more markdown - # ipynb: |- - # Some markdown\ - # Some more markdown - # - title: link - # mdast: - # type: root - # children: - # - type: link - # url: https://example.com - # title: my link - # children: - # - type: text - # value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # [Some % *markdown*](https://example.com "my link") - # - title: image - # mdast: - # type: root - # children: - # - type: image - # url: https://example.com - # title: my image - # alt: Some text - # ipynb: |- - # ![Some text](https://example.com "my image") - # - title: link 
reference - # mdast: - # type: root - # children: - # - type: linkReference - # identifier: my-link - # label: My-Link - # children: - # - type: text - # value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # [Some % *markdown*][My-Link] - # - title: image reference - # mdast: - # type: root - # children: - # - type: imageReference - # identifier: my-image - # label: My-Image - # alt: Some text - # ipynb: |- - # ![Some text][My-Image] + - title: block quote + mdast: + type: root + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "> Some % *markdown*" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: unordered list + mdast: + type: root + children: + - type: list + ordered: false + children: + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some more markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "* Some markdown\n\n* Some more markdown" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: ordered list + mdast: + type: root + children: + - type: list + ordered: true + start: 5 + children: + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Some more markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "5. Some markdown\n\n6. 
Some more markdown" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: html + mdast: + type: root + children: + - type: html + value:
*Not markdown*
+ ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "
*Not markdown*
" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: code - plain + mdast: + type: root + children: + - type: code + value: |- + 5+5 + print("hello world") + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "```\n5+5\nprint(\"hello world\")\n```" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: code - nested backticks + mdast: + type: root + children: + - type: code + value: |- + 5+5 + ````{abc} + ```` + print("hello world") + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "`````\n5+5\n````{abc}\n````\nprint(\"hello world\")\n`````" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: code - with language + mdast: + type: root + children: + - type: block + kind: notebook-code + data: + id: nb-cell-0 + identifier: nb-cell-0 + label: nb-cell-0 + html_id: nb-cell-0 + children: + - type: code + lang: python + executable: true + value: print('abc') + identifier: nb-cell-0-code + enumerator: 1 + html_id: nb-cell-0-code + - type: output + id: T7FMDqDm8dM2bOT1tKeeM + identifier: nb-cell-0-output + html_id: nb-cell-0-output + - type: code + lang: python + value: |- + 5+5 + print("hello world") + ipynb: |- + { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "print('abc')" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "```python\n5+5\nprint(\"hello world\")\n```" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: code - with metadata + mdast: + type: root + children: + - type: code + lang: python + meta: highlight-line="2" + value: |- + 5+5 + print("hello world") + ipynb: |- + { + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": "```python highlight-line=\"2\"\n5+5\nprint(\"hello world\")\n```" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: definition + mdast: + type: root + children: + - type: definition + identifier: my-def + label: My-Def + url: https://example.com + title: Example + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "[My-Def]: https://example.com \"Example\"" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: break + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: break + - type: text + value: Some more markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "Some markdown\\\nSome more markdown" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: link + mdast: + type: root + children: + - type: link + url: https://example.com + title: my link + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "[Some % *markdown*](https://example.com \"my link\")" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } + - title: link reference + mdast: + type: root + children: + - type: linkReference + identifier: my-link + label: My-Link + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "[Some % *markdown*][My-Link]" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + 
"nbformat_minor": 2 + } + - title: image reference + mdast: + type: root + children: + - type: imageReference + identifier: my-image + label: My-Image + alt: Some text + ipynb: |- + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "![Some text][My-Image]" + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 + } diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts index cd1aba873a..17660cf0f2 100644 --- a/packages/myst-to-ipynb/tests/run.spec.ts +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -48,7 +48,6 @@ describe('myst-to-ipynb frontmatter', () => { }; pipe.runSync(mdast as any); const file = pipe.stringify(mdast as any); - console.log(file.result); expect(file.result).toEqual(`{ "cells": [ { @@ -67,48 +66,4 @@ describe('myst-to-ipynb frontmatter', () => { }` ); }); - - - // test('simple frontmatter passes', () => { - // const pipe = unified().use(writeIpynb, { title: 'My Title' }); - // const mdast = { - // type: 'root', - // children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], - // }; - // pipe.runSync(mdast as any); - // const file = pipe.stringify(mdast as any); - // console.log(file.result); - // expect(file.result).toEqual('---\ntitle: My Title\n---\nHello world!'); - // }); - - - // test('frontmatter with licenses passes', () => { - // const pipe = unified().use(writeIpynb, { - // title: 'My Title', - // license: { - // content: { - // id: 'Apache-2.0', - // name: 'Apache License 2.0', - // url: 'https://opensource.org/licenses/Apache-2.0', - // free: true, - // osi: true, - // }, - // code: { - // name: 'Creative Commons Attribution 3.0 Unported', - // id: 'CC-BY-3.0', - // CC: true, - // url: 'https://creativecommons.org/licenses/by/3.0/', - // }, - // }, - // }); - // const mdast = { - // type: 'root', - // children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' 
}] }], - // }; - // pipe.runSync(mdast as any); - // const file = pipe.stringify(mdast as any); - // expect(file.result).toEqual( - // '---\ntitle: My Title\nlicense:\n content: Apache-2.0\n code: CC-BY-3.0\n---\nHello world!', - // ); - // }); }); From ab8f6ef955a2e32bf0d211d95341ae87a4880e29 Mon Sep 17 00:00:00 2001 From: kp992 Date: Thu, 13 Mar 2025 20:16:50 -0700 Subject: [PATCH 04/27] fix failure in myst-cli --- packages/myst-cli/src/build/build.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/myst-cli/src/build/build.spec.ts b/packages/myst-cli/src/build/build.spec.ts index d3151a0812..7ce7bdd3c7 100644 --- a/packages/myst-cli/src/build/build.spec.ts +++ b/packages/myst-cli/src/build/build.spec.ts @@ -36,6 +36,7 @@ describe('get export formats', () => { ExportFormats.tex, ExportFormats.xml, ExportFormats.md, + ExportFormats.ipynb, ExportFormats.meca, ExportFormats.cff, ]); From 137f9a879b56225b67b97362c87b9bc8e336d3cb Mon Sep 17 00:00:00 2001 From: kp992 Date: Wed, 2 Apr 2025 18:27:45 -0700 Subject: [PATCH 05/27] update the tests and keep the split lines logic --- packages/myst-to-ipynb/src/index.ts | 10 ++- packages/myst-to-ipynb/tests/basic.yml | 99 +++++++++++++++++++----- packages/myst-to-ipynb/tests/run.spec.ts | 4 +- 3 files changed, 91 insertions(+), 22 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index c5566de2e4..28795c3575 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -6,12 +6,18 @@ import type { PageFrontmatter } from 'myst-frontmatter'; import { writeMd } from 'myst-to-md'; import { select } from 'unist-util-select'; +function sourceToStringList(src: string): string[] { + const lines = src.split('\n').map((s) => `${s}\n`); + lines[lines.length - 1] = lines[lines.length - 1].trimEnd(); + return lines; +} + function markdownString(file: VFile, md_cells: Block[]) { const md = writeMd(file, { type: 'root', children: md_cells 
}).result as string; return { cell_type: 'markdown', metadata: {}, - source: md, + source: sourceToStringList(md), }; } @@ -31,7 +37,7 @@ export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatte execution_count: null, metadata: {}, outputs: [], - source: code.value, + source: sourceToStringList(code.value), }); } else { md_cells.push(block); diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml index 853f6ad34f..353e18b77c 100644 --- a/packages/myst-to-ipynb/tests/basic.yml +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -28,7 +28,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "Some % *markdown* with **different** ``style`s``" + "source": [ + "Some % *markdown* with **different** ``style`s``" + ] } ], "metadata": { @@ -68,7 +70,13 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "# first\n\nSome % *markdown*\n\n#### fourth" + "source": [ + "# first\n", + "\n", + "Some % *markdown*\n", + "\n", + "#### fourth" + ] } ], "metadata": { @@ -99,7 +107,13 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "Some markdown\n\n---\n\nSome more markdown" + "source": [ + "Some markdown\n", + "\n", + "---\n", + "\n", + "Some more markdown" + ] } ], "metadata": { @@ -130,7 +144,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "> Some % *markdown*" + "source": [ + "> Some % *markdown*" + ] } ], "metadata": { @@ -166,7 +182,11 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "* Some markdown\n\n* Some more markdown" + "source": [ + "* Some markdown\n", + "\n", + "* Some more markdown" + ] } ], "metadata": { @@ -203,7 +223,11 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "5. Some markdown\n\n6. Some more markdown" + "source": [ + "5. Some markdown\n", + "\n", + "6. Some more markdown" + ] } ], "metadata": { @@ -226,7 +250,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "
*Not markdown*
" + "source": [ + "
*Not markdown*
" + ] } ], "metadata": { @@ -244,14 +270,19 @@ cases: - type: code value: |- 5+5 - print("hello world") + print("hello world\n") ipynb: |- { "cells": [ { "cell_type": "markdown", "metadata": {}, - "source": "```\n5+5\nprint(\"hello world\")\n```" + "source": [ + "```\n", + "5+5\n", + "print(\"hello world\\n\")\n", + "```" + ] } ], "metadata": { @@ -278,7 +309,14 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "`````\n5+5\n````{abc}\n````\nprint(\"hello world\")\n`````" + "source": [ + "`````\n", + "5+5\n", + "````{abc}\n", + "````\n", + "print(\"hello world\")\n", + "`````" + ] } ], "metadata": { @@ -304,7 +342,7 @@ cases: - type: code lang: python executable: true - value: print('abc') + value: print('abc\n') identifier: nb-cell-0-code enumerator: 1 html_id: nb-cell-0-code @@ -325,12 +363,19 @@ cases: "execution_count": null, "metadata": {}, "outputs": [], - "source": "print('abc')" + "source": [ + "print('abc\\n')" + ] }, { "cell_type": "markdown", "metadata": {}, - "source": "```python\n5+5\nprint(\"hello world\")\n```" + "source": [ + "```python\n", + "5+5\n", + "print(\"hello world\")\n", + "```" + ] } ], "metadata": { @@ -357,7 +402,12 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "```python highlight-line=\"2\"\n5+5\nprint(\"hello world\")\n```" + "source": [ + "```python highlight-line=\"2\"\n", + "5+5\n", + "print(\"hello world\")\n", + "```" + ] } ], "metadata": { @@ -383,7 +433,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "[My-Def]: https://example.com \"Example\"" + "source": [ + "[My-Def]: https://example.com \"Example\"" + ] } ], "metadata": { @@ -411,7 +463,10 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "Some markdown\\\nSome more markdown" + "source": [ + "Some markdown\\\n", + "Some more markdown" + ] } ], "metadata": { @@ -442,7 +497,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "[Some % *markdown*](https://example.com \"my link\")" + "source": [ + 
"[Some % *markdown*](https://example.com \"my link\")" + ] } ], "metadata": { @@ -473,7 +530,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "[Some % *markdown*][My-Link]" + "source": [ + "[Some % *markdown*][My-Link]" + ] } ], "metadata": { @@ -498,7 +557,9 @@ cases: { "cell_type": "markdown", "metadata": {}, - "source": "![Some text][My-Image]" + "source": [ + "![Some text][My-Image]" + ] } ], "metadata": { diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts index 17660cf0f2..7d6c2e9edc 100644 --- a/packages/myst-to-ipynb/tests/run.spec.ts +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -53,7 +53,9 @@ describe('myst-to-ipynb frontmatter', () => { { "cell_type": "markdown", "metadata": {}, - "source": "Hello world!" + "source": [ + "Hello world!" + ] } ], "metadata": { From d889e7a06ac1b271c53b743e00b054818bd25085 Mon Sep 17 00:00:00 2001 From: kp992 Date: Thu, 24 Apr 2025 18:21:41 -0700 Subject: [PATCH 06/27] Revert merging md blocks and update 2 sample test cases --- packages/myst-to-ipynb/src/index.ts | 38 +- packages/myst-to-ipynb/tests/basic.yml | 1036 +++++++++++----------- packages/myst-to-ipynb/tests/run.spec.ts | 5 +- 3 files changed, 525 insertions(+), 554 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 28795c3575..4593d951c4 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -12,42 +12,25 @@ function sourceToStringList(src: string): string[] { return lines; } -function markdownString(file: VFile, md_cells: Block[]) { - const md = writeMd(file, { type: 'root', children: md_cells }).result as string; - return { - cell_type: 'markdown', - metadata: {}, - source: sourceToStringList(md), - }; -} - export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatter) { - const cells = []; - const md_cells: Block[] = []; - - for (const block of node.children as Block[]) { + const 
cells = (node.children as Block[]).map((block: Block) => { if (block.type === 'block' && block.kind === 'notebook-code') { - if (md_cells.length != 0) { - cells.push(markdownString(file, md_cells)); - md_cells.length = 0; - } const code = select('code', block) as Code; - cells.push({ + return { cell_type: 'code', execution_count: null, metadata: {}, outputs: [], source: sourceToStringList(code.value), - }); - } else { - md_cells.push(block); + }; } - } - - if (md_cells.length != 0) { - cells.push(markdownString(file, md_cells)); - md_cells.length = 0; - } + const md = writeMd(file, { type: 'root', children: [block] }).result as string; + return { + cell_type: 'markdown', + metadata: {}, + source: sourceToStringList(md), + }; + }); const ipynb = { cells, @@ -59,6 +42,7 @@ export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatte nbformat: 4, nbformat_minor: 2, }; + file.result = JSON.stringify(ipynb, null, 2); return file; } diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml index 353e18b77c..a0080e27c0 100644 --- a/packages/myst-to-ipynb/tests/basic.yml +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -22,25 +22,17 @@ cases: value: ' ' - type: inlineCode value: style`s - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some % *markdown* with **different** ``style`s``" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } + ipynb: + cells: + - cell_type: "markdown" + metadata: {} + source: + - "Some % *markdown* with **different** ``style`s``" + metadata: + language_info: + name: "python" + nbformat: 4 + nbformat_minor: 2 - title: headings mdast: @@ -64,509 +56,505 @@ cases: children: - type: text value: fourth - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# first\n", - "\n", - "Some % *markdown*\n", - "\n", - "#### fourth" - ] - } - ], - 
"metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } + ipynb: + cells: + - cell_type: "markdown" + metadata: {} + source: + - "# first" + - cell_type: "markdown" + metadata: {} + source: + - "Some % *markdown*" + - cell_type: "markdown" + metadata: {} + source: + - "#### fourth" + metadata: + language_info: + name: "python" + nbformat: 4 + nbformat_minor: 2 - - title: thematic break - mdast: - type: root - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: thematicBreak - - type: paragraph - children: - - type: text - value: Some more markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some markdown\n", - "\n", - "---\n", - "\n", - "Some more markdown" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: block quote - mdast: - type: root - children: - - type: blockquote - children: - - type: paragraph - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text - value: markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Some % *markdown*" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: unordered list - mdast: - type: root - children: - - type: list - ordered: false - children: - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some more markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Some markdown\n", - "\n", - "* Some more markdown" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: ordered list - mdast: - 
type: root - children: - - type: list - ordered: true - start: 5 - children: - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: listItem - children: - - type: paragraph - children: - - type: text - value: Some more markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "5. Some markdown\n", - "\n", - "6. Some more markdown" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: html - mdast: - type: root - children: - - type: html - value:
<div>*Not markdown*</div>
- ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
<div>*Not markdown*</div>
" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: code - plain - mdast: - type: root - children: - - type: code - value: |- - 5+5 - print("hello world\n") - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "5+5\n", - "print(\"hello world\\n\")\n", - "```" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: code - nested backticks - mdast: - type: root - children: - - type: code - value: |- - 5+5 - ````{abc} - ```` - print("hello world") - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`````\n", - "5+5\n", - "````{abc}\n", - "````\n", - "print(\"hello world\")\n", - "`````" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: code - with language - mdast: - type: root - children: - - type: block - kind: notebook-code - data: - id: nb-cell-0 - identifier: nb-cell-0 - label: nb-cell-0 - html_id: nb-cell-0 - children: - - type: code - lang: python - executable: true - value: print('abc\n') - identifier: nb-cell-0-code - enumerator: 1 - html_id: nb-cell-0-code - - type: output - id: T7FMDqDm8dM2bOT1tKeeM - identifier: nb-cell-0-output - html_id: nb-cell-0-output - - type: code - lang: python - value: |- - 5+5 - print("hello world") - ipynb: |- - { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('abc\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "5+5\n", - "print(\"hello world\")\n", - "```" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: code - with metadata - mdast: - type: root - children: - - type: code - lang: python - 
meta: highlight-line="2" - value: |- - 5+5 - print("hello world") - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python highlight-line=\"2\"\n", - "5+5\n", - "print(\"hello world\")\n", - "```" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: definition - mdast: - type: root - children: - - type: definition - identifier: my-def - label: My-Def - url: https://example.com - title: Example - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[My-Def]: https://example.com \"Example\"" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: break - mdast: - type: root - children: - - type: paragraph - children: - - type: text - value: Some markdown - - type: break - - type: text - value: Some more markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some markdown\\\n", - "Some more markdown" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: link - mdast: - type: root - children: - - type: link - url: https://example.com - title: my link - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text - value: markdown - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[Some % *markdown*](https://example.com \"my link\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: link reference - mdast: - type: root - children: - - type: linkReference - identifier: my-link - label: My-Link - children: - - type: text - value: 'Some % ' - - type: emphasis - children: - - type: text - value: markdown - ipynb: |- - { - "cells": [ - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "[Some % *markdown*][My-Link]" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } - - title: image reference - mdast: - type: root - children: - - type: imageReference - identifier: my-image - label: My-Image - alt: Some text - ipynb: |- - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Some text][My-Image]" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } + # - title: thematic break + # mdast: + # type: root + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: thematicBreak + # - type: paragraph + # children: + # - type: text + # value: Some more markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "Some markdown\n", + # "\n", + # "---\n", + # "\n", + # "Some more markdown" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: block quote + # mdast: + # type: root + # children: + # - type: blockquote + # children: + # - type: paragraph + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "> Some % *markdown*" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: unordered list + # mdast: + # type: root + # children: + # - type: list + # ordered: false + # children: + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: listItem + # children: + # - type: paragraph + # children: + # - 
type: text + # value: Some more markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "* Some markdown\n", + # "\n", + # "* Some more markdown" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: ordered list + # mdast: + # type: root + # children: + # - type: list + # ordered: true + # start: 5 + # children: + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: listItem + # children: + # - type: paragraph + # children: + # - type: text + # value: Some more markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "5. Some markdown\n", + # "\n", + # "6. Some more markdown" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: html + # mdast: + # type: root + # children: + # - type: html + # value:
<div>*Not markdown*</div>
+ # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "
<div>*Not markdown*</div>
" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: code - plain + # mdast: + # type: root + # children: + # - type: code + # value: |- + # 5+5 + # print("hello world\n") + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "```\n", + # "5+5\n", + # "print(\"hello world\\n\")\n", + # "```" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: code - nested backticks + # mdast: + # type: root + # children: + # - type: code + # value: |- + # 5+5 + # ````{abc} + # ```` + # print("hello world") + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "`````\n", + # "5+5\n", + # "````{abc}\n", + # "````\n", + # "print(\"hello world\")\n", + # "`````" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: code - with language + # mdast: + # type: root + # children: + # - type: block + # kind: notebook-code + # data: + # id: nb-cell-0 + # identifier: nb-cell-0 + # label: nb-cell-0 + # html_id: nb-cell-0 + # children: + # - type: code + # lang: python + # executable: true + # value: print('abc\n') + # identifier: nb-cell-0-code + # enumerator: 1 + # html_id: nb-cell-0-code + # - type: output + # id: T7FMDqDm8dM2bOT1tKeeM + # identifier: nb-cell-0-output + # html_id: nb-cell-0-output + # - type: code + # lang: python + # value: |- + # 5+5 + # print("hello world") + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "code", + # "execution_count": null, + # "metadata": {}, + # "outputs": [], + # "source": [ + # "print('abc\\n')" + # ] + # }, + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "```python\n", + # "5+5\n", + # 
"print(\"hello world\")\n", + # "```" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: code - with metadata + # mdast: + # type: root + # children: + # - type: code + # lang: python + # meta: highlight-line="2" + # value: |- + # 5+5 + # print("hello world") + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "```python highlight-line=\"2\"\n", + # "5+5\n", + # "print(\"hello world\")\n", + # "```" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: definition + # mdast: + # type: root + # children: + # - type: definition + # identifier: my-def + # label: My-Def + # url: https://example.com + # title: Example + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "[My-Def]: https://example.com \"Example\"" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: break + # mdast: + # type: root + # children: + # - type: paragraph + # children: + # - type: text + # value: Some markdown + # - type: break + # - type: text + # value: Some more markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "Some markdown\\\n", + # "Some more markdown" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: link + # mdast: + # type: root + # children: + # - type: link + # url: https://example.com + # title: my link + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", 
+ # "metadata": {}, + # "source": [ + # "[Some % *markdown*](https://example.com \"my link\")" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: link reference + # mdast: + # type: root + # children: + # - type: linkReference + # identifier: my-link + # label: My-Link + # children: + # - type: text + # value: 'Some % ' + # - type: emphasis + # children: + # - type: text + # value: markdown + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "[Some % *markdown*][My-Link]" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } + # - title: image reference + # mdast: + # type: root + # children: + # - type: imageReference + # identifier: my-image + # label: My-Image + # alt: Some text + # ipynb: |- + # { + # "cells": [ + # { + # "cell_type": "markdown", + # "metadata": {}, + # "source": [ + # "![Some text][My-Image]" + # ] + # } + # ], + # "metadata": { + # "language_info": { + # "name": "python" + # } + # }, + # "nbformat": 4, + # "nbformat_minor": 2 + # } diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts index 7d6c2e9edc..c128772da1 100644 --- a/packages/myst-to-ipynb/tests/run.spec.ts +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -7,7 +7,7 @@ import writeIpynb from '../src'; type TestCase = { title: string; - ipynb: string; + ipynb: Record; mdast: Record; }; @@ -32,8 +32,7 @@ casesList.forEach(({ title, cases }) => { const pipe = unified().use(writeIpynb); pipe.runSync(mdast as any); const file = pipe.stringify(mdast as any); - console.log(file.result); - expect(file.result).toEqual(ipynb); + expect(JSON.parse(file.result)).toEqual(ipynb); }, ); }); From e72ff5c920923935e1d1682efa950befab417a35 Mon Sep 17 00:00:00 2001 From: kp992 
<145801876+kp992@users.noreply.github.com> Date: Thu, 10 Jul 2025 07:12:55 -0700 Subject: [PATCH 07/27] Add ipynb in validators (#2159) --- packages/myst-frontmatter/src/exports/validators.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/myst-frontmatter/src/exports/validators.ts b/packages/myst-frontmatter/src/exports/validators.ts index 43a58cd29b..deb8fc8eaa 100644 --- a/packages/myst-frontmatter/src/exports/validators.ts +++ b/packages/myst-frontmatter/src/exports/validators.ts @@ -61,6 +61,7 @@ export const EXT_TO_FORMAT: Record = { '.typ': ExportFormats.typst, '.typst': ExportFormats.typst, '.cff': ExportFormats.cff, + '.ipynb': ExportFormats.ipynb, }; export const RESERVED_EXPORT_KEYS = [ From 4a989de024acbe58bda8eeab1abc8089f9a1affe Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 12:22:24 +1100 Subject: [PATCH 08/27] fix: myst-to-ipynb bug fixes for kernelspec, markers, and metadata - Use frontmatter kernelspec to populate notebook metadata (name, display_name, language) instead of ignoring the frontmatter parameter - Derive language_info.name from frontmatter instead of hardcoding 'python' - Strip leading +++ block markers from markdown cells (MyST-specific separators that have no meaning in notebooks) - Fix log message from 'Exported MD' to 'Exported IPYNB' - Fix package.json homepage URL to point to myst-to-ipynb (not myst-to-md) Ref: QuantEcon/meta#292 --- packages/myst-cli/src/build/ipynb/index.ts | 2 +- packages/myst-to-ipynb/package.json | 2 +- packages/myst-to-ipynb/src/index.ts | 32 ++++++++++++++++++---- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index 27f2896a0d..9147c01233 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -45,7 +45,7 @@ export async function runIpynbExport( vfile.path = output; const mdOut = writeIpynb(vfile, mdast as any, frontmatter); 
logMessagesFromVFile(session, mdOut); - session.log.info(toc(`📑 Exported MD in %s, copying to ${output}`)); + session.log.info(toc(`📓 Exported IPYNB in %s, copying to ${output}`)); writeFileToFolder(output, mdOut.result as string); return { tempFolders: [] }; } diff --git a/packages/myst-to-ipynb/package.json b/packages/myst-to-ipynb/package.json index 85064ec617..329175ee7a 100644 --- a/packages/myst-to-ipynb/package.json +++ b/packages/myst-to-ipynb/package.json @@ -3,7 +3,7 @@ "version": "1.0.15", "description": "Export from MyST mdast to ipynb", "author": "Rowan Cockett ", - "homepage": "https://github.com/jupyter-book/mystmd/tree/main/packages/myst-to-md", + "homepage": "https://github.com/jupyter-book/mystmd/tree/main/packages/myst-to-ipynb", "license": "MIT", "type": "module", "exports": "./dist/index.js", diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 4593d951c4..a2bbb296d8 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -12,6 +12,14 @@ function sourceToStringList(src: string): string[] { return lines; } +/** + * Strip leading `+++` cell break markers from markdown content. + * These are MyST-specific block separators that have no meaning in notebooks. 
+ */ +function stripBlockMarkers(md: string): string { + return md.replace(/^\+\+\+[^\n]*\n/, ''); +} + export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatter) { const cells = (node.children as Block[]).map((block: Block) => { if (block.type === 'block' && block.kind === 'notebook-code') { @@ -25,20 +33,32 @@ export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatte }; } const md = writeMd(file, { type: 'root', children: [block] }).result as string; + const cleanMd = stripBlockMarkers(md); return { cell_type: 'markdown', metadata: {}, - source: sourceToStringList(md), + source: sourceToStringList(cleanMd), }; }); + // Build notebook metadata from frontmatter kernelspec when available + const languageName = frontmatter?.kernelspec?.language ?? frontmatter?.kernelspec?.name ?? 'python'; + const metadata: Record = { + language_info: { + name: languageName, + }, + }; + if (frontmatter?.kernelspec) { + metadata.kernelspec = { + name: frontmatter.kernelspec.name, + display_name: frontmatter.kernelspec.display_name, + language: languageName, + }; + } + const ipynb = { cells, - metadata: { - language_info: { - name: 'python', - }, - }, + metadata, nbformat: 4, nbformat_minor: 2, }; From 44571dc982591748000b98c845ca2d78b54cf2d9 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 12:52:48 +1100 Subject: [PATCH 09/27] feat(myst-to-ipynb): add CommonMark serialization mode Add AST pre-transform that converts MyST-specific nodes to CommonMark equivalents before markdown serialization, producing notebooks compatible with vanilla Jupyter Notebook and Google Colab. 
New option: markdown: 'commonmark' (default: 'myst') Transforms implemented: - math block directive to $$ delimiters - inline math role to $ delimiters - admonition to blockquote with bold title - exercise to bold header with content - solution to bold header with content (or dropped via option) - proof/theorem/lemma to bold header with content - tab-set to bold tab titles with tab content - figure to image + italic caption - table container to bold caption + table - card/grid to unwrapped content - details to blockquote with summary title - aside/sidebar to blockquote - mystDirective/mystRole to unwrapped content or plain text Uses html-type AST nodes for math content to prevent the markdown serializer from escaping LaTeX special characters (underscores, etc). CLI wiring: reads 'markdown: commonmark' from export config in myst.yml. Ref: QuantEcon/meta#292 --- packages/myst-cli/src/build/ipynb/index.ts | 8 +- packages/myst-to-ipynb/src/commonmark.ts | 465 +++++++++++++++++++++ packages/myst-to-ipynb/src/index.ts | 36 +- 3 files changed, 504 insertions(+), 5 deletions(-) create mode 100644 packages/myst-to-ipynb/src/commonmark.ts diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index 9147c01233..dd23621b29 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -2,6 +2,7 @@ import path from 'node:path'; import { tic, writeFileToFolder } from 'myst-cli-utils'; import { FRONTMATTER_ALIASES, PAGE_FRONTMATTER_KEYS } from 'myst-frontmatter'; import { writeIpynb } from 'myst-to-ipynb'; +import type { IpynbOptions } from 'myst-to-ipynb'; import { filterKeys } from 'simple-validators'; import { VFile } from 'vfile'; import { finalizeMdast } from '../../process/mdast.js'; @@ -43,7 +44,12 @@ export async function runIpynbExport( }); const vfile = new VFile(); vfile.path = output; - const mdOut = writeIpynb(vfile, mdast as any, frontmatter); + // Pass markdown format option 
from export config (e.g. `markdown: commonmark` in myst.yml) + const ipynbOpts: IpynbOptions | undefined = + (exportOptions as any).markdown === 'commonmark' + ? { markdown: 'commonmark' } + : undefined; + const mdOut = writeIpynb(vfile, mdast as any, frontmatter, ipynbOpts); logMessagesFromVFile(session, mdOut); session.log.info(toc(`📓 Exported IPYNB in %s, copying to ${output}`)); writeFileToFolder(output, mdOut.result as string); diff --git a/packages/myst-to-ipynb/src/commonmark.ts b/packages/myst-to-ipynb/src/commonmark.ts new file mode 100644 index 0000000000..c1cd725add --- /dev/null +++ b/packages/myst-to-ipynb/src/commonmark.ts @@ -0,0 +1,465 @@ +/** + * CommonMark AST pre-transform for myst-to-ipynb + * + * Converts MyST-specific AST nodes into their CommonMark-equivalent AST nodes + * so that `writeMd` from `myst-to-md` produces plain CommonMark output + * compatible with vanilla Jupyter Notebook, JupyterLab, and Google Colab. + * + * This transform is applied before `writeMd` is called for each markdown cell. + * It walks the AST tree and replaces MyST directive/role nodes with standard + * mdast nodes that `writeMd` already handles natively. + */ + +import type { GenericNode } from 'myst-common'; +import { toText } from 'myst-common'; +import { selectAll, select } from 'unist-util-select'; + +/** + * Capitalize the first letter of a string. + */ +function capitalize(s: string): string { + return s.charAt(0).toUpperCase() + s.slice(1); +} + +/** + * Convert an admonition node to a blockquote with bold title. + * + * Input: { type: 'admonition', kind: 'note', children: [admonitionTitle, ...content] } + * Output: { type: 'blockquote', children: [paragraph(bold(title)), ...content] } + */ +function transformAdmonition(node: GenericNode): GenericNode { + const kind = node.kind ?? 'note'; + const titleNode = node.children?.find((c: GenericNode) => c.type === 'admonitionTitle'); + const titleText = titleNode ? 
toText(titleNode) : capitalize(kind); + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'admonitionTitle') ?? []; + return { + type: 'blockquote', + children: [ + { + type: 'paragraph', + children: [{ type: 'strong', children: [{ type: 'text', value: titleText }] }], + }, + ...contentChildren, + ], + }; +} + +/** + * Convert a math block directive to a raw html node containing `$$...$$`. + * + * We use an `html` type node because mdast serializers output its `value` + * as-is, without escaping underscores or other special characters that + * commonly appear in LaTeX expressions. + * + * Input: { type: 'math', value: 'E=mc^2', label: '...' } + * Output: { type: 'html', value: '$$\nE=mc^2\n$$' } + */ +function transformMathBlock(node: GenericNode): GenericNode { + const value = node.value ?? ''; + const labelComment = node.label ? ` (${node.label})` : ''; + return { + type: 'html', + value: `$$\n${value}\n$$${labelComment}`, + }; +} + +/** + * Convert an inline math role to a raw html node with `$...$` delimiters. + * + * Input: { type: 'inlineMath', value: 'E=mc^2' } + * Output: { type: 'html', value: '$E=mc^2$' } + * + * We use an `html` type node so the markdown serializer outputs the value + * as-is, preventing underscore/backslash escaping in LaTeX expressions. + * Jupyter's markdown renderer supports `$...$` for inline math natively. + */ +function transformInlineMath(node: GenericNode): GenericNode { + return { type: 'html', value: `$${node.value ?? ''}$` }; +} + +/** + * Convert a figure container to an image with caption text. 
+ * + * Input: { type: 'container', kind: 'figure', children: [image, caption, legend] } + * Output: { type: 'image', url: '...', alt: 'caption text' } + * followed by caption paragraph if present + */ +function transformFigure(node: GenericNode): GenericNode { + const imageNode: GenericNode | null = select('image', node); + const captionNode: GenericNode | null = select('caption', node); + const legendNode: GenericNode | null = select('legend', node); + + const url = imageNode?.urlSource ?? imageNode?.url ?? ''; + const alt = imageNode?.alt ?? (captionNode ? toText(captionNode) : ''); + + const children: GenericNode[] = [{ type: 'image', url, alt, title: imageNode?.title }]; + + // Add caption as a paragraph below the image if present + if (captionNode?.children?.length) { + children.push({ + type: 'paragraph', + children: [ + { type: 'emphasis', children: captionNode.children }, + ], + }); + } + + // Add legend content as-is + if (legendNode?.children?.length) { + children.push(...legendNode.children); + } + + return { type: 'root', children }; +} + +/** + * Convert a table container to its inner table node. + * The table node is already handled by myst-to-md's GFM table extension. + */ +function transformTableContainer(node: GenericNode): GenericNode { + const captionNode: GenericNode | null = select('caption', node); + const tableNode: GenericNode | null = select('table', node); + + const children: GenericNode[] = []; + + // Add caption as bold paragraph above the table + if (captionNode?.children?.length) { + children.push({ + type: 'paragraph', + children: [{ type: 'strong', children: captionNode.children }], + }); + } + + if (tableNode) { + children.push(tableNode); + } + + return { type: 'root', children }; +} + +/** + * Convert an exercise node to a bold header with content. + * + * Input: { type: 'exercise', children: [...] 
} + * Output: { type: 'root', children: [paragraph(**Exercise N**), ...content] } + */ +function transformExercise(node: GenericNode): GenericNode { + const titleNode = node.children?.find((c: GenericNode) => c.type === 'admonitionTitle'); + const titleText = titleNode ? toText(titleNode) : 'Exercise'; + const enumerator = node.enumerator ? ` ${node.enumerator}` : ''; + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'admonitionTitle') ?? []; + + return { + type: 'root', + children: [ + { + type: 'paragraph', + children: [ + { + type: 'strong', + children: [{ type: 'text', value: `${titleText}${enumerator}` }], + }, + ], + }, + ...contentChildren, + ], + }; +} + +/** + * Convert a solution node to a bold header with content. + * Solutions are kept by default but can be configured to be dropped. + */ +function transformSolution(node: GenericNode, dropSolutions: boolean): GenericNode | null { + if (dropSolutions) return null; + + const titleNode = node.children?.find((c: GenericNode) => c.type === 'admonitionTitle'); + const titleText = titleNode ? toText(titleNode) : 'Solution'; + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'admonitionTitle') ?? []; + + return { + type: 'root', + children: [ + { + type: 'paragraph', + children: [ + { + type: 'strong', + children: [{ type: 'text', value: titleText }], + }, + ], + }, + ...contentChildren, + ], + }; +} + +/** + * Convert a proof-type node (theorem, lemma, definition, etc.) to a bold header. + * + * Input: { type: 'proof', kind: 'theorem', children: [...] } + * Output: { type: 'root', children: [paragraph(**Theorem N** (Title)), ...content] } + */ +function transformProof(node: GenericNode): GenericNode { + const kind = node.kind ?? 'proof'; + const titleNode = node.children?.find((c: GenericNode) => c.type === 'admonitionTitle'); + const titleText = titleNode ? ` (${toText(titleNode)})` : ''; + const enumerator = node.enumerator ? 
` ${node.enumerator}` : ''; + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'admonitionTitle') ?? []; + + return { + type: 'root', + children: [ + { + type: 'paragraph', + children: [ + { + type: 'strong', + children: [{ type: 'text', value: `${capitalize(kind)}${enumerator}${titleText}` }], + }, + ], + }, + ...contentChildren, + ], + }; +} + +/** + * Convert a tab-set to just the content of each tab, with tab titles as headings. + */ +function transformTabSet(node: GenericNode): GenericNode { + const children: GenericNode[] = []; + + for (const tabItem of node.children ?? []) { + if (tabItem.type === 'tabItem' || tabItem.kind === 'tabItem') { + // Add tab title as bold paragraph + if (tabItem.title) { + children.push({ + type: 'paragraph', + children: [ + { type: 'strong', children: [{ type: 'text', value: tabItem.title }] }, + ], + }); + } + // Add tab content + if (tabItem.children) { + children.push(...tabItem.children); + } + } + } + + return { type: 'root', children }; +} + +/** + * Convert a card to its content with optional title. + */ +function transformCard(node: GenericNode): GenericNode { + const titleNode = node.children?.find((c: GenericNode) => c.type === 'cardTitle'); + const contentChildren = + node.children?.filter( + (c: GenericNode) => !['cardTitle', 'header', 'footer'].includes(c.type), + ) ?? []; + + const children: GenericNode[] = []; + + if (titleNode) { + children.push({ + type: 'paragraph', + children: [ + { type: 'strong', children: titleNode.children ?? [{ type: 'text', value: toText(titleNode) }] }, + ], + }); + } + + children.push(...contentChildren); + + return { type: 'root', children }; +} + +/** + * Convert a grid to its card children (which will be individually transformed). + */ +function transformGrid(node: GenericNode): GenericNode { + return { type: 'root', children: node.children ?? [] }; +} + +/** + * Convert a details/dropdown to a blockquote with summary as title. 
+ */ +function transformDetails(node: GenericNode): GenericNode { + const summaryNode = node.children?.find((c: GenericNode) => c.type === 'summary'); + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'summary') ?? []; + + const titleText = summaryNode ? toText(summaryNode) : 'Details'; + + return { + type: 'blockquote', + children: [ + { + type: 'paragraph', + children: [{ type: 'strong', children: [{ type: 'text', value: titleText }] }], + }, + ...contentChildren, + ], + }; +} + +/** + * Convert an aside/sidebar/margin to a blockquote. + */ +function transformAside(node: GenericNode): GenericNode { + const titleNode = node.children?.find((c: GenericNode) => c.type === 'admonitionTitle'); + const contentChildren = + node.children?.filter((c: GenericNode) => c.type !== 'admonitionTitle') ?? []; + + const children: GenericNode[] = []; + + if (titleNode) { + children.push({ + type: 'paragraph', + children: [{ type: 'strong', children: titleNode.children ?? [] }], + }); + } + + children.push(...contentChildren); + + return { type: 'blockquote', children }; +} + +/** + * Convert a code-block directive to a standard fenced code block. + * (Remove MyST-specific options like label, emphasize-lines, etc.) + */ +function transformCodeBlock(node: GenericNode): GenericNode { + return { + type: 'code', + lang: node.lang, + value: node.value ?? '', + }; +} + +/** + * Convert a mystDirective node to plain content or remove it. + */ +function transformMystDirective(node: GenericNode): GenericNode | null { + // If it has children, keep the content + if (node.children?.length) { + return { type: 'root', children: node.children }; + } + // If it has a value, render as a code block + if (node.value) { + return { type: 'code', lang: node.lang ?? '', value: node.value }; + } + return null; +} + +/** + * Convert a mystRole node to plain text. 
+ */ +function transformMystRole(node: GenericNode): GenericNode { + if (node.children?.length) { + return { type: 'root', children: node.children }; + } + return { type: 'text', value: node.value ?? '' }; +} + +export interface CommonMarkOptions { + /** Drop solution blocks from output (default: false) */ + dropSolutions?: boolean; +} + +/** + * Walk an AST tree and replace MyST-specific nodes with CommonMark equivalents. + * + * This modifies the tree in-place by replacing children arrays. + * Returns the (possibly replaced) root node. + */ +export function transformToCommonMark( + tree: GenericNode, + opts?: CommonMarkOptions, +): GenericNode { + const dropSolutions = opts?.dropSolutions ?? false; + + // Process children recursively (bottom-up so nested directives are handled first) + if (tree.children) { + // First, recurse into children + tree.children = tree.children.map((child: GenericNode) => + transformToCommonMark(child, opts), + ); + + // Then, transform this node's children — replacing nodes that need conversion + const newChildren: GenericNode[] = []; + for (const child of tree.children) { + const transformed = transformNode(child, dropSolutions); + if (transformed === null) { + // Node should be dropped (e.g., solution with dropSolutions=true) + continue; + } + if (transformed.type === 'root' && transformed.children) { + // Flatten: a root wrapper means multiple replacement nodes + newChildren.push(...transformed.children); + } else { + newChildren.push(transformed); + } + } + tree.children = newChildren; + } + + return tree; +} + +/** + * Transform a single node if it's a MyST-specific type. + * Returns the node unchanged if no transformation is needed. + * Returns null if the node should be removed. 
+ */ +function transformNode( + node: GenericNode, + dropSolutions: boolean, +): GenericNode | null { + switch (node.type) { + case 'admonition': + return transformAdmonition(node); + case 'math': + return transformMathBlock(node); + case 'inlineMath': + return transformInlineMath(node); + case 'container': + if (node.kind === 'figure') return transformFigure(node); + if (node.kind === 'table') return transformTableContainer(node); + // code containers — extract the code node + if (node.kind === 'code') { + const codeNode = select('code', node); + return codeNode ? transformCodeBlock(codeNode as GenericNode) : node; + } + return node; + case 'exercise': + return transformExercise(node); + case 'solution': + return transformSolution(node, dropSolutions); + case 'proof': + return transformProof(node); + case 'tabSet': + return transformTabSet(node); + case 'card': + return transformCard(node); + case 'grid': + return transformGrid(node); + case 'details': + return transformDetails(node); + case 'aside': + return transformAside(node); + case 'mystDirective': + return transformMystDirective(node); + case 'mystRole': + return transformMystRole(node); + default: + return node; + } +} diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index a2bbb296d8..65b6de9f93 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -5,6 +5,8 @@ import type { VFile } from 'vfile'; import type { PageFrontmatter } from 'myst-frontmatter'; import { writeMd } from 'myst-to-md'; import { select } from 'unist-util-select'; +import { transformToCommonMark } from './commonmark.js'; +import type { CommonMarkOptions } from './commonmark.js'; function sourceToStringList(src: string): string[] { const lines = src.split('\n').map((s) => `${s}\n`); @@ -20,7 +22,21 @@ function stripBlockMarkers(md: string): string { return md.replace(/^\+\+\+[^\n]*\n/, ''); } -export function writeIpynb(file: VFile, node: Root, frontmatter?: 
PageFrontmatter) { +export interface IpynbOptions { + /** Markdown format: 'myst' preserves MyST syntax, 'commonmark' converts to plain CommonMark */ + markdown?: 'myst' | 'commonmark'; + /** Options for CommonMark conversion */ + commonmark?: CommonMarkOptions; +} + +export function writeIpynb( + file: VFile, + node: Root, + frontmatter?: PageFrontmatter, + options?: IpynbOptions, +) { + const markdownFormat = options?.markdown ?? 'myst'; + const cells = (node.children as Block[]).map((block: Block) => { if (block.type === 'block' && block.kind === 'notebook-code') { const code = select('code', block) as Code; @@ -32,7 +48,15 @@ export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatte source: sourceToStringList(code.value), }; } - const md = writeMd(file, { type: 'root', children: [block] }).result as string; + // Build the sub-tree for this markdown cell + let blockTree: any = { type: 'root', children: [block] }; + if (markdownFormat === 'commonmark') { + blockTree = transformToCommonMark( + JSON.parse(JSON.stringify(blockTree)), + options?.commonmark, + ); + } + const md = writeMd(file, blockTree).result as string; const cleanMd = stripBlockMarkers(md); return { cell_type: 'markdown', @@ -67,9 +91,12 @@ export function writeIpynb(file: VFile, node: Root, frontmatter?: PageFrontmatte return file; } -const plugin: Plugin<[PageFrontmatter?], Root, VFile> = function (frontmatter?) { +const plugin: Plugin<[PageFrontmatter?, IpynbOptions?], Root, VFile> = function ( + frontmatter?, + options?, +) { this.Compiler = (node, file) => { - return writeIpynb(file, node, frontmatter); + return writeIpynb(file, node, frontmatter, options); }; return (node: Root) => { @@ -79,3 +106,4 @@ const plugin: Plugin<[PageFrontmatter?], Root, VFile> = function (frontmatter?) 
}; export default plugin; +export type { CommonMarkOptions } from './commonmark.js'; From 925a194963ea04115b486650a25a39f407a4a681 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 13:17:27 +1100 Subject: [PATCH 10/27] test: expand myst-to-ipynb test suite (30 cases) - Rewrite basic.yml with 13 proper YAML-object test cases (was 2 active) - Add frontmatter.yml with 4 kernelspec/metadata test cases - Add commonmark.yml with 13 CommonMark-mode test cases covering: inline math, math blocks, admonitions, exercises, theorems, tabSets, solutions (kept/dropped), underscore preservation - Update run.spec.ts to support frontmatter and options fields in YAML test cases, enabling CommonMark and metadata tests Ref: QuantEcon/meta#292 --- packages/myst-to-ipynb/tests/basic.yml | 774 +++++++------------ packages/myst-to-ipynb/tests/commonmark.yml | 376 +++++++++ packages/myst-to-ipynb/tests/frontmatter.yml | 108 +++ packages/myst-to-ipynb/tests/run.spec.ts | 39 +- 4 files changed, 772 insertions(+), 525 deletions(-) create mode 100644 packages/myst-to-ipynb/tests/commonmark.yml create mode 100644 packages/myst-to-ipynb/tests/frontmatter.yml diff --git a/packages/myst-to-ipynb/tests/basic.yml b/packages/myst-to-ipynb/tests/basic.yml index a0080e27c0..626758e500 100644 --- a/packages/myst-to-ipynb/tests/basic.yml +++ b/packages/myst-to-ipynb/tests/basic.yml @@ -24,13 +24,13 @@ cases: value: style`s ipynb: cells: - - cell_type: "markdown" + - cell_type: markdown metadata: {} source: - - "Some % *markdown* with **different** ``style`s``" + - "Some % *markdown* with **different** ``style`s``" metadata: language_info: - name: "python" + name: python nbformat: 4 nbformat_minor: 2 @@ -58,503 +58,293 @@ cases: value: fourth ipynb: cells: - - cell_type: "markdown" + - cell_type: markdown metadata: {} source: - - "# first" - - cell_type: "markdown" + - "# first" + - cell_type: markdown metadata: {} source: - - "Some % *markdown*" - - cell_type: "markdown" + - "Some % 
*markdown*" + - cell_type: markdown metadata: {} source: - - "#### fourth" + - "#### fourth" metadata: language_info: - name: "python" + name: python nbformat: 4 nbformat_minor: 2 - # - title: thematic break - # mdast: - # type: root - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: thematicBreak - # - type: paragraph - # children: - # - type: text - # value: Some more markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "Some markdown\n", - # "\n", - # "---\n", - # "\n", - # "Some more markdown" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: block quote - # mdast: - # type: root - # children: - # - type: blockquote - # children: - # - type: paragraph - # children: - # - type: text - # value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "> Some % *markdown*" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: unordered list - # mdast: - # type: root - # children: - # - type: list - # ordered: false - # children: - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some more markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "* Some markdown\n", - # "\n", - # "* Some more markdown" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: ordered list - # mdast: - # type: root 
- # children: - # - type: list - # ordered: true - # start: 5 - # children: - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: listItem - # children: - # - type: paragraph - # children: - # - type: text - # value: Some more markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "5. Some markdown\n", - # "\n", - # "6. Some more markdown" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: html - # mdast: - # type: root - # children: - # - type: html - # value:
*Not markdown*
- # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "
*Not markdown*
" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: code - plain - # mdast: - # type: root - # children: - # - type: code - # value: |- - # 5+5 - # print("hello world\n") - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "```\n", - # "5+5\n", - # "print(\"hello world\\n\")\n", - # "```" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: code - nested backticks - # mdast: - # type: root - # children: - # - type: code - # value: |- - # 5+5 - # ````{abc} - # ```` - # print("hello world") - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "`````\n", - # "5+5\n", - # "````{abc}\n", - # "````\n", - # "print(\"hello world\")\n", - # "`````" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: code - with language - # mdast: - # type: root - # children: - # - type: block - # kind: notebook-code - # data: - # id: nb-cell-0 - # identifier: nb-cell-0 - # label: nb-cell-0 - # html_id: nb-cell-0 - # children: - # - type: code - # lang: python - # executable: true - # value: print('abc\n') - # identifier: nb-cell-0-code - # enumerator: 1 - # html_id: nb-cell-0-code - # - type: output - # id: T7FMDqDm8dM2bOT1tKeeM - # identifier: nb-cell-0-output - # html_id: nb-cell-0-output - # - type: code - # lang: python - # value: |- - # 5+5 - # print("hello world") - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "code", - # "execution_count": null, - # "metadata": {}, - # "outputs": [], - # "source": [ - # "print('abc\\n')" - # ] - # }, - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "```python\n", - # "5+5\n", - # 
"print(\"hello world\")\n", - # "```" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: code - with metadata - # mdast: - # type: root - # children: - # - type: code - # lang: python - # meta: highlight-line="2" - # value: |- - # 5+5 - # print("hello world") - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "```python highlight-line=\"2\"\n", - # "5+5\n", - # "print(\"hello world\")\n", - # "```" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: definition - # mdast: - # type: root - # children: - # - type: definition - # identifier: my-def - # label: My-Def - # url: https://example.com - # title: Example - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "[My-Def]: https://example.com \"Example\"" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: break - # mdast: - # type: root - # children: - # - type: paragraph - # children: - # - type: text - # value: Some markdown - # - type: break - # - type: text - # value: Some more markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "Some markdown\\\n", - # "Some more markdown" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: link - # mdast: - # type: root - # children: - # - type: link - # url: https://example.com - # title: my link - # children: - # - type: text - # value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", 
- # "metadata": {}, - # "source": [ - # "[Some % *markdown*](https://example.com \"my link\")" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: link reference - # mdast: - # type: root - # children: - # - type: linkReference - # identifier: my-link - # label: My-Link - # children: - # - type: text - # value: 'Some % ' - # - type: emphasis - # children: - # - type: text - # value: markdown - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "[Some % *markdown*][My-Link]" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } - # - title: image reference - # mdast: - # type: root - # children: - # - type: imageReference - # identifier: my-image - # label: My-Image - # alt: Some text - # ipynb: |- - # { - # "cells": [ - # { - # "cell_type": "markdown", - # "metadata": {}, - # "source": [ - # "![Some text][My-Image]" - # ] - # } - # ], - # "metadata": { - # "language_info": { - # "name": "python" - # } - # }, - # "nbformat": 4, - # "nbformat_minor": 2 - # } + - title: thematic break + mdast: + type: root + children: + - type: thematicBreak + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "---" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: block quote + mdast: + type: root + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: 'Some % ' + - type: emphasis + children: + - type: text + value: markdown + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "> Some % *markdown*" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: unordered list + mdast: + type: root + children: + - type: list + ordered: false + children: + - type: listItem + children: + - type: 
paragraph + children: + - type: text + value: Item one + - type: listItem + children: + - type: paragraph + children: + - type: text + value: Item two + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "* Item one\n" + - "\n" + - "* Item two" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: html + mdast: + type: root + children: + - type: html + value: '
*Not markdown*
' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "
*Not markdown*
" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: code - plain fenced + mdast: + type: root + children: + - type: code + value: "5+5\nprint(\"hello world\\n\")" + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "```\n" + - "5+5\n" + - "print(\"hello world\\n\")\n" + - "```" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: code cell in block + mdast: + type: root + children: + - type: block + kind: notebook-code + children: + - type: code + lang: python + executable: true + value: 'print("hello")' + - type: output + id: test-output + ipynb: + cells: + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - 'print("hello")' + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: mixed markdown and code cells + mdast: + type: root + children: + - type: heading + depth: 1 + children: + - type: text + value: Title + - type: block + kind: notebook-code + children: + - type: code + lang: python + executable: true + value: x = 1 + - type: paragraph + children: + - type: text + value: After code + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "# Title" + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "x = 1" + - cell_type: markdown + metadata: {} + source: + - "After code" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: block markers stripped + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: text + value: Content after marker + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Content after marker" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: link with title + mdast: + type: root + children: + - type: paragraph + children: + - type: link + url: https://example.com + title: my link + 
children: + - type: text + value: Click here + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - '[Click here](https://example.com "my link")' + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: image + mdast: + type: root + children: + - type: image + url: fig.png + alt: A figure + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "![A figure](fig.png)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: break in paragraph + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Some markdown + - type: break + - type: text + value: Some more markdown + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Some markdown\\\n" + - "Some more markdown" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 diff --git a/packages/myst-to-ipynb/tests/commonmark.yml b/packages/myst-to-ipynb/tests/commonmark.yml new file mode 100644 index 0000000000..cf1bf2a202 --- /dev/null +++ b/packages/myst-to-ipynb/tests/commonmark.yml @@ -0,0 +1,376 @@ +title: myst-to-ipynb CommonMark mode +cases: + - title: inline math converted to dollar signs + options: + markdown: commonmark + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: 'The value ' + - type: inlineMath + value: 'E = mc^2' + - type: text + value: ' is famous.' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "The value $E = mc^2$ is famous." 
+ metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: math block converted to dollar-dollar + options: + markdown: commonmark + mdast: + type: root + children: + - type: math + value: "\\int_0^\\infty e^{-x^2} dx" + label: eq-gauss + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "$$\n" + - "\\int_0^\\infty e^{-x^2} dx\n" + - "$$ (eq-gauss)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: math block without label + options: + markdown: commonmark + mdast: + type: root + children: + - type: math + value: 'a^2 + b^2 = c^2' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "$$\n" + - "a^2 + b^2 = c^2\n" + - "$$" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: admonition converted to blockquote + options: + markdown: commonmark + mdast: + type: root + children: + - type: admonition + kind: note + children: + - type: admonitionTitle + children: + - type: text + value: Important + - type: paragraph + children: + - type: text + value: This is a note. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "> **Important**\n" + - ">\n" + - "> This is a note." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: admonition preserved in myst mode + mdast: + type: root + children: + - type: admonition + kind: note + children: + - type: admonitionTitle + children: + - type: text + value: Important + - type: paragraph + children: + - type: text + value: This is a note. 
+ ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - ":::{note} Important\n" + - "This is a note.\n" + - ":::" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: exercise with enumerator + options: + markdown: commonmark + mdast: + type: root + children: + - type: exercise + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: Exercise + - type: paragraph + children: + - type: text + value: 'Solve ' + - type: inlineMath + value: 'x^2 = 1' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Exercise 1**\n" + - "\n" + - "Solve $x^2 = 1$" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: theorem with title + options: + markdown: commonmark + mdast: + type: root + children: + - type: proof + kind: theorem + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: Pythagorean + - type: paragraph + children: + - type: inlineMath + value: 'a^2 + b^2 = c^2' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Theorem 1 (Pythagorean)**\n" + - "\n" + - "$a^2 + b^2 = c^2$" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: tabSet flattened to bold titles + options: + markdown: commonmark + mdast: + type: root + children: + - type: tabSet + children: + - type: tabItem + title: Python + children: + - type: code + lang: python + value: 'print("hi")' + - type: tabItem + title: Julia + children: + - type: code + lang: julia + value: 'println("hi")' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Python**\n" + - "\n" + - "```python\n" + - "print(\"hi\")\n" + - "```\n" + - "\n" + - "**Julia**\n" + - "\n" + - "```julia\n" + - "println(\"hi\")\n" + - "```" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: solution dropped when configured + options: + markdown: commonmark 
+ commonmark: + dropSolutions: true + mdast: + type: root + children: + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution + - type: paragraph + children: + - type: text + value: The answer is 42. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: solution kept by default in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution to Exercise 1 + - type: paragraph + children: + - type: text + value: The answer is 42. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Solution to Exercise 1**\n" + - "\n" + - "The answer is 42." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: inline math with underscores not escaped + options: + markdown: commonmark + mdast: + type: root + children: + - type: paragraph + children: + - type: inlineMath + value: 'x_1 + x_2' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "$x_1 + x_2$" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: math block with underscores not escaped + options: + markdown: commonmark + mdast: + type: root + children: + - type: math + value: "\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}" + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "$$\n" + - "\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}\n" + - "$$" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: CommonMark with frontmatter kernelspec + frontmatter: + kernelspec: + name: python3 + display_name: Python 3 + language: python + options: + markdown: commonmark + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: 'Value: ' + - type: 
inlineMath + value: 'x = 1' + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Value: $x = 1$" + metadata: + language_info: + name: python + kernelspec: + name: python3 + display_name: Python 3 + language: python + nbformat: 4 + nbformat_minor: 2 diff --git a/packages/myst-to-ipynb/tests/frontmatter.yml b/packages/myst-to-ipynb/tests/frontmatter.yml new file mode 100644 index 0000000000..9d48cacdc8 --- /dev/null +++ b/packages/myst-to-ipynb/tests/frontmatter.yml @@ -0,0 +1,108 @@ +title: myst-to-ipynb frontmatter and metadata +cases: + - title: empty frontmatter defaults to python + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Hello world! + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Hello world!" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: kernelspec from frontmatter + frontmatter: + kernelspec: + name: julia-1.10 + display_name: Julia 1.10 + language: julia + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Hello + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Hello" + metadata: + language_info: + name: julia + kernelspec: + name: julia-1.10 + display_name: Julia 1.10 + language: julia + nbformat: 4 + nbformat_minor: 2 + + - title: kernelspec python3 + frontmatter: + kernelspec: + name: python3 + display_name: Python 3 + language: python + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Test + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Test" + metadata: + language_info: + name: python + kernelspec: + name: python3 + display_name: Python 3 + language: python + nbformat: 4 + nbformat_minor: 2 + + - title: kernelspec R language + frontmatter: + kernelspec: + name: ir + display_name: R + language: R + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: R notebook + 
ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "R notebook" + metadata: + language_info: + name: R + kernelspec: + name: ir + display_name: R + language: R + nbformat: 4 + nbformat_minor: 2 diff --git a/packages/myst-to-ipynb/tests/run.spec.ts b/packages/myst-to-ipynb/tests/run.spec.ts index c128772da1..9df4d579d5 100644 --- a/packages/myst-to-ipynb/tests/run.spec.ts +++ b/packages/myst-to-ipynb/tests/run.spec.ts @@ -4,11 +4,15 @@ import path from 'node:path'; import yaml from 'js-yaml'; import { unified } from 'unified'; import writeIpynb from '../src'; +import type { PageFrontmatter } from 'myst-frontmatter'; +import type { IpynbOptions } from '../src'; type TestCase = { title: string; ipynb: Record; mdast: Record; + frontmatter?: PageFrontmatter; + options?: IpynbOptions; }; type TestCases = { @@ -28,8 +32,8 @@ casesList.forEach(({ title, cases }) => { describe(title, () => { test.each(cases.map((c): [string, TestCase] => [c.title, c]))( '%s', - (_, { ipynb, mdast }) => { - const pipe = unified().use(writeIpynb); + (_, { ipynb, mdast, frontmatter, options }) => { + const pipe = unified().use(writeIpynb, frontmatter, options); pipe.runSync(mdast as any); const file = pipe.stringify(mdast as any); expect(JSON.parse(file.result)).toEqual(ipynb); @@ -37,34 +41,3 @@ casesList.forEach(({ title, cases }) => { ); }); }); - -describe('myst-to-ipynb frontmatter', () => { - test('empty frontmatter passes', () => { - const pipe = unified().use(writeIpynb, {}); - const mdast = { - type: 'root', - children: [{ type: 'paragraph', children: [{ type: 'text', value: 'Hello world!' }] }], - }; - pipe.runSync(mdast as any); - const file = pipe.stringify(mdast as any); - expect(file.result).toEqual(`{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Hello world!" 
- ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}` - ); - }); -}); From 51d65248e46a09f1eefe1a107295bdd274ba3cf0 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 13:48:44 +1100 Subject: [PATCH 11/27] fix: strip identifier/label from nodes, drop mystTarget/comment, filter empty cells Real-world validation with QuantEcon lecture content revealed: - myst-to-md labelWrapper was adding (identifier)= prefixes to headings, paragraphs, blockquotes, and lists with identifier/label properties - mystTarget nodes need to be dropped in CommonMark mode - comment nodes (% syntax) need to be dropped in CommonMark mode - code blocks with extra MyST attributes rendered as code-block directives - +++ block markers appearing mid-cell (not just leading) - Empty markdown cells from dropped nodes should be filtered out Changes: - commonmark.ts: strip identifier/label from all transformed children, add mystTarget and comment handlers, add code handler - index.ts: filter empty markdown cells, fix stripBlockMarkers /gm regex - commonmark.yml: add 5 new tests, update solution-dropped test --- packages/myst-to-ipynb/src/commonmark.ts | 19 +++ packages/myst-to-ipynb/src/index.ts | 59 +++++---- packages/myst-to-ipynb/tests/commonmark.yml | 134 +++++++++++++++++++- 3 files changed, 182 insertions(+), 30 deletions(-) diff --git a/packages/myst-to-ipynb/src/commonmark.ts b/packages/myst-to-ipynb/src/commonmark.ts index c1cd725add..bd2c54d3ef 100644 --- a/packages/myst-to-ipynb/src/commonmark.ts +++ b/packages/myst-to-ipynb/src/commonmark.ts @@ -409,6 +409,15 @@ export function transformToCommonMark( } } tree.children = newChildren; + + // Strip identifier/label from all transformed children to prevent + // myst-to-md's labelWrapper from adding `(identifier)=\n` prefixes + // to headings, paragraphs, blockquotes, lists, etc. + // This runs AFTER transformNode so transforms can still use label/identifier. 
+ for (const child of tree.children) { + delete child.identifier; + delete child.label; + } } return tree; @@ -459,6 +468,16 @@ function transformNode( return transformMystDirective(node); case 'mystRole': return transformMystRole(node); + case 'mystTarget': + // Drop MyST target labels — they have no CommonMark equivalent + return null; + case 'comment': + // Drop MyST comments (% comment syntax) — not valid in CommonMark + return null; + case 'code': + // Strip extra MyST attributes (class, emphasize-lines, etc.) so myst-to-md + // renders this as a plain fenced code block instead of a ```{code-block} directive + return transformCodeBlock(node); default: return node; } diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 65b6de9f93..3a076d6404 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -19,7 +19,7 @@ function sourceToStringList(src: string): string[] { * These are MyST-specific block separators that have no meaning in notebooks. */ function stripBlockMarkers(md: string): string { - return md.replace(/^\+\+\+[^\n]*\n/, ''); + return md.replace(/^\+\+\+[^\n]*\n/gm, ''); } export interface IpynbOptions { @@ -37,33 +37,42 @@ export function writeIpynb( ) { const markdownFormat = options?.markdown ?? 
'myst'; - const cells = (node.children as Block[]).map((block: Block) => { - if (block.type === 'block' && block.kind === 'notebook-code') { - const code = select('code', block) as Code; + const cells = (node.children as Block[]) + .map((block: Block) => { + if (block.type === 'block' && block.kind === 'notebook-code') { + const code = select('code', block) as Code; + return { + cell_type: 'code' as const, + execution_count: null, + metadata: {}, + outputs: [], + source: sourceToStringList(code.value), + }; + } + // Build the sub-tree for this markdown cell + let blockTree: any = { type: 'root', children: [block] }; + if (markdownFormat === 'commonmark') { + blockTree = transformToCommonMark( + JSON.parse(JSON.stringify(blockTree)), + options?.commonmark, + ); + } + const md = writeMd(file, blockTree).result as string; + const cleanMd = stripBlockMarkers(md); return { - cell_type: 'code', - execution_count: null, + cell_type: 'markdown' as const, metadata: {}, - outputs: [], - source: sourceToStringList(code.value), + source: sourceToStringList(cleanMd), }; - } - // Build the sub-tree for this markdown cell - let blockTree: any = { type: 'root', children: [block] }; - if (markdownFormat === 'commonmark') { - blockTree = transformToCommonMark( - JSON.parse(JSON.stringify(blockTree)), - options?.commonmark, - ); - } - const md = writeMd(file, blockTree).result as string; - const cleanMd = stripBlockMarkers(md); - return { - cell_type: 'markdown', - metadata: {}, - source: sourceToStringList(cleanMd), - }; - }); + }) + .filter((cell) => { + // Remove empty markdown cells (e.g., from dropped mystTarget/comment nodes) + if (cell.cell_type === 'markdown') { + const content = cell.source.join('').trim(); + return content.length > 0; + } + return true; + }); // Build notebook metadata from frontmatter kernelspec when available const languageName = frontmatter?.kernelspec?.language ?? frontmatter?.kernelspec?.name ?? 
'python'; diff --git a/packages/myst-to-ipynb/tests/commonmark.yml b/packages/myst-to-ipynb/tests/commonmark.yml index cf1bf2a202..2ec441b774 100644 --- a/packages/myst-to-ipynb/tests/commonmark.yml +++ b/packages/myst-to-ipynb/tests/commonmark.yml @@ -257,11 +257,7 @@ cases: - type: text value: The answer is 42. ipynb: - cells: - - cell_type: markdown - metadata: {} - source: - - "" + cells: [] metadata: language_info: name: python @@ -374,3 +370,131 @@ cases: language: python nbformat: 4 nbformat_minor: 2 + + - title: heading identifier stripped in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: heading + depth: 2 + identifier: my-section + label: my-section + children: + - type: text + value: My Section + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "## My Section" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: paragraph identifier stripped in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: paragraph + identifier: labeled-para + children: + - type: text + value: A labeled paragraph. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "A labeled paragraph." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: mystTarget nodes dropped in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: mystTarget + label: my-target + - type: paragraph + children: + - type: text + value: After target. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "After target." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: comment nodes dropped in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: paragraph + children: + - type: text + value: Before comment. 
+ - type: comment + value: This is a MyST comment + - type: paragraph + children: + - type: text + value: After comment. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Before comment." + - cell_type: markdown + metadata: {} + source: + - "After comment." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: code block with extra attributes stripped in commonmark mode + options: + markdown: commonmark + mdast: + type: root + children: + - type: code + lang: python + class: no-execute + value: "print('hello')" + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "```python\n" + - "print('hello')\n" + - "```" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 From f6a858614fb12377f82d7e44e7d325dd50a5ebfc Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 14:03:02 +1100 Subject: [PATCH 12/27] feat: add image node handler to CommonMark transform Standalone {image} directives with class/width/align properties were being serialized as ```{image} directives by myst-to-md. Added transformImage handler that strips directive-specific properties so they render as plain ![alt](url) markdown syntax. Found during full-project validation against lecture-python-programming.myst (24 lectures, all clean after this fix). --- packages/myst-to-ipynb/src/commonmark.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/myst-to-ipynb/src/commonmark.ts b/packages/myst-to-ipynb/src/commonmark.ts index bd2c54d3ef..66448e913a 100644 --- a/packages/myst-to-ipynb/src/commonmark.ts +++ b/packages/myst-to-ipynb/src/commonmark.ts @@ -344,6 +344,20 @@ function transformCodeBlock(node: GenericNode): GenericNode { }; } +/** + * Convert an image node to a plain markdown image by stripping + * directive-specific properties (class, width, align) that cause + * myst-to-md to render it as a ```{image} directive. 
+ */ +function transformImage(node: GenericNode): GenericNode { + return { + type: 'image', + url: node.url ?? node.urlSource ?? '', + alt: node.alt ?? '', + title: node.title, + }; +} + /** * Convert a mystDirective node to plain content or remove it. */ @@ -478,6 +492,10 @@ function transformNode( // Strip extra MyST attributes (class, emphasize-lines, etc.) so myst-to-md // renders this as a plain fenced code block instead of a ```{code-block} directive return transformCodeBlock(node); + case 'image': + // Strip directive-specific properties (class, width, align) so myst-to-md + // renders this as ![alt](url) instead of a ```{image} directive + return transformImage(node); default: return node; } From 85b6fcc0fab51f4b815ab7095a721314391c5735 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 14:46:43 +1100 Subject: [PATCH 13/27] feat: add image attachment embedding option for ipynb export Add 'images: attachment' option that embeds local images as base64 cell attachments in exported notebooks, producing self-contained .ipynb files that don't depend on external image files. Architecture (two-phase hybrid): - Phase 1 (myst-cli): collectImageData() walks AST image nodes, resolves filesystem paths, reads files, and base64-encodes them - Phase 2 (myst-to-ipynb): embedImagesAsAttachments() rewrites serialized markdown image refs to attachment: references Usage in frontmatter: exports: - format: ipynb images: attachment New files: - packages/myst-to-ipynb/src/attachments.ts - packages/myst-to-ipynb/tests/attachments.spec.ts (7 tests) - packages/myst-to-ipynb/tests/attachments.yml (5 tests) 47/47 tests passing. 
--- packages/myst-cli/src/build/ipynb/index.ts | 76 +++++++- packages/myst-to-ipynb/src/attachments.ts | 103 ++++++++++ packages/myst-to-ipynb/src/index.ts | 44 +++++ .../myst-to-ipynb/tests/attachments.spec.ts | 85 +++++++++ packages/myst-to-ipynb/tests/attachments.yml | 176 ++++++++++++++++++ 5 files changed, 478 insertions(+), 6 deletions(-) create mode 100644 packages/myst-to-ipynb/src/attachments.ts create mode 100644 packages/myst-to-ipynb/tests/attachments.spec.ts create mode 100644 packages/myst-to-ipynb/tests/attachments.yml diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index dd23621b29..ef203361a2 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -1,9 +1,12 @@ +import fs from 'node:fs'; import path from 'node:path'; +import mime from 'mime-types'; import { tic, writeFileToFolder } from 'myst-cli-utils'; import { FRONTMATTER_ALIASES, PAGE_FRONTMATTER_KEYS } from 'myst-frontmatter'; import { writeIpynb } from 'myst-to-ipynb'; -import type { IpynbOptions } from 'myst-to-ipynb'; +import type { IpynbOptions, ImageData } from 'myst-to-ipynb'; import { filterKeys } from 'simple-validators'; +import { selectAll } from 'unist-util-select'; import { VFile } from 'vfile'; import { finalizeMdast } from '../../process/mdast.js'; import type { ISession } from '../../session/types.js'; @@ -12,6 +15,7 @@ import { KNOWN_IMAGE_EXTENSIONS } from '../../utils/resolveExtension.js'; import type { ExportWithOutput, ExportFnOptions } from '../types.js'; import { cleanOutput } from '../utils/cleanOutput.js'; import { getFileContent } from '../utils/getFileContent.js'; +import { getSourceFolder } from '../../transforms/links.js'; export async function runIpynbExport( session: ISession, @@ -44,14 +48,74 @@ export async function runIpynbExport( }); const vfile = new VFile(); vfile.path = output; - // Pass markdown format option from export config (e.g. 
`markdown: commonmark` in myst.yml) - const ipynbOpts: IpynbOptions | undefined = - (exportOptions as any).markdown === 'commonmark' - ? { markdown: 'commonmark' } - : undefined; + // Build ipynb options from export config + const ipynbOpts: IpynbOptions = {}; + if ((exportOptions as any).markdown === 'commonmark') { + ipynbOpts.markdown = 'commonmark'; + } + if ((exportOptions as any).images === 'attachment') { + ipynbOpts.images = 'attachment'; + // Collect image data from the AST — read files and base64-encode + ipynbOpts.imageData = collectImageData( + session, + mdast, + article.file, + ); + } const mdOut = writeIpynb(vfile, mdast as any, frontmatter, ipynbOpts); logMessagesFromVFile(session, mdOut); session.log.info(toc(`📓 Exported IPYNB in %s, copying to ${output}`)); writeFileToFolder(output, mdOut.result as string); return { tempFolders: [] }; } + +/** + * Collect base64-encoded image data from the mdast tree (Phase 1 of attachment embedding). + * + * Walks all image nodes via `selectAll('image', mdast)`, resolves their + * filesystem paths using `getSourceFolder` (handles both absolute `/_static/...` + * and relative paths), reads the files, and base64-encodes them into a map. + * + * The returned `Record<string, ImageData>` is passed to `writeIpynb` as + * `options.imageData`. Phase 2 (in `embedImagesAsAttachments`) then rewrites + * the serialized markdown to use `attachment:` references. + * + * Remote URLs (http/https) and data URIs are skipped — only local files are embedded. + */ +function collectImageData( + session: ISession, + mdast: any, + sourceFile: string, +): Record<string, ImageData> { + const imageData: Record<string, ImageData> = {}; + const imageNodes = selectAll('image', mdast) as any[]; + const sourcePath = session.sourcePath(); + + for (const img of imageNodes) { + const url = img.url ??
img.urlSource; + if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('data:')) { + continue; + } + if (imageData[url]) continue; // already processed + + const sourceFolder = getSourceFolder(url, sourceFile, sourcePath); + const filePath = path.join(sourceFolder, url); + + try { + if (!fs.existsSync(filePath)) { + session.log.debug(`Image not found for attachment embedding: ${filePath}`); + continue; + } + const buffer = fs.readFileSync(filePath); + const mimeType = (mime.lookup(filePath) || 'application/octet-stream') as string; + imageData[url] = { + mime: mimeType, + data: buffer.toString('base64'), + }; + } catch (err) { + session.log.debug(`Failed to read image for attachment: ${filePath}`); + } + } + + return imageData; +} diff --git a/packages/myst-to-ipynb/src/attachments.ts b/packages/myst-to-ipynb/src/attachments.ts new file mode 100644 index 0000000000..b2d061b1e3 --- /dev/null +++ b/packages/myst-to-ipynb/src/attachments.ts @@ -0,0 +1,103 @@ +/** + * Image attachment embedding for ipynb export. + * + * Converts markdown image references `![alt](url)` into Jupyter cell + * attachments `![alt](attachment:name)` with base64-encoded image data + * stored in the cell's `attachments` field. + * + * This enables self-contained notebooks that don't depend on external + * image files — useful for distribution, Colab uploads, etc. + * + * Architecture (two-phase hybrid): + * + * Phase 1 — AST-driven collection (myst-cli, build/ipynb/index.ts): + * `collectImageData()` walks AST image nodes via `selectAll('image', mdast)`, + * resolves filesystem paths, reads files, and base64-encodes them into a + * `Record` map passed to `writeIpynb` as `options.imageData`. + * + * Phase 2 — Post-serialization rewriting (this module): + * `embedImagesAsAttachments()` runs AFTER `writeMd` has serialized the AST + * to a markdown string. 
It regex-matches `![alt](url)` patterns, looks up + * URLs in the `imageData` map, and rewrites them to `![alt](attachment:name)`. + * + * Why regex instead of AST rewriting? + * By the time we build cell attachments, `writeMd` has already consumed the AST + * and produced a markdown string. Rewriting at the AST level would require the + * transform phase to return per-cell attachment metadata alongside the tree, + * coupling the pure AST transform to notebook cell structure. The current split + * keeps `myst-to-ipynb` (pure, no filesystem) separate from `myst-cli` + * (filesystem-aware). + */ + +import type { ImageData } from './index.js'; + +/** + * Extract the basename (filename) from a URL or path. + */ +function basename(url: string): string { + // Strip query string and fragment + const clean = url.split('?')[0].split('#')[0]; + const parts = clean.split('/'); + return parts[parts.length - 1] || 'image'; +} + +/** + * Scan markdown text for image references, replace matching URLs with + * `attachment:` references, and build the cell attachments object. 
+ * + * @param md - The markdown string to process + * @param imageData - Map of image URL → { mime, data } with base64-encoded content + * @returns Object with rewritten markdown and optional attachments dict + */ +export function embedImagesAsAttachments( + md: string, + imageData: Record<string, ImageData>, +): { md: string; attachments?: Record<string, Record<string, string>> } { + if (!imageData || Object.keys(imageData).length === 0) return { md }; + + const attachments: Record<string, Record<string, string>> = {}; + const usedNames = new Set<string>(); + let updatedMd = md; + + // Match markdown image syntax: ![alt](url) and ![alt](url "title") + const imgRegex = /!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/g; + const replacements: Array<{ original: string; replacement: string }> = []; + + let match; + while ((match = imgRegex.exec(md)) !== null) { + const [fullMatch, alt, url] = match; + const data = imageData[url]; + if (!data) continue; + + // Generate a unique attachment name from the basename + const base = basename(url); + let name = base; + let counter = 1; + while (usedNames.has(name)) { + const dot = base.lastIndexOf('.'); + if (dot >= 0) { + name = `${base.slice(0, dot)}_${counter}${base.slice(dot)}`; + } else { + name = `${base}_${counter}`; + } + counter++; + } + usedNames.add(name); + + attachments[name] = { [data.mime]: data.data }; + replacements.push({ + original: fullMatch, + replacement: `![${alt}](attachment:${name})`, + }); + } + + // Apply replacements in match order (each replace() rewrites only the first occurrence of its original text) + for (const { original, replacement } of replacements) { + updatedMd = updatedMd.replace(original, replacement); + } + + if (Object.keys(attachments).length > 0) { + return { md: updatedMd, attachments }; + } + return { md }; +} diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 3a076d6404..d9eba954a3 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -7,6 +7,7 @@ import { writeMd } from 'myst-to-md'; import { select } from 'unist-util-select';
import { transformToCommonMark } from './commonmark.js'; import type { CommonMarkOptions } from './commonmark.js'; +import { embedImagesAsAttachments } from './attachments.js'; function sourceToStringList(src: string): string[] { const lines = src.split('\n').map((s) => `${s}\n`); @@ -22,11 +23,37 @@ function stripBlockMarkers(md: string): string { return md.replace(/^\+\+\+[^\n]*\n/gm, ''); } +/** Image data for embedding as cell attachments */ +export interface ImageData { + /** MIME type (e.g. 'image/png') */ + mime: string; + /** Base64-encoded image data */ + data: string; +} + export interface IpynbOptions { /** Markdown format: 'myst' preserves MyST syntax, 'commonmark' converts to plain CommonMark */ markdown?: 'myst' | 'commonmark'; /** Options for CommonMark conversion */ commonmark?: CommonMarkOptions; + /** + * How to handle images: 'reference' keeps URL references (default), + * 'attachment' embeds as base64 cell attachments for self-contained notebooks. + * + * When 'attachment', image data is read from disk by `collectImageData()` + * in myst-cli (Phase 1), then post-serialization regex rewriting in + * `embedImagesAsAttachments()` converts `![alt](url)` → `![alt](attachment:name)` + * and adds the `attachments` field to each cell (Phase 2). + */ + images?: 'reference' | 'attachment'; + /** + * Map of image URL → { mime, data } for attachment embedding. + * Only used when `images` is 'attachment'. Populated by `collectImageData()` + * in myst-cli which walks AST image nodes and reads files from disk. + * Keys must match the image URLs as they appear in the serialized markdown + * (e.g. '/_static/img/foo.png'). 
+ */ + imageData?: Record<string, ImageData>; } export function writeIpynb( @@ -59,6 +86,22 @@ export function writeIpynb( } const md = writeMd(file, blockTree).result as string; const cleanMd = stripBlockMarkers(md); + // Embed images as cell attachments if requested + if (options?.images === 'attachment' && options?.imageData) { + const { md: attachedMd, attachments } = embedImagesAsAttachments( + cleanMd, + options.imageData, + ); + const cell: Record<string, any> = { + cell_type: 'markdown' as const, + metadata: {}, + source: sourceToStringList(attachedMd), + }; + if (attachments) { + cell.attachments = attachments; + } + return cell; + } return { cell_type: 'markdown' as const, metadata: {}, @@ -116,3 +159,4 @@ const plugin: Plugin<[PageFrontmatter?, IpynbOptions?], Root, VFile> = function export default plugin; export type { CommonMarkOptions } from './commonmark.js'; +export { embedImagesAsAttachments } from './attachments.js'; diff --git a/packages/myst-to-ipynb/tests/attachments.spec.ts b/packages/myst-to-ipynb/tests/attachments.spec.ts new file mode 100644 index 0000000000..da36cac927 --- /dev/null +++ b/packages/myst-to-ipynb/tests/attachments.spec.ts @@ -0,0 +1,85 @@ +import { describe, expect, test } from 'vitest'; +import { embedImagesAsAttachments } from '../src/attachments'; + +describe('embedImagesAsAttachments', () => { + test('replaces image URL with attachment reference', () => { + const md = '![Chart](/_static/img/chart.png)'; + const imageData = { + '/_static/img/chart.png': { mime: 'image/png', data: 'base64data' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![Chart](attachment:chart.png)'); + expect(result.attachments).toEqual({ + 'chart.png': { 'image/png': 'base64data' }, + }); + }); + + test('handles multiple images', () => { + const md = '![A](/_static/a.png)\n\n![B](/_static/b.jpg)'; + const imageData = { + '/_static/a.png': { mime: 'image/png', data: 'AAAA' }, + '/_static/b.jpg': { mime: 'image/jpeg', data: 'BBBB' }, + };
+ const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![A](attachment:a.png)\n\n![B](attachment:b.jpg)'); + expect(result.attachments).toEqual({ + 'a.png': { 'image/png': 'AAAA' }, + 'b.jpg': { 'image/jpeg': 'BBBB' }, + }); + }); + + test('deduplicates same-basename images with counter suffix', () => { + const md = '![A](/dir1/img.png)\n\n![B](/dir2/img.png)'; + const imageData = { + '/dir1/img.png': { mime: 'image/png', data: 'AAAA' }, + '/dir2/img.png': { mime: 'image/png', data: 'BBBB' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![A](attachment:img.png)\n\n![B](attachment:img_1.png)'); + expect(result.attachments).toEqual({ + 'img.png': { 'image/png': 'AAAA' }, + 'img_1.png': { 'image/png': 'BBBB' }, + }); + }); + + test('skips images not in imageData', () => { + const md = '![A](/a.png)\n\n![B](/b.png)'; + const imageData = { + '/a.png': { mime: 'image/png', data: 'AAAA' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![A](attachment:a.png)\n\n![B](/b.png)'); + expect(result.attachments).toEqual({ + 'a.png': { 'image/png': 'AAAA' }, + }); + }); + + test('returns no attachments when imageData is empty', () => { + const md = '![A](/a.png)'; + const result = embedImagesAsAttachments(md, {}); + expect(result.md).toBe('![A](/a.png)'); + expect(result.attachments).toBeUndefined(); + }); + + test('returns no attachments when no images match', () => { + const md = '![A](/a.png)'; + const imageData = { + '/other.png': { mime: 'image/png', data: 'XXXX' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![A](/a.png)'); + expect(result.attachments).toBeUndefined(); + }); + + test('handles image with no alt text', () => { + const md = '![](/_static/chart.png)'; + const imageData = { + '/_static/chart.png': { mime: 'image/png', data: 'DATA' }, + }; + const result = embedImagesAsAttachments(md, imageData); + 
expect(result.md).toBe('![](attachment:chart.png)'); + expect(result.attachments).toEqual({ + 'chart.png': { 'image/png': 'DATA' }, + }); + }); +}); diff --git a/packages/myst-to-ipynb/tests/attachments.yml b/packages/myst-to-ipynb/tests/attachments.yml new file mode 100644 index 0000000000..a8e31e1eb5 --- /dev/null +++ b/packages/myst-to-ipynb/tests/attachments.yml @@ -0,0 +1,176 @@ +title: Image Attachments +cases: + - title: single image with attachment embedding + options: + images: attachment + imageData: + /_static/img/chart.png: + mime: image/png + data: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg== + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: text + value: "Here is a chart:" + - type: image + url: /_static/img/chart.png + alt: A chart + ipynb: + cells: + - cell_type: markdown + metadata: {} + attachments: + chart.png: + image/png: iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg== + source: + - "Here is a chart:\n" + - "\n" + - "![A chart](attachment:chart.png)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: multiple images in one cell with attachment embedding + options: + images: attachment + imageData: + /_static/img/alpha.png: + mime: image/png + data: AAAA + /_static/img/beta.jpg: + mime: image/jpeg + data: BBBB + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: image + url: /_static/img/alpha.png + alt: Alpha + - type: paragraph + children: + - type: image + url: /_static/img/beta.jpg + alt: Beta + ipynb: + cells: + - cell_type: markdown + metadata: {} + attachments: + alpha.png: + image/png: AAAA + beta.jpg: + image/jpeg: BBBB + source: + - "![Alpha](attachment:alpha.png)\n" + - "\n" + - "![Beta](attachment:beta.jpg)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - 
title: image without matching data stays as reference + options: + images: attachment + imageData: + /_static/img/other.png: + mime: image/png + data: CCCC + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: image + url: /_static/img/missing.png + alt: Missing + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "![Missing](/_static/img/missing.png)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: images as reference (default) keeps URLs + options: + imageData: + /_static/img/chart.png: + mime: image/png + data: DDDD + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: image + url: /_static/img/chart.png + alt: Chart + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "![Chart](/_static/img/chart.png)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: attachment embedding with commonmark conversion + options: + markdown: commonmark + images: attachment + imageData: + /_static/img/plot.png: + mime: image/png + data: EEEE + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: text + value: A plot + - type: image + url: /_static/img/plot.png + alt: My plot + class: width-80 + ipynb: + cells: + - cell_type: markdown + metadata: {} + attachments: + plot.png: + image/png: EEEE + source: + - "A plot\n" + - "\n" + - "![My plot](attachment:plot.png)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 From 34a59d61e24a35e5108feed5992de6fcf5b6ef4b Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 15:09:12 +1100 Subject: [PATCH 14/27] fix: lint formatting + add ipynb export documentation - Fix prettier formatting in commonmark.ts, index.ts, and myst-cli ipynb/index.ts - Add docs/creating-notebooks.md with full ipynb export documentation (CommonMark markdown, image 
attachments, export options table) - Add ipynb to export format table in docs/documents-exports.md - Add --ipynb CLI example to docs/documents-exports.md - Add ipynb to format list in docs/frontmatter.md - Add creating-notebooks.md to docs/myst.yml TOC - Update packages/myst-to-ipynb/README.md with features and usage --- docs/creating-notebooks.md | 117 +++++++++++++++++++++ docs/documents-exports.md | 7 +- docs/frontmatter.md | 2 +- docs/myst.yml | 1 + packages/myst-cli/src/build/ipynb/index.ts | 13 +-- packages/myst-to-ipynb/README.md | 30 +++++- packages/myst-to-ipynb/src/commonmark.ts | 30 ++---- packages/myst-to-ipynb/src/index.ts | 3 +- 8 files changed, 173 insertions(+), 30 deletions(-) create mode 100644 docs/creating-notebooks.md diff --git a/docs/creating-notebooks.md b/docs/creating-notebooks.md new file mode 100644 index 0000000000..19573ec181 --- /dev/null +++ b/docs/creating-notebooks.md @@ -0,0 +1,117 @@ +--- +title: Creating Jupyter Notebooks +description: Export MyST documents to Jupyter Notebook (.ipynb) format with optional CommonMark markdown and embedded images. +--- + +You can export MyST documents to Jupyter Notebook (`.ipynb`) format using `myst build`. The exported notebooks can use either MyST markdown (for use with [jupyterlab-myst](https://github.com/jupyter-book/jupyterlab-myst)) or plain CommonMark markdown compatible with vanilla Jupyter Notebook, JupyterLab, and Google Colab. + +## Basic usage + +Add an `exports` entry with `format: ipynb` to your page frontmatter: + +```{code-block} yaml +:filename: my-document.md +--- +exports: + - format: ipynb + output: exports/my-document.ipynb +--- +``` + +Build the notebook with: + +```bash +myst build my-document.md --ipynb +``` + +Or build all ipynb exports in the project: + +```bash +myst build --ipynb +``` + +## CommonMark markdown + +By default, exported notebooks use MyST markdown in their cells. 
If you need compatibility with environments that don't support MyST (vanilla Jupyter, Colab, etc.), set `markdown: commonmark`: + +```{code-block} yaml +:filename: my-document.md +--- +exports: + - format: ipynb + markdown: commonmark + output: exports/my-document.ipynb +--- +``` + +With `markdown: commonmark`, MyST-specific syntax is converted to plain CommonMark equivalents: + +```{list-table} CommonMark conversions +:header-rows: 1 +- * MyST syntax + * CommonMark output +- * `:::{note}` admonitions + * `> **Note**` blockquotes +- * `` {math}`E=mc^2` `` roles + * `$E=mc^2$` dollar math +- * `$$` math blocks + * `$$...$$` (preserved) +- * `:::{exercise}` directives + * **Exercise N** bold headers +- * `:::{proof:theorem}` directives + * **Theorem N** bold headers +- * Figures with captions + * `![alt](url)` with italic caption +- * Tab sets + * Bold tab titles with content +- * `{image}` directives + * `![alt](url)` images +- * `(label)=` targets + * Dropped (no CommonMark equivalent) +- * `% comments` + * Dropped +``` + +## Embedding images as cell attachments + +By default, images in exported notebooks reference external files. To create fully self-contained notebooks with images embedded as base64 cell attachments, set `images: attachment`: + +```{code-block} yaml +:filename: my-document.md +--- +exports: + - format: ipynb + markdown: commonmark + images: attachment + output: exports/my-document.ipynb +--- +``` + +With `images: attachment`: +- Local images are read from disk and base64-encoded +- Image references become `![alt](attachment:filename.png)` +- Each cell includes an `attachments` field with the image data +- Remote images (http/https URLs) are left as references + +This is useful for distributing notebooks, uploading to Google Colab, or sharing via email where external image files may not be available. 
+ +## Export options + +```{list-table} ipynb export options +:header-rows: 1 +- * Option + * Values + * Description +- * `format` + * `ipynb` + * Required — specifies notebook export +- * `output` + * string + * Output filename or folder +- * `markdown` + * `myst` (default), `commonmark` + * Markdown format for notebook cells +- * `images` + * `reference` (default), `attachment` + * How to handle images — references or embedded attachments +``` diff --git a/docs/documents-exports.md b/docs/documents-exports.md index dcffdd55a6..2dacf30152 100644 --- a/docs/documents-exports.md +++ b/docs/documents-exports.md @@ -1,6 +1,6 @@ --- title: Exporting overview -description: Create an export for PDF, LaTeX, Typst, Docx, JATS, or CITATION.cff in your page or project frontmatter, and use `myst build` to build the export. +description: Create an export for PDF, LaTeX, Typst, Docx, JATS, Jupyter Notebook (ipynb), or CITATION.cff in your page or project frontmatter, and use `myst build` to build the export. --- You can export MyST content into one or more static documents, and optionally bundle them with a MyST website. This section gives an overview of the Exporting process and major configuration options. 
@@ -29,6 +29,8 @@ Below are supported export types and links to documentation for further reading: * [](./creating-citation-cff.md) - * `MyST Markdown` * [](#export:myst) +- * `Jupyter Notebook` + * [](./creating-notebooks.md) ``` ## Where to configure options for exports @@ -127,6 +129,9 @@ You can configure the CLI command in a number of ways: `myst build --pdf --docx` : Build `pdf` (LaTeX or Typst) exports and `docx` in the project +`myst build --ipynb` +: Build `ipynb` (Jupyter Notebook) exports in the project + `myst build my-paper.md` : Build all exports in a specific page diff --git a/docs/frontmatter.md b/docs/frontmatter.md index cce36dd895..c77e3eb587 100644 --- a/docs/frontmatter.md +++ b/docs/frontmatter.md @@ -438,7 +438,7 @@ For usage information, see [](./documents-exports.md). * - `id` - a string - a local identifier that can be used to reference the export * - `format` - - one of `pdf` (built with $\LaTeX$ or Typst, depending on the template), `tex` (raw $\LaTeX$ files), `pdf+tex` (both PDF and raw $\LaTeX$ files) `typst` (raw Typst files and built PDF file), `docx`, `md`, `jats`, or `meca` + - one of `pdf` (built with $\LaTeX$ or Typst, depending on the template), `tex` (raw $\LaTeX$ files), `pdf+tex` (both PDF and raw $\LaTeX$ files), `typst` (raw Typst files and built PDF file), `docx`, `md`, `jats`, `meca`, or `ipynb` * - `template` - a string - name of an existing [MyST template](https://github.com/myst-templates) or a local path to a template folder. Templates are only available for `pdf`, `tex`, `typst`, and `docx` formats.
* - `output` diff --git a/docs/myst.yml b/docs/myst.yml index d89e1de0fe..c02e17de15 100644 --- a/docs/myst.yml +++ b/docs/myst.yml @@ -122,6 +122,7 @@ project: - file: creating-word-documents.md - file: creating-jats-xml.md - file: creating-citation-cff.md + - file: creating-notebooks.md - file: plugins.md children: - file: javascript-plugins.md diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index ef203361a2..0b321f75ac 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -56,11 +56,7 @@ export async function runIpynbExport( if ((exportOptions as any).images === 'attachment') { ipynbOpts.images = 'attachment'; // Collect image data from the AST — read files and base64-encode - ipynbOpts.imageData = collectImageData( - session, - mdast, - article.file, - ); + ipynbOpts.imageData = collectImageData(session, mdast, article.file); } const mdOut = writeIpynb(vfile, mdast as any, frontmatter, ipynbOpts); logMessagesFromVFile(session, mdOut); @@ -93,7 +89,12 @@ function collectImageData( for (const img of imageNodes) { const url = img.url ?? img.urlSource; - if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('data:')) { + if ( + !url || + url.startsWith('http://') || + url.startsWith('https://') || + url.startsWith('data:') + ) { continue; } if (imageData[url]) continue; // already processed diff --git a/packages/myst-to-ipynb/README.md b/packages/myst-to-ipynb/README.md index 516b66e962..8d90241dd1 100644 --- a/packages/myst-to-ipynb/README.md +++ b/packages/myst-to-ipynb/README.md @@ -1,3 +1,31 @@ # myst-to-ipynb -Convert a MyST AST to ipynb notebook. +Convert a MyST AST to Jupyter Notebook (`.ipynb`) format. + +Part of the [mystmd](https://github.com/jupyter-book/mystmd) monorepo. 
+ +## Features + +- **MyST markdown** (default) — preserves MyST syntax for use with [jupyterlab-myst](https://github.com/jupyter-book/jupyterlab-myst) +- **CommonMark markdown** (`markdown: commonmark`) — converts MyST directives/roles to plain CommonMark for vanilla Jupyter, JupyterLab, and Google Colab +- **Image attachments** (`images: attachment`) — embeds local images as base64 cell attachments for self-contained notebooks + +## Usage + +Configure exports in your page frontmatter: + +```yaml +exports: + - format: ipynb + markdown: commonmark + images: attachment + output: exports/my-document.ipynb +``` + +Build with: + +```bash +myst build --ipynb +``` + +See the [Creating Jupyter Notebooks](https://mystmd.org/guide/creating-notebooks) documentation for full details. diff --git a/packages/myst-to-ipynb/src/commonmark.ts b/packages/myst-to-ipynb/src/commonmark.ts index 66448e913a..2f2de70e7a 100644 --- a/packages/myst-to-ipynb/src/commonmark.ts +++ b/packages/myst-to-ipynb/src/commonmark.ts @@ -99,9 +99,7 @@ function transformFigure(node: GenericNode): GenericNode { if (captionNode?.children?.length) { children.push({ type: 'paragraph', - children: [ - { type: 'emphasis', children: captionNode.children }, - ], + children: [{ type: 'emphasis', children: captionNode.children }], }); } @@ -240,9 +238,7 @@ function transformTabSet(node: GenericNode): GenericNode { if (tabItem.title) { children.push({ type: 'paragraph', - children: [ - { type: 'strong', children: [{ type: 'text', value: tabItem.title }] }, - ], + children: [{ type: 'strong', children: [{ type: 'text', value: tabItem.title }] }], }); } // Add tab content @@ -271,7 +267,10 @@ function transformCard(node: GenericNode): GenericNode { children.push({ type: 'paragraph', children: [ - { type: 'strong', children: titleNode.children ?? [{ type: 'text', value: toText(titleNode) }] }, + { + type: 'strong', + children: titleNode.children ?? 
[{ type: 'text', value: toText(titleNode) }], + }, ], }); } @@ -293,8 +292,7 @@ function transformGrid(node: GenericNode): GenericNode { */ function transformDetails(node: GenericNode): GenericNode { const summaryNode = node.children?.find((c: GenericNode) => c.type === 'summary'); - const contentChildren = - node.children?.filter((c: GenericNode) => c.type !== 'summary') ?? []; + const contentChildren = node.children?.filter((c: GenericNode) => c.type !== 'summary') ?? []; const titleText = summaryNode ? toText(summaryNode) : 'Details'; @@ -394,18 +392,13 @@ export interface CommonMarkOptions { * This modifies the tree in-place by replacing children arrays. * Returns the (possibly replaced) root node. */ -export function transformToCommonMark( - tree: GenericNode, - opts?: CommonMarkOptions, -): GenericNode { +export function transformToCommonMark(tree: GenericNode, opts?: CommonMarkOptions): GenericNode { const dropSolutions = opts?.dropSolutions ?? false; // Process children recursively (bottom-up so nested directives are handled first) if (tree.children) { // First, recurse into children - tree.children = tree.children.map((child: GenericNode) => - transformToCommonMark(child, opts), - ); + tree.children = tree.children.map((child: GenericNode) => transformToCommonMark(child, opts)); // Then, transform this node's children — replacing nodes that need conversion const newChildren: GenericNode[] = []; @@ -442,10 +435,7 @@ export function transformToCommonMark( * Returns the node unchanged if no transformation is needed. * Returns null if the node should be removed. 
*/ -function transformNode( - node: GenericNode, - dropSolutions: boolean, -): GenericNode | null { +function transformNode(node: GenericNode, dropSolutions: boolean): GenericNode | null { switch (node.type) { case 'admonition': return transformAdmonition(node); diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index d9eba954a3..54a0935d22 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -118,7 +118,8 @@ export function writeIpynb( }); // Build notebook metadata from frontmatter kernelspec when available - const languageName = frontmatter?.kernelspec?.language ?? frontmatter?.kernelspec?.name ?? 'python'; + const languageName = + frontmatter?.kernelspec?.language ?? frontmatter?.kernelspec?.name ?? 'python'; const metadata: Record = { language_info: { name: languageName, From fe2c295daf9cb2afefd55473a78b2a8bb7af45a9 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 15:16:16 +1100 Subject: [PATCH 15/27] fix: break circular dependency between attachments.ts and index.ts Move ImageData interface to shared types.ts so attachments.ts and index.ts no longer import from each other. Fixes madge lint:circular check. --- packages/myst-to-ipynb/src/attachments.ts | 2 +- packages/myst-to-ipynb/src/index.ts | 10 ++-------- packages/myst-to-ipynb/src/types.ts | 7 +++++++ 3 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 packages/myst-to-ipynb/src/types.ts diff --git a/packages/myst-to-ipynb/src/attachments.ts b/packages/myst-to-ipynb/src/attachments.ts index b2d061b1e3..795bb75ddb 100644 --- a/packages/myst-to-ipynb/src/attachments.ts +++ b/packages/myst-to-ipynb/src/attachments.ts @@ -29,7 +29,7 @@ * (filesystem-aware). */ -import type { ImageData } from './index.js'; +import type { ImageData } from './types.js'; /** * Extract the basename (filename) from a URL or path. 
diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 54a0935d22..7687478a0e 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -8,6 +8,8 @@ import { select } from 'unist-util-select'; import { transformToCommonMark } from './commonmark.js'; import type { CommonMarkOptions } from './commonmark.js'; import { embedImagesAsAttachments } from './attachments.js'; +export type { ImageData } from './types.js'; +import type { ImageData } from './types.js'; function sourceToStringList(src: string): string[] { const lines = src.split('\n').map((s) => `${s}\n`); @@ -23,14 +25,6 @@ function stripBlockMarkers(md: string): string { return md.replace(/^\+\+\+[^\n]*\n/gm, ''); } -/** Image data for embedding as cell attachments */ -export interface ImageData { - /** MIME type (e.g. 'image/png') */ - mime: string; - /** Base64-encoded image data */ - data: string; -} - export interface IpynbOptions { /** Markdown format: 'myst' preserves MyST syntax, 'commonmark' converts to plain CommonMark */ markdown?: 'myst' | 'commonmark'; diff --git a/packages/myst-to-ipynb/src/types.ts b/packages/myst-to-ipynb/src/types.ts new file mode 100644 index 0000000000..12b7e90f8a --- /dev/null +++ b/packages/myst-to-ipynb/src/types.ts @@ -0,0 +1,7 @@ +/** Image data for embedding as cell attachments */ +export interface ImageData { + /** MIME type (e.g. 'image/png') */ + mime: string; + /** Base64-encoded image data */ + data: string; +} From 27c2c15cf4c907d198062305fb5950c17672760b Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Wed, 25 Feb 2026 15:44:13 +1100 Subject: [PATCH 16/27] fix: strip leading slash from image URLs + fix misleading comment Address Copilot review comments: - Strip leading '/' from image URLs before path.join in collectImageData() so project-root URLs like '/_static/img/foo.png' resolve correctly. - Fix misleading 'reverse order' comment in embedImagesAsAttachments(). 
--- packages/myst-cli/src/build/ipynb/index.ts | 3 ++- packages/myst-to-ipynb/src/attachments.ts | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index 0b321f75ac..1baf3139a0 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -100,7 +100,8 @@ function collectImageData( if (imageData[url]) continue; // already processed const sourceFolder = getSourceFolder(url, sourceFile, sourcePath); - const filePath = path.join(sourceFolder, url); + const relativeUrl = url.replace(/^[\/\\]+/, ''); + const filePath = path.join(sourceFolder, relativeUrl); try { if (!fs.existsSync(filePath)) { diff --git a/packages/myst-to-ipynb/src/attachments.ts b/packages/myst-to-ipynb/src/attachments.ts index 795bb75ddb..4b5295dccd 100644 --- a/packages/myst-to-ipynb/src/attachments.ts +++ b/packages/myst-to-ipynb/src/attachments.ts @@ -91,7 +91,7 @@ export function embedImagesAsAttachments( }); } - // Apply replacements (iterate in reverse order to preserve positions) + // Apply replacements sequentially using simple string replacement for (const { original, replacement } of replacements) { updatedMd = updatedMd.replace(original, replacement); } From c5eea1c14ac1c8066c0579160d6d04da6c1c5f72 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Thu, 26 Feb 2026 17:00:50 +1100 Subject: [PATCH 17/27] fix: unwrap resolved include directives in CommonMark transform Include nodes that have been resolved by includeDirectiveTransform retain type 'include', causing myst-to-md to serialize them back as ```{include} directive syntax. Add an 'include' case to transformNode() that unwraps resolved children into the parent, so the included content (e.g. admonitions) is emitted as plain CommonMark in notebook cells. 
--- packages/myst-to-ipynb/src/commonmark.ts | 8 +++ packages/myst-to-ipynb/tests/commonmark.yml | 68 +++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/packages/myst-to-ipynb/src/commonmark.ts b/packages/myst-to-ipynb/src/commonmark.ts index 2f2de70e7a..84f3a81d03 100644 --- a/packages/myst-to-ipynb/src/commonmark.ts +++ b/packages/myst-to-ipynb/src/commonmark.ts @@ -468,6 +468,14 @@ function transformNode(node: GenericNode, dropSolutions: boolean): GenericNode | return transformDetails(node); case 'aside': return transformAside(node); + case 'include': + // Include directives are resolved during transformMdast — their children + // contain the fully-parsed content from the included file. Unwrap them + // so the resolved content is emitted instead of the directive syntax. + if (node.children?.length) { + return { type: 'root', children: node.children }; + } + return null; case 'mystDirective': return transformMystDirective(node); case 'mystRole': diff --git a/packages/myst-to-ipynb/tests/commonmark.yml b/packages/myst-to-ipynb/tests/commonmark.yml index 2ec441b774..ea10936697 100644 --- a/packages/myst-to-ipynb/tests/commonmark.yml +++ b/packages/myst-to-ipynb/tests/commonmark.yml @@ -498,3 +498,71 @@ cases: name: python nbformat: 4 nbformat_minor: 2 + + - title: resolved include directive unwrapped to content + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: include + file: _admonition/gpu.md + children: + - type: admonition + kind: note + children: + - type: admonitionTitle + children: + - type: text + value: GPU + - type: paragraph + children: + - type: text + value: This lecture requires a GPU-enabled machine. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "> **GPU**\n" + - ">\n" + - "> This lecture requires a GPU-enabled machine." 
+ metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: unresolved include directive with no children is dropped + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: text + value: Before include. + - type: include + file: _admonition/missing.md + - type: paragraph + children: + - type: text + value: After include. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Before include.\n" + - "\n" + - "After include." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 From 6837df283feebd2d6d07c7084ea676e340b6175a Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Thu, 26 Feb 2026 17:42:19 +1100 Subject: [PATCH 18/27] fix: lift code-cell blocks from gated exercise/solution nodes in ipynb export When gated syntax ({exercise-start}/{exercise-end}, {solution-start}/ {solution-end}) is used, joinGatesTransform nests all content between the gates, including {code-cell} blocks, as children of the exercise/solution node. During ipynb export these were absorbed into a single markdown cell, silently dropping executable code cells. Add liftCodeCellsFromGatedNodes() preprocessing step in writeIpynb that detects exercise/solution nodes containing code-cell blocks and splits them into alternating top-level markdown and code cells, preserving document order. When dropSolutions is true, solution nodes are left intact for transformToCommonMark to drop entirely. Also fix stripBlockMarkers regex to handle +++ at end-of-string without trailing newline, preventing empty markdown cells. 
Closes QuantEcon/mystmd#5 --- packages/myst-to-ipynb/src/index.ts | 127 +++++++++++- packages/myst-to-ipynb/tests/commonmark.yml | 208 ++++++++++++++++++++ 2 files changed, 333 insertions(+), 2 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 7687478a0e..3fe6b78d0b 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -1,7 +1,8 @@ -import type { Root } from 'myst-spec'; +import type { Root, Node } from 'myst-spec'; import type { Block, Code } from 'myst-spec-ext'; import type { Plugin } from 'unified'; import type { VFile } from 'vfile'; +import type { GenericNode } from 'myst-common'; import type { PageFrontmatter } from 'myst-frontmatter'; import { writeMd } from 'myst-to-md'; import { select } from 'unist-util-select'; @@ -22,7 +23,7 @@ function sourceToStringList(src: string): string[] { * These are MyST-specific block separators that have no meaning in notebooks. */ function stripBlockMarkers(md: string): string { - return md.replace(/^\+\+\+[^\n]*\n/gm, ''); + return md.replace(/^\+\+\+[^\n]*(\n|$)/gm, ''); } export interface IpynbOptions { @@ -50,6 +51,123 @@ export interface IpynbOptions { imageData?: Record; } +/** + * Check whether a node is a code-cell block (i.e. a `{code-cell}` directive + * that should become a notebook code cell). + */ +function isCodeCellBlock(node: GenericNode): boolean { + return node.type === 'block' && node.kind === 'notebook-code'; +} + +/** + * Lift code-cell blocks out of exercise/solution nodes that used gated syntax. + * + * When gated syntax (`{exercise-start}`/`{exercise-end}`) is used, the + * `joinGatesTransform` nests all content between the gates — including + * `{code-cell}` blocks — as children of the exercise/solution node. This + * means those code-cell blocks never appear as top-level notebook cells; + * they are absorbed into a single markdown cell and silently dropped. 
+ * + * This function walks the root's children and, for any exercise/solution + * node that contains code-cell blocks, splits them into alternating + * markdown cells and code cells at the top level: + * + * BEFORE: block { solution { title, para, block{code}, para } } + * AFTER: block { solution { title, para } } + * block{code} + * block { para } + * + * When `dropSolutions` is true, solution nodes are left intact so that + * `transformToCommonMark` can drop them entirely (including their code cells). + */ +function liftCodeCellsFromGatedNodes(root: Root, opts?: CommonMarkOptions): Root { + const newChildren: Node[] = []; + let modified = false; + + for (const child of root.children) { + // Determine if this child is (or wraps) an exercise/solution with code cells. + // After blockNestingTransform, the structure is: + // block > exercise/solution > [content..., block{notebook-code}, ...] + // In tests, exercise/solution may appear directly as root children. + let targetNode: GenericNode | null = null; + let isWrappedInBlock = false; + + const c = child as GenericNode; + if (c.type === 'exercise' || c.type === 'solution') { + targetNode = c; + } else if ( + c.type === 'block' && + c.children?.length === 1 && + (c.children[0].type === 'exercise' || c.children[0].type === 'solution') + ) { + targetNode = c.children[0]; + isWrappedInBlock = true; + } + + // If not a gated node, no code cells inside, or a solution that should + // be dropped (leave it intact for transformToCommonMark to handle), skip. + if ( + !targetNode || + !targetNode.children?.some(isCodeCellBlock) || + (targetNode.type === 'solution' && opts?.dropSolutions) + ) { + newChildren.push(child); + continue; + } + + modified = true; + + // Split exercise/solution children into groups separated by code-cell blocks. + // The first markdown group retains the exercise/solution wrapper (for title + // rendering). Subsequent markdown groups are plain content blocks. 
+ const mdContent: GenericNode[] = []; + let isFirstGroup = true; + + const flushMarkdown = () => { + if (mdContent.length === 0) return; + const content = [...mdContent]; + mdContent.length = 0; + + if (isFirstGroup) { + // Wrap in exercise/solution node to preserve type, title, enumerator, etc. + const node: GenericNode = { ...targetNode!, children: content }; + if (isWrappedInBlock) { + newChildren.push({ type: 'block', children: [node] } as unknown as Node); + } else { + newChildren.push(node as unknown as Node); + } + isFirstGroup = false; + } else { + // Continuation content — wrap in a plain block + if (isWrappedInBlock) { + newChildren.push({ type: 'block', children: content } as unknown as Node); + } else { + // When not block-wrapped (test scenarios), push content directly + for (const n of content) { + newChildren.push(n as unknown as Node); + } + } + } + }; + + for (const gatedChild of targetNode.children ?? []) { + if (isCodeCellBlock(gatedChild)) { + flushMarkdown(); + // Lift code cell to top level + newChildren.push(gatedChild as unknown as Node); + } else { + mdContent.push(gatedChild); + } + } + + // Flush any remaining markdown content + flushMarkdown(); + } + + if (!modified) return root; + return { ...root, children: newChildren } as Root; +} + export function writeIpynb( file: VFile, node: Root, @@ -58,6 +176,11 @@ export function writeIpynb( ) { const markdownFormat = options?.markdown ?? 'myst'; + // Lift code-cell blocks out of gated exercise/solution nodes + // so they become proper notebook code cells instead of being + // absorbed into markdown cells. 
+ node = liftCodeCellsFromGatedNodes(node, options?.commonmark); + const cells = (node.children as Block[]) .map((block: Block) => { if (block.type === 'block' && block.kind === 'notebook-code') { diff --git a/packages/myst-to-ipynb/tests/commonmark.yml b/packages/myst-to-ipynb/tests/commonmark.yml index ea10936697..a5bbbad222 100644 --- a/packages/myst-to-ipynb/tests/commonmark.yml +++ b/packages/myst-to-ipynb/tests/commonmark.yml @@ -294,6 +294,214 @@ cases: nbformat: 4 nbformat_minor: 2 + - title: gated solution with code cell lifted to top level + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution to Exercise 1 + - type: paragraph + children: + - type: text + value: "Here's one solution:" + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "def factorial(n):\n k = 1\n for i in range(n):\n k = k * (i + 1)\n return k\n\nfactorial(4)" + - type: outputs + children: [] + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Solution to Exercise 1**\n" + - "\n" + - "Here's one solution:" + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "def factorial(n):\n" + - " k = 1\n" + - " for i in range(n):\n" + - " k = k * (i + 1)\n" + - " return k\n" + - "\n" + - "factorial(4)" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: gated solution with multiple code cells interleaved with markdown + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution + - type: paragraph + children: + - type: text + value: First approach + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "x = 1" + - type: 
outputs + children: [] + - type: paragraph + children: + - type: text + value: Second approach + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "x = 2" + - type: outputs + children: [] + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Solution**\n" + - "\n" + - "First approach" + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "x = 1" + - cell_type: markdown + metadata: {} + source: + - "Second approach" + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "x = 2" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: gated exercise with code cell + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: exercise + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: Exercise + - type: paragraph + children: + - type: text + value: Write a factorial function. + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "# your code here" + - type: outputs + children: [] + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Exercise 1**\n" + - "\n" + - "Write a factorial function." 
+ - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "# your code here" + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: gated solution dropped when configured + options: + markdown: commonmark + commonmark: + dropSolutions: true + mdast: + type: root + children: + - type: block + children: + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution + - type: paragraph + children: + - type: text + value: "Here's the answer:" + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "x = 42" + - type: outputs + children: [] + ipynb: + cells: [] + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + - title: inline math with underscores not escaped options: markdown: commonmark From c0c4e339e2a50a9c61de797e0ec917e118fd601b Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Thu, 26 Feb 2026 18:07:28 +1100 Subject: [PATCH 19/27] fix: handle real AST structure where exercise/solution share a block The previous fix (05bdc24) assumed exercise/solution nodes would be the sole child of a block. In reality, blockNestingTransform groups all consecutive non-block siblings into a single wrapper block, so the AST is: root > block { para, exercise {...}, solution {..., block{code}}, para } The fix now scans inside each block's children for exercise/solution nodes containing code-cell blocks, and splits the block accordingly. Extracted helper functions for clarity: - isGatedNodeWithCodeCells: identifies target nodes - liftFromExerciseSolution: splits a single node's children - splitBlockWithGatedNodes: processes a block with mixed children Added tests for the shared-block structure (exercise + solution + other content in the same block) and for dropSolutions with shared blocks. 
Refs QuantEcon/mystmd#5, QuantEcon/mystmd#6 --- packages/myst-to-ipynb/src/index.ts | 187 ++++++++++++-------- packages/myst-to-ipynb/tests/commonmark.yml | 132 ++++++++++++++ 2 files changed, 247 insertions(+), 72 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 3fe6b78d0b..00341328c2 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -59,21 +59,36 @@ function isCodeCellBlock(node: GenericNode): boolean { return node.type === 'block' && node.kind === 'notebook-code'; } +/** + * Check whether a node is an exercise or solution that contains code-cell blocks. + */ +function isGatedNodeWithCodeCells(node: GenericNode, opts?: CommonMarkOptions): boolean { + if (node.type !== 'exercise' && node.type !== 'solution') return false; + // Skip solutions that should be dropped — leave intact for transformToCommonMark + if (node.type === 'solution' && opts?.dropSolutions) return false; + return node.children?.some(isCodeCellBlock) ?? false; +} + /** * Lift code-cell blocks out of exercise/solution nodes that used gated syntax. * * When gated syntax (`{exercise-start}`/`{exercise-end}`) is used, the * `joinGatesTransform` nests all content between the gates — including - * `{code-cell}` blocks — as children of the exercise/solution node. This - * means those code-cell blocks never appear as top-level notebook cells; - * they are absorbed into a single markdown cell and silently dropped. + * `{code-cell}` blocks — as children of the exercise/solution node. Then + * `blockNestingTransform` groups the exercise/solution with neighboring + * non-block siblings into a single wrapper block. The real AST structure is: + * + * root > block { para, exercise { para, block{code} }, solution { ... 
}, para } * - * This function walks the root's children and, for any exercise/solution - * node that contains code-cell blocks, splits them into alternating - * markdown cells and code cells at the top level: + * This means code-cell blocks inside exercise/solution never appear as + * top-level notebook cells; they are absorbed into a single markdown cell. * - * BEFORE: block { solution { title, para, block{code}, para } } - * AFTER: block { solution { title, para } } + * This function walks each block's children, finds exercise/solution nodes + * that contain code-cell blocks, and splits the block so code cells are + * emitted as top-level notebook code cells: + * + * BEFORE: block { para, solution { title, para, block{code}, para } } + * AFTER: block { para, solution { title, para } } * block{code} * block { para } * @@ -85,87 +100,115 @@ function liftCodeCellsFromGatedNodes(root: Root, opts?: CommonMarkOptions): Root let modified = false; for (const child of root.children) { - // Determine if this child is (or wraps) an exercise/solution with code cells. - // After blockNestingTransform, the structure is: - // block > exercise/solution > [content..., block{notebook-code}, ...] - // In tests, exercise/solution may appear directly as root children. - let targetNode: GenericNode | null = null; - let isWrappedInBlock = false; - const c = child as GenericNode; - if (c.type === 'exercise' || c.type === 'solution') { - targetNode = c; - } else if ( - c.type === 'block' && - c.children?.length === 1 && - (c.children[0].type === 'exercise' || c.children[0].type === 'solution') - ) { - targetNode = c.children[0]; - isWrappedInBlock = true; + + // Case 1: exercise/solution directly as root child (e.g. 
in tests) + if (isGatedNodeWithCodeCells(c, opts)) { + modified = true; + liftFromExerciseSolution(c, newChildren, false); + continue; } - // If not a gated node, no code cells inside, or a solution that should - // be dropped (leave it intact for transformToCommonMark to handle), skip. - if ( - !targetNode || - !targetNode.children?.some(isCodeCellBlock) || - (targetNode.type === 'solution' && opts?.dropSolutions) - ) { - newChildren.push(child); + // Case 2: block containing exercise/solution among its children + if (c.type === 'block' && c.children?.some((ch: GenericNode) => isGatedNodeWithCodeCells(ch, opts))) { + modified = true; + splitBlockWithGatedNodes(c, newChildren, opts); continue; } - modified = true; + // No gated nodes — keep as-is + newChildren.push(child); + } - // Split exercise/solution children into groups separated by code-cell blocks. - // The first markdown group retains the exercise/solution wrapper (for title - // rendering). Subsequent markdown groups are plain content blocks. - const mdContent: GenericNode[] = []; - let isFirstGroup = true; + if (!modified) return root; + return { ...root, children: newChildren } as Root; +} - const flushMarkdown = () => { - if (mdContent.length === 0) return; - const content = [...mdContent]; - mdContent.length = 0; +/** + * Split a single exercise/solution node's children into alternating + * markdown content and top-level code cells. + * + * The first group of markdown content retains the exercise/solution wrapper + * (for title/enumerator rendering). Subsequent groups become plain content. + * + * @param wrapInBlock If true, wraps output groups in block nodes. + */ +function liftFromExerciseSolution( + node: GenericNode, + output: Node[], + wrapInBlock: boolean, +): void { + const mdContent: GenericNode[] = []; + let isFirstGroup = true; - if (isFirstGroup) { - // Wrap in exercise/solution node to preserve type, title, enumerator, etc. 
- const node: GenericNode = { ...targetNode!, children: content }; - if (isWrappedInBlock) { - newChildren.push({ type: 'block', children: [node] } as unknown as Node); - } else { - newChildren.push(node as unknown as Node); - } - isFirstGroup = false; + const flushMarkdown = () => { + if (mdContent.length === 0) return; + const content = [...mdContent]; + mdContent.length = 0; + + if (isFirstGroup) { + // Preserve the exercise/solution wrapper for title rendering + const wrapper: GenericNode = { ...node, children: content }; + if (wrapInBlock) { + output.push({ type: 'block', children: [wrapper] } as unknown as Node); } else { - // Continuation content — wrap in a plain block - if (isWrappedInBlock) { - newChildren.push({ type: 'block', children: content } as unknown as Node); - } else { - // When not block-wrapped (test scenarios), push content directly - for (const n of content) { - newChildren.push(n as unknown as Node); - } - } + output.push(wrapper as unknown as Node); } - }; - - for (const gatedChild of targetNode.children ?? []) { - if (isCodeCellBlock(gatedChild)) { - flushMarkdown(); - // Lift code cell to top level - newChildren.push(gatedChild as unknown as Node); + isFirstGroup = false; + } else { + if (wrapInBlock) { + output.push({ type: 'block', children: content } as unknown as Node); } else { - mdContent.push(gatedChild); + for (const n of content) { + output.push(n as unknown as Node); + } } } + }; - // Flush any remaining markdown content - flushMarkdown(); + for (const gatedChild of node.children ?? []) { + if (isCodeCellBlock(gatedChild)) { + flushMarkdown(); + output.push(gatedChild as unknown as Node); + } else { + mdContent.push(gatedChild); + } } + flushMarkdown(); +} - if (!modified) return root; - return { ...root, children: newChildren } as Root; +/** + * Process a block that contains one or more exercise/solution nodes with + * embedded code cells, along with other child nodes. 
Splits the block into + * multiple top-level blocks and code cells as needed. + * + * For non-exercise/solution children, they accumulate in a markdown block. + * When an exercise/solution with code cells is encountered, the accumulated + * block is flushed, then the exercise/solution is expanded via + * liftFromExerciseSolution. + */ +function splitBlockWithGatedNodes( + block: GenericNode, + output: Node[], + opts?: CommonMarkOptions, +): void { + const pending: GenericNode[] = []; + + const flushPending = () => { + if (pending.length === 0) return; + output.push({ type: 'block', children: [...pending] } as unknown as Node); + pending.length = 0; + }; + + for (const child of block.children ?? []) { + if (isGatedNodeWithCodeCells(child, opts)) { + flushPending(); + liftFromExerciseSolution(child, output, true); + } else { + pending.push(child); + } + } + flushPending(); } export function writeIpynb( diff --git a/packages/myst-to-ipynb/tests/commonmark.yml b/packages/myst-to-ipynb/tests/commonmark.yml index a5bbbad222..e39d95df90 100644 --- a/packages/myst-to-ipynb/tests/commonmark.yml +++ b/packages/myst-to-ipynb/tests/commonmark.yml @@ -502,6 +502,138 @@ cases: nbformat: 4 nbformat_minor: 2 + - title: gated exercise and solution sharing a block with other content + options: + markdown: commonmark + mdast: + type: root + children: + - type: block + children: + - type: paragraph + children: + - type: text + value: Text before exercise. + - type: exercise + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: Exercise + - type: paragraph + children: + - type: text + value: Write a factorial function. 
+ - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution to Exercise 1 + - type: paragraph + children: + - type: text + value: "Here's one solution:" + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "factorial(4)" + - type: outputs + children: [] + - type: paragraph + children: + - type: text + value: Text after solution. + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "Text before exercise.\n" + - "\n" + - "**Exercise 1**\n" + - "\n" + - "Write a factorial function." + - cell_type: markdown + metadata: {} + source: + - "**Solution to Exercise 1**\n" + - "\n" + - "Here's one solution:" + - cell_type: code + execution_count: null + metadata: {} + outputs: [] + source: + - "factorial(4)" + - cell_type: markdown + metadata: {} + source: + - "Text after solution." + metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + + - title: gated solution dropped from shared block when configured + options: + markdown: commonmark + commonmark: + dropSolutions: true + mdast: + type: root + children: + - type: block + children: + - type: exercise + enumerator: '1' + children: + - type: admonitionTitle + children: + - type: text + value: Exercise + - type: paragraph + children: + - type: text + value: Solve this problem. + - type: solution + children: + - type: admonitionTitle + children: + - type: text + value: Solution + - type: paragraph + children: + - type: text + value: "The answer:" + - type: block + kind: notebook-code + children: + - type: code + lang: python3 + executable: true + value: "x = 42" + - type: outputs + children: [] + ipynb: + cells: + - cell_type: markdown + metadata: {} + source: + - "**Exercise 1**\n" + - "\n" + - "Solve this problem." 
+ metadata: + language_info: + name: python + nbformat: 4 + nbformat_minor: 2 + - title: inline math with underscores not escaped options: markdown: commonmark From 033cd77a96250c09eec10e11da2d981be2ba0f85 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 10:16:33 +1100 Subject: [PATCH 20/27] fix: serialize epigraph/pull-quote/blockquote containers in CommonMark/ipynb export The container handler in myst-to-md only handled figure, table, and code kinds. Containers with kind 'quote' (produced by the epigraph, pull-quote, and blockquote directives) fell through and returned empty string, silently dropping all content during ipynb export. Add a 'quote' branch that serializes the blockquote child as a standard markdown blockquote, with optional attribution rendered as an em-dash line. Closes QuantEcon/mystmd#7 --- packages/myst-to-md/src/directives.ts | 18 +++++++- packages/myst-to-md/tests/directives.yml | 56 ++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/packages/myst-to-md/src/directives.ts b/packages/myst-to-md/src/directives.ts index 93558289b1..86b477cffb 100644 --- a/packages/myst-to-md/src/directives.ts +++ b/packages/myst-to-md/src/directives.ts @@ -181,7 +181,7 @@ function containerValidator(node: any, file: VFile) { ruleId: RuleId.mdRenders, }); } - if (kind !== 'figure' && kind !== 'table' && kind !== 'code') { + if (kind !== 'figure' && kind !== 'table' && kind !== 'code' && kind !== 'quote') { fileError(file, `Unknown kind on container node: ${kind}`, { node, source: 'myst-to-md', @@ -203,7 +203,21 @@ function container(node: any, _: Parent, state: NestedState, info: Info): string const captionNode: GenericNode | null = select('caption', node); const legendNode: GenericNode | null = select('legend', node); const children = [...(captionNode?.children || []), ...(legendNode?.children || [])]; - if (node.kind === 'figure') { + if (node.kind === 'quote') { + const blockquoteNode: GenericNode | null = 
select('blockquote', node); + const captionNode: GenericNode | null = select('caption', node); + if (!blockquoteNode) return ''; + // Serialize the blockquote content using the default blockquote handler + let result = defaultHandlers.blockquote(blockquoteNode as any, _ as any, state, info); + // Append attribution (caption) as a blockquote line with em-dash prefix + if (captionNode) { + const attribution = state.containerPhrasing(captionNode as any, info); + if (attribution) { + result += '\n>\n> \u2014 ' + attribution; + } + } + return result; + } else if (node.kind === 'figure') { const imageNodes: GenericNode[] = selectAll('image', node); const imageNode = imageNodes.find((img) => !img.placeholder); if (imageNode?.data?.altTextIsAutoGenerated) { diff --git a/packages/myst-to-md/tests/directives.yml b/packages/myst-to-md/tests/directives.yml index f56124b645..623e1edf84 100644 --- a/packages/myst-to-md/tests/directives.yml +++ b/packages/myst-to-md/tests/directives.yml @@ -960,3 +960,59 @@ cases: :::{topic} Topic content ::: + - title: epigraph blockquote + mdast: + type: root + children: + - type: container + kind: quote + class: epigraph + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: 'Python has gotten sufficiently weapons grade that we don''t descend into R anymore.' + markdown: |- + > Python has gotten sufficiently weapons grade that we don't descend into R anymore. + - title: epigraph blockquote with attribution + mdast: + type: root + children: + - type: container + kind: quote + class: epigraph + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: 'Debugging is twice as hard as writing the code in the first place.' + - type: caption + children: + - type: paragraph + children: + - type: text + value: Brian Kernighan + markdown: |- + > Debugging is twice as hard as writing the code in the first place. 
+ > + > — Brian Kernighan + - title: pull-quote blockquote + mdast: + type: root + children: + - type: container + kind: quote + class: pull-quote + children: + - type: blockquote + children: + - type: paragraph + children: + - type: text + value: An important quote. + markdown: |- + > An important quote. From 6d6ba988e48f1836548ff26481663e29246c7aad Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 11:33:11 +1100 Subject: [PATCH 21/27] debug: add crossReference empty-URL instrumentation and node.url fallback Add MYST_DEBUG_XREF env var to dump full AST node details when a crossReference resolves with an empty URL during CommonMark serialization. This helps diagnose QuantEcon/mystmd#8 where some {ref} roles produce [text]() links in ipynb export. Also add a defensive fallback to use node.url (set by MultiPageReferenceResolver for cross-page refs) when urlSource, label, and identifier are all missing. This prevents empty URLs for resolved remote references without changing behaviour for any existing case. --- packages/myst-to-md/src/references.ts | 25 ++++++++++++++++++++++-- packages/myst-to-md/tests/references.yml | 11 +++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/packages/myst-to-md/src/references.ts b/packages/myst-to-md/src/references.ts index a044ef2a4d..1ebf6fb143 100644 --- a/packages/myst-to-md/src/references.ts +++ b/packages/myst-to-md/src/references.ts @@ -11,10 +11,31 @@ function labelWrapper(handler: Handle) { } function crossReference(node: any, _: Parent, state: NestedState, info: Info): string { - const { urlSource, label, identifier } = node; + const { urlSource, label, identifier, url } = node; + const resolvedUrl = + urlSource ?? (label ? `#${label}` : identifier ? `#${identifier}` : url ?? ''); + if (!resolvedUrl && process.env.MYST_DEBUG_XREF) { + const childText = node.children + ?.map((c: any) => c.value ?? 
'') + .join('') + .slice(0, 80); + console.warn( + `[myst-to-md] crossReference has empty URL:\n` + + ` identifier : ${JSON.stringify(node.identifier)}\n` + + ` label : ${JSON.stringify(node.label)}\n` + + ` urlSource : ${JSON.stringify(node.urlSource)}\n` + + ` url : ${JSON.stringify(node.url)}\n` + + ` kind : ${JSON.stringify(node.kind)}\n` + + ` resolved : ${JSON.stringify(node.resolved)}\n` + + ` remote : ${JSON.stringify(node.remote)}\n` + + ` html_id : ${JSON.stringify(node.html_id)}\n` + + ` childText : ${JSON.stringify(childText)}\n` + + ` full node : ${JSON.stringify(node, null, 2)}`, + ); + } const nodeCopy = { ...node, - url: urlSource ?? (label ? `#${label}` : identifier ? `#${identifier}` : ''), + url: resolvedUrl, }; return defaultHandlers.link(nodeCopy, _, state, info); } diff --git a/packages/myst-to-md/tests/references.yml b/packages/myst-to-md/tests/references.yml index c8499a3d6a..6bf7db30bd 100644 --- a/packages/myst-to-md/tests/references.yml +++ b/packages/myst-to-md/tests/references.yml @@ -181,3 +181,14 @@ cases: value: markdown markdown: |- [Some % *markdown*](#example) + - title: crossReference - url fallback for remote refs + mdast: + type: root + children: + - type: crossReference + url: /other-page#section + children: + - type: text + value: Section 7 + markdown: |- + [Section 7](/other-page#section) From bff99f881e5e715a69b3950cea0031a1e4466602 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 11:47:48 +1100 Subject: [PATCH 22/27] fix: use html_id as fallback for crossReference URLs in CommonMark export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference resolver (addChildrenFromTargetNode) marks crossReferences as resolved and sets html_id + kind, but for same-page targets the identifier and label fields end up undefined. The CommonMark serializer then generates empty URLs like [Section 7](). 
Add html_id to the URL fallback chain: urlSource → #label → #identifier → #html_id → url → '' This fixes all 23 unique empty-URL crossReferences found in the QuantEcon lectures (headings, equations, exercises, code blocks, paragraphs). Closes QuantEcon/mystmd#8 --- packages/myst-to-md/src/references.ts | 7 +++--- packages/myst-to-md/tests/references.yml | 27 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/packages/myst-to-md/src/references.ts b/packages/myst-to-md/src/references.ts index 1ebf6fb143..7d05f6b7f4 100644 --- a/packages/myst-to-md/src/references.ts +++ b/packages/myst-to-md/src/references.ts @@ -11,9 +11,10 @@ function labelWrapper(handler: Handle) { } function crossReference(node: any, _: Parent, state: NestedState, info: Info): string { - const { urlSource, label, identifier, url } = node; + const { urlSource, label, identifier, url, html_id } = node; const resolvedUrl = - urlSource ?? (label ? `#${label}` : identifier ? `#${identifier}` : url ?? ''); + urlSource ?? + (label ? `#${label}` : identifier ? `#${identifier}` : html_id ? `#${html_id}` : url ?? ''); if (!resolvedUrl && process.env.MYST_DEBUG_XREF) { const childText = node.children ?.map((c: any) => c.value ?? 
'') @@ -25,10 +26,10 @@ function crossReference(node: any, _: Parent, state: NestedState, info: Info): s ` label : ${JSON.stringify(node.label)}\n` + ` urlSource : ${JSON.stringify(node.urlSource)}\n` + ` url : ${JSON.stringify(node.url)}\n` + + ` html_id : ${JSON.stringify(node.html_id)}\n` + ` kind : ${JSON.stringify(node.kind)}\n` + ` resolved : ${JSON.stringify(node.resolved)}\n` + ` remote : ${JSON.stringify(node.remote)}\n` + - ` html_id : ${JSON.stringify(node.html_id)}\n` + ` childText : ${JSON.stringify(childText)}\n` + ` full node : ${JSON.stringify(node, null, 2)}`, ); diff --git a/packages/myst-to-md/tests/references.yml b/packages/myst-to-md/tests/references.yml index 6bf7db30bd..4bde3188d7 100644 --- a/packages/myst-to-md/tests/references.yml +++ b/packages/myst-to-md/tests/references.yml @@ -192,3 +192,30 @@ cases: value: Section 7 markdown: |- [Section 7](/other-page#section) + - title: crossReference - html_id fallback for resolved refs + mdast: + type: root + children: + - type: crossReference + kind: heading + resolved: true + html_id: oop-solow-growth + children: + - type: text + value: the next section + markdown: |- + [the next section](#oop-solow-growth) + - title: crossReference - html_id fallback for equation + mdast: + type: root + children: + - type: crossReference + kind: equation + resolved: true + html_id: solow-lom + enumerator: '1' + children: + - type: text + value: (1) + markdown: |- + [(1)](#solow-lom) From 89a57e52018b16e7581376146153f1270b7f11a4 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 12:25:25 +1100 Subject: [PATCH 23/27] refactor(myst-to-md): remove MYST_DEBUG_XREF instrumentation The ad-hoc debug logging served its purpose for diagnosing the html_id fallback issue and is no longer needed. A system-wide debug infrastructure should be designed separately. 
--- packages/myst-to-md/src/references.ts | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/packages/myst-to-md/src/references.ts b/packages/myst-to-md/src/references.ts index 7d05f6b7f4..75f89d83bd 100644 --- a/packages/myst-to-md/src/references.ts +++ b/packages/myst-to-md/src/references.ts @@ -15,25 +15,6 @@ function crossReference(node: any, _: Parent, state: NestedState, info: Info): s const resolvedUrl = urlSource ?? (label ? `#${label}` : identifier ? `#${identifier}` : html_id ? `#${html_id}` : url ?? ''); - if (!resolvedUrl && process.env.MYST_DEBUG_XREF) { - const childText = node.children - ?.map((c: any) => c.value ?? '') - .join('') - .slice(0, 80); - console.warn( - `[myst-to-md] crossReference has empty URL:\n` + - ` identifier : ${JSON.stringify(node.identifier)}\n` + - ` label : ${JSON.stringify(node.label)}\n` + - ` urlSource : ${JSON.stringify(node.urlSource)}\n` + - ` url : ${JSON.stringify(node.url)}\n` + - ` html_id : ${JSON.stringify(node.html_id)}\n` + - ` kind : ${JSON.stringify(node.kind)}\n` + - ` resolved : ${JSON.stringify(node.resolved)}\n` + - ` remote : ${JSON.stringify(node.remote)}\n` + - ` childText : ${JSON.stringify(childText)}\n` + - ` full node : ${JSON.stringify(node, null, 2)}`, - ); - } const nodeCopy = { ...node, url: resolvedUrl, From 59267a82dd773cd31c6193ff79c254d85c597467 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 13:29:49 +1100 Subject: [PATCH 24/27] style: fix prettier formatting in myst-to-ipynb and myst-to-md --- packages/myst-to-ipynb/src/index.ts | 11 +++++------ packages/myst-to-md/src/references.ts | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/myst-to-ipynb/src/index.ts b/packages/myst-to-ipynb/src/index.ts index 00341328c2..585da7eb72 100644 --- a/packages/myst-to-ipynb/src/index.ts +++ b/packages/myst-to-ipynb/src/index.ts @@ -110,7 +110,10 @@ function liftCodeCellsFromGatedNodes(root: Root, opts?: CommonMarkOptions): Root } // Case 2: 
block containing exercise/solution among its children - if (c.type === 'block' && c.children?.some((ch: GenericNode) => isGatedNodeWithCodeCells(ch, opts))) { + if ( + c.type === 'block' && + c.children?.some((ch: GenericNode) => isGatedNodeWithCodeCells(ch, opts)) + ) { modified = true; splitBlockWithGatedNodes(c, newChildren, opts); continue; @@ -133,11 +136,7 @@ function liftCodeCellsFromGatedNodes(root: Root, opts?: CommonMarkOptions): Root * * @param wrapInBlock If true, wraps output groups in block nodes. */ -function liftFromExerciseSolution( - node: GenericNode, - output: Node[], - wrapInBlock: boolean, -): void { +function liftFromExerciseSolution(node: GenericNode, output: Node[], wrapInBlock: boolean): void { const mdContent: GenericNode[] = []; let isFirstGroup = true; diff --git a/packages/myst-to-md/src/references.ts b/packages/myst-to-md/src/references.ts index 75f89d83bd..ebeb689d59 100644 --- a/packages/myst-to-md/src/references.ts +++ b/packages/myst-to-md/src/references.ts @@ -14,7 +14,7 @@ function crossReference(node: any, _: Parent, state: NestedState, info: Info): s const { urlSource, label, identifier, url, html_id } = node; const resolvedUrl = urlSource ?? - (label ? `#${label}` : identifier ? `#${identifier}` : html_id ? `#${html_id}` : url ?? ''); + (label ? `#${label}` : identifier ? `#${identifier}` : html_id ? `#${html_id}` : (url ?? 
'')); const nodeCopy = { ...node, url: resolvedUrl, From 7ec2d37b50b8d74ef26fdc48bca3894ae2d857d6 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 14:23:08 +1100 Subject: [PATCH 25/27] Fix image attachment regex for escaped markdown characters - Update regex to handle escaped brackets in alt text and escaped parentheses in URLs produced by mdast-util-to-markdown - Unescape URLs before looking up in imageData dictionary - Refactor to single-pass replacement using md.replace(regex, callback) - Add tests for escaped parentheses in URLs and escaped brackets in alt text --- packages/myst-to-ipynb/src/attachments.ts | 32 ++++++++----------- .../myst-to-ipynb/tests/attachments.spec.ts | 24 ++++++++++++++ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/packages/myst-to-ipynb/src/attachments.ts b/packages/myst-to-ipynb/src/attachments.ts index 4b5295dccd..ecba088a0b 100644 --- a/packages/myst-to-ipynb/src/attachments.ts +++ b/packages/myst-to-ipynb/src/attachments.ts @@ -57,20 +57,22 @@ export function embedImagesAsAttachments( const attachments: Record> = {}; const usedNames = new Set(); - let updatedMd = md; // Match markdown image syntax: ![alt](url) and ![alt](url "title") - const imgRegex = /!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/g; - const replacements: Array<{ original: string; replacement: string }> = []; + // Handles escaped brackets in alt text and escaped parentheses in URLs. + // The escaped sequences (\] and \)) must appear BEFORE the single-char + // alternatives so the regex engine matches them as pairs first. 
+ const imgRegex = /!\[((?:\\\]|[^\]])*)\]\(((?:\\\)|[^)\s])+)(?:\s+"[^"]*")?\)/g; - let match; - while ((match = imgRegex.exec(md)) !== null) { - const [fullMatch, alt, url] = match; - const data = imageData[url]; - if (!data) continue; + const updatedMd = md.replace(imgRegex, (fullMatch, alt, url) => { + // Unescape markdown characters that mdast-util-to-markdown might have added + const unescapedUrl = url.replace(/\\([()[\]])/g, '$1'); + + const data = imageData[unescapedUrl]; + if (!data) return fullMatch; // Generate a unique attachment name from the basename - const base = basename(url); + const base = basename(unescapedUrl); let name = base; let counter = 1; while (usedNames.has(name)) { @@ -85,16 +87,8 @@ export function embedImagesAsAttachments( usedNames.add(name); attachments[name] = { [data.mime]: data.data }; - replacements.push({ - original: fullMatch, - replacement: `![${alt}](attachment:${name})`, - }); - } - - // Apply replacements sequentially using simple string replacement - for (const { original, replacement } of replacements) { - updatedMd = updatedMd.replace(original, replacement); - } + return `![${alt}](attachment:${name})`; + }); if (Object.keys(attachments).length > 0) { return { md: updatedMd, attachments }; diff --git a/packages/myst-to-ipynb/tests/attachments.spec.ts b/packages/myst-to-ipynb/tests/attachments.spec.ts index da36cac927..5e889f1b48 100644 --- a/packages/myst-to-ipynb/tests/attachments.spec.ts +++ b/packages/myst-to-ipynb/tests/attachments.spec.ts @@ -82,4 +82,28 @@ describe('embedImagesAsAttachments', () => { 'chart.png': { 'image/png': 'DATA' }, }); }); + + test('handles escaped parentheses in URL', () => { + const md = '![Chart](/_static/img\\(1\\).png)'; + const imageData = { + '/_static/img(1).png': { mime: 'image/png', data: 'base64data' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![Chart](attachment:img(1).png)'); + expect(result.attachments).toEqual({ + 'img(1).png': 
{ 'image/png': 'base64data' }, + }); + }); + + test('handles escaped brackets in alt text', () => { + const md = '![alt \\[text\\]](/_static/chart.png)'; + const imageData = { + '/_static/chart.png': { mime: 'image/png', data: 'DATA' }, + }; + const result = embedImagesAsAttachments(md, imageData); + expect(result.md).toBe('![alt \\[text\\]](attachment:chart.png)'); + expect(result.attachments).toEqual({ + 'chart.png': { 'image/png': 'DATA' }, + }); + }); }); From 65e71cef84bd06d50da836394a6ce65768872cbc Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 14:33:33 +1100 Subject: [PATCH 26/27] Fix prettier formatting --- packages/myst-to-ipynb/src/attachments.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/myst-to-ipynb/src/attachments.ts b/packages/myst-to-ipynb/src/attachments.ts index ecba088a0b..7e5149bb72 100644 --- a/packages/myst-to-ipynb/src/attachments.ts +++ b/packages/myst-to-ipynb/src/attachments.ts @@ -67,7 +67,7 @@ export function embedImagesAsAttachments( const updatedMd = md.replace(imgRegex, (fullMatch, alt, url) => { // Unescape markdown characters that mdast-util-to-markdown might have added const unescapedUrl = url.replace(/\\([()[\]])/g, '$1'); - + const data = imageData[unescapedUrl]; if (!data) return fullMatch; From 43edd9e14419b36390c2859702932c9bd740e976 Mon Sep 17 00:00:00 2001 From: Matt McKay Date: Fri, 27 Feb 2026 14:41:37 +1100 Subject: [PATCH 27/27] Fix lint errors: remove shadowed variable and useless escape --- packages/myst-cli/src/build/ipynb/index.ts | 2 +- packages/myst-to-md/src/directives.ts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/myst-cli/src/build/ipynb/index.ts b/packages/myst-cli/src/build/ipynb/index.ts index 1baf3139a0..dc675ef934 100644 --- a/packages/myst-cli/src/build/ipynb/index.ts +++ b/packages/myst-cli/src/build/ipynb/index.ts @@ -100,7 +100,7 @@ function collectImageData( if (imageData[url]) continue; // already processed const 
sourceFolder = getSourceFolder(url, sourceFile, sourcePath); - const relativeUrl = url.replace(/^[\/\\]+/, ''); + const relativeUrl = url.replace(/^[/\\]+/, ''); const filePath = path.join(sourceFolder, relativeUrl); try { diff --git a/packages/myst-to-md/src/directives.ts b/packages/myst-to-md/src/directives.ts index 86b477cffb..dda85f7c27 100644 --- a/packages/myst-to-md/src/directives.ts +++ b/packages/myst-to-md/src/directives.ts @@ -205,7 +205,6 @@ function container(node: any, _: Parent, state: NestedState, info: Info): string const children = [...(captionNode?.children || []), ...(legendNode?.children || [])]; if (node.kind === 'quote') { const blockquoteNode: GenericNode | null = select('blockquote', node); - const captionNode: GenericNode | null = select('caption', node); if (!blockquoteNode) return ''; // Serialize the blockquote content using the default blockquote handler let result = defaultHandlers.blockquote(blockquoteNode as any, _ as any, state, info);