diff --git a/packages/layout-engine/painters/dom/src/features/math/converters/accent.ts b/packages/layout-engine/painters/dom/src/features/math/converters/accent.ts
new file mode 100644
index 0000000000..3ffecdc257
--- /dev/null
+++ b/packages/layout-engine/painters/dom/src/features/math/converters/accent.ts
@@ -0,0 +1,121 @@
+import type { MathObjectConverter } from '../types.js';
+
+const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
+
+/** Default accent character when m:chr is absent (combining circumflex). */
+const DEFAULT_ACCENT_CHAR = '\u0302';
+
+/**
+ * Maps combining diacritical marks (which Word emits in m:chr) to their
+ * non-combining accent equivalents, preferring characters that MathML Core's
+ * operator dictionary registers as stretchy accents.
+ *
+ * Why: combining marks (U+0300–U+036F, U+20D0–U+20FF) placed bare inside <mo>
+ * render against a dotted-circle placeholder in some engines. Where one exists
+ * we map to a spacing character that the MathML Core operator dictionary
+ * marks as stretchy (`^`, `~`, `¯`, `‾`, arrows, harpoons), so MathML renderers
+ * stretch it across wide bases (e.g. a tilde over "x+1"). For accents without
+ * such an entry we fall back to the closest spacing character.
+ *
+ * Covers the accents in Word's equation gallery that have a spacing
+ * equivalent; anything outside this table (e.g. U+20DB, three dots above)
+ * passes through unchanged.
+ *
+ * This is a Map rather than an object literal because the lookup key comes
+ * straight from the document: on a plain object, m:val="constructor" would
+ * resolve to an inherited Object.prototype member instead of passing through.
+ */
+const COMBINING_TO_SPACING: ReadonlyMap<string, string> = new Map<string, string>([
+  ['\u0300', '\u0060'], // grave → ` (U+0060)
+  ['\u0301', '\u00B4'], // acute → ´ (U+00B4)
+  ['\u0302', '\u005E'], // circumflex / hat → ^ (U+005E, stretchy)
+  ['\u0303', '\u007E'], // tilde → ~ (U+007E, stretchy)
+  ['\u0304', '\u00AF'], // macron → ¯ (U+00AF, stretchy)
+  ['\u0305', '\u203E'], // overline (Word's "Bar" accent) → ‾ (U+203E, stretchy)
+  ['\u0306', '\u02D8'], // breve → ˘
+  ['\u0307', '\u02D9'], // dot above → ˙
+  ['\u0308', '\u00A8'], // diaeresis → ¨
+  ['\u030A', '\u02DA'], // ring above → ˚
+  ['\u030B', '\u02DD'], // double acute → ˝
+  ['\u030C', '\u02C7'], // caron / háček → ˇ
+  ['\u20D0', '\u21BC'], // combining left harpoon above → ↼ (U+21BC, stretchy)
+  ['\u20D1', '\u21C0'], // combining right harpoon above → ⇀ (U+21C0, stretchy)
+  ['\u20D6', '\u2190'], // combining left arrow above → ← (U+2190, stretchy)
+  ['\u20D7', '\u2192'], // combining right arrow above → → (U+2192, stretchy)
+  ['\u20E1', '\u2194'], // combining left right arrow above → ↔ (U+2194, stretchy)
+]);
+
+/**
+ * Convert m:acc (accent / diacritical mark) to MathML <mover accent="true">.
+ *
+ * OMML structure:
+ *   m:acc → m:accPr? (optional: m:chr@m:val), m:e (base expression, required)
+ *
+ * MathML output:
+ *   <mover accent="true">
+ *     <mrow>base</mrow>
+ *     <mo>accent-char</mo>
+ *   </mover>
+ *
+ * ECMA-376 §22.1.2.20 (chr) defines three m:chr states:
+ *   1. m:chr element absent → default accent char (U+0302)
+ *   2. m:chr present, m:val absent → character is absent (render bare base)
+ *   3. m:chr present, m:val = "x" → use x as the accent character
+ *
+ * When the accent character is absent, the base is returned wrapped in <mrow>
+ * with no <mover> wrapper. When m:e itself is absent (invalid per the schema),
+ * the converter returns null so the caller can drop the malformed element.
+ *
+ * @param node - The m:acc element.
+ * @param doc - Document used to create the MathML elements.
+ * @param convertChildren - Converts the children of m:e into MathML nodes.
+ * @returns <mover accent="true">, a bare <mrow> when the accent character is
+ *   absent, or null when m:e is missing.
+ *
+ * @spec ECMA-376 §22.1.2.1 (acc), §22.1.2.2 (accPr), §22.1.2.20 (chr)
+ */
+export const convertAccent: MathObjectConverter = (node, doc, convertChildren) => {
+  const elements = node.elements ?? [];
+  const base = elements.find((e) => e.name === 'm:e');
+
+  // m:e is required by CT_Acc. Missing it means the input is malformed; decline
+  // to render rather than emit a floating accent with no base.
+  if (!base) return null;
+
+  const accPr = elements.find((e) => e.name === 'm:accPr');
+  const chr = accPr?.elements?.find((e) => e.name === 'm:chr');
+  const rawVal = chr?.attributes?.['m:val'];
+
+  // Resolve the accent character per §22.1.2.20.
+  //   - chr element absent           → default U+0302
+  //   - chr present, m:val absent/"" → character absent (no accent)
+  //   - chr present, m:val = "x"     → "x"
+  const accentChar = chr === undefined ? DEFAULT_ACCENT_CHAR : rawVal && rawVal.length > 0 ? rawVal : '';
+
+  const baseRow = doc.createElementNS(MATHML_NS, 'mrow');
+  baseRow.appendChild(convertChildren(base.elements ?? []));
+
+  if (!accentChar) {
+    // No accent character: render the base alone.
+    return baseRow;
+  }
+
+  // Map combining marks to their spacing forms so MathML renderers can use the
+  // stretchy accent operators. Non-combining or unmapped characters pass through.
+  const renderChar = COMBINING_TO_SPACING.get(accentChar) ?? accentChar;
+
+  const mover = doc.createElementNS(MATHML_NS, 'mover');
+  mover.setAttribute('accent', 'true');
+  mover.appendChild(baseRow);
+
+  const mo = doc.createElementNS(MATHML_NS, 'mo');
+  // stretchy is a hint: renderers that honor it (e.g. MathJax, Firefox's
+  // accent-stretch path) will stretch the accent across wide bases. Chrome's
+  // current MathML Core implementation ignores this for accent operators, so
+  // the accent renders at glyph width there — acceptable baseline behavior.
+  mo.setAttribute('stretchy', 'true');
+  mo.textContent = renderChar;
+  mover.appendChild(mo);
+
+  return mover;
+};
diff --git a/packages/layout-engine/painters/dom/src/features/math/converters/index.ts b/packages/layout-engine/painters/dom/src/features/math/converters/index.ts
index 24aa1f87fd..db3c53cce1 100644
--- a/packages/layout-engine/painters/dom/src/features/math/converters/index.ts
+++ b/packages/layout-engine/painters/dom/src/features/math/converters/index.ts
@@ -14,6 +14,7 @@ export { convertDelimiter } from './delimiter.js';
 export { convertSubscript } from './subscript.js';
 export { convertSuperscript } from './superscript.js';
 export { convertSubSuperscript } from './sub-superscript.js';
+export { convertAccent } from './accent.js';
 export { convertPreSubSuperscript } from './pre-sub-superscript.js';
 export { convertRadical } from './radical.js';
 export { convertLowerLimit } from './lower-limit.js';
diff --git a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts
index 2e7da8c13e..b6d994f442 100644
--- a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts
+++ b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts
@@ -1899,6 +1899,159 @@ describe('m:func converter', () => {
   });
 });
 
+describe('m:acc converter', () => {
+  // Helper: build an m:acc node with an optional accPr and a base string.
+  const buildAcc = (accPrElements: unknown[] | null, baseText: string | null, extraBaseRuns: string[] = []) => {
+    const elements: unknown[] = [];
+    if (accPrElements !== null) {
+      elements.push({ name: 'm:accPr', elements: accPrElements });
+    }
+    if (baseText !== null) {
+      const runs = [baseText, ...extraBaseRuns].map((t) => ({
+        name: 'm:r',
+        elements: [{ name: 'm:t', elements: [{ type: 'text', text: t }] }],
+      }));
+      elements.push({ name: 'm:e', elements: runs });
+    }
+    return { name: 'm:oMath', elements: [{ name: 'm:acc', elements }] };
+  };
+
+  it('converts accent with tilde to <mover accent="true">', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x'), doc);
+    expect(result).not.toBeNull();
+    const mover = result!.querySelector('mover');
+    expect(mover).not.toBeNull();
+    expect(mover!.getAttribute('accent')).toBe('true');
+    expect(mover!.children[0]!.textContent).toBe('x');
+    // Combining tilde (U+0303) is mapped to ASCII tilde (U+007E, "~") which
+    // MathML Core's operator dictionary marks as a stretchy accent.
+    const mo = mover!.querySelector('mo');
+    expect(mo!.textContent).toBe('\u007E');
+  });
+
+  it('defaults to circumflex when m:accPr is absent (spec §22.1.2.1)', () => {
+    const result = convertOmmlToMathml(buildAcc(null, 'a'), doc);
+    const mover = result!.querySelector('mover');
+    expect(mover).not.toBeNull();
+    expect(mover!.getAttribute('accent')).toBe('true');
+    // Combining circumflex (U+0302) maps to ASCII circumflex (U+005E, "^").
+    expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
+  });
+
+  it('defaults to circumflex when m:accPr is present but m:chr is absent (spec §22.1.2.20)', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:ctrlPr' }], 'a'), doc);
+    const mover = result!.querySelector('mover');
+    expect(mover).not.toBeNull();
+    expect(mover!.getAttribute('accent')).toBe('true');
+    expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
+  });
+
+  it('renders dot accent', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0307' } }], 'y'), doc);
+    const mover = result!.querySelector('mover');
+    expect(mover!.getAttribute('accent')).toBe('true');
+    // U+0307 → U+02D9 (spacing dot above) — no ASCII-range equivalent.
+    expect(mover!.querySelector('mo')!.textContent).toBe('\u02D9');
+  });
+
+  it('maps combining right-arrow (U+20D7) to stretchy right arrow (U+2192)', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u20D7' } }], 'v'), doc);
+    expect(result!.querySelector('mover mo')!.textContent).toBe('\u2192');
+  });
+
+  it('passes unmapped accent characters through unchanged', () => {
+    // A character outside the combining→spacing table should pass through as-is.
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '*' } }], 'x'), doc);
+    expect(result!.querySelector('mover mo')!.textContent).toBe('*');
+  });
+
+  it('maps combining overline (U+0305) to stretchy overline (U+203E)', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0305' } }], 'v'), doc);
+    expect(result!.querySelector('mover mo')!.textContent).toBe('\u203E');
+  });
+
+  it('maps combining harpoons and left-right arrow to their spacing arrows', () => {
+    const cases: Array<[string, string]> = [
+      ['\u20D0', '\u21BC'],
+      ['\u20D1', '\u21C0'],
+      ['\u20E1', '\u2194'],
+    ];
+    for (const [combining, spacing] of cases) {
+      const result = convertOmmlToMathml(
+        buildAcc([{ name: 'm:chr', attributes: { 'm:val': combining } }], 'v'),
+        doc,
+      );
+      expect(result!.querySelector('mover mo')!.textContent).toBe(spacing);
+    }
+  });
+
+  it('does not resolve m:val against Object.prototype members', () => {
+    // m:val comes from the document; "constructor" must pass through as text.
+    const result = convertOmmlToMathml(
+      buildAcc([{ name: 'm:chr', attributes: { 'm:val': 'constructor' } }], 'x'),
+      doc,
+    );
+    expect(result!.querySelector('mover mo')!.textContent).toBe('constructor');
+  });
+
+  // ── Spec §22.1.2.20: m:chr present with missing/empty m:val means the
+  // character is absent (not "use the default"). Render the base alone.
+  it('renders the base alone when m:chr is present with no m:val attribute', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr' }], 'x'), doc);
+    expect(result).not.toBeNull();
+    // No <mover> wrapper — just the base inside an <mrow>.
+    expect(result!.querySelector('mover')).toBeNull();
+    expect(result!.textContent).toBe('x');
+  });
+
+  it('renders the base alone when m:chr has an explicitly empty m:val', () => {
+    const result = convertOmmlToMathml(buildAcc([{ name: 'm:chr', attributes: { 'm:val': '' } }], 'x'), doc);
+    expect(result).not.toBeNull();
+    expect(result!.querySelector('mover')).toBeNull();
+    expect(result!.textContent).toBe('x');
+  });
+
+  it('wraps multi-run base in <mrow> so a wide base like x+1 renders as a group', () => {
+    const result = convertOmmlToMathml(
+      buildAcc([{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x', ['+', '1']),
+      doc,
+    );
+    const mover = result!.querySelector('mover');
+    expect(mover).not.toBeNull();
+    const baseRow = mover!.children[0]!;
+    expect(baseRow.tagName.toLowerCase()).toBe('mrow');
+    expect(baseRow.children.length).toBe(3);
+    expect(baseRow.textContent).toBe('x+1');
+  });
+
+  it('ignores non-chr siblings in m:accPr (e.g. m:ctrlPr)', () => {
+    const result = convertOmmlToMathml(
+      buildAcc([{ name: 'm:ctrlPr' }, { name: 'm:chr', attributes: { 'm:val': '\u0303' } }], 'x'),
+      doc,
+    );
+    const mover = result!.querySelector('mover');
+    expect(mover).not.toBeNull();
+    expect(mover!.children.length).toBe(2);
+    expect(mover!.querySelector('mo')!.textContent).toBe('\u007E');
+  });
+
+  it('returns null when m:e is absent (invalid per CT_Acc)', () => {
+    const omml = {
+      name: 'm:oMath',
+      elements: [
+        {
+          name: 'm:acc',
+          elements: [{ name: 'm:accPr', elements: [{ name: 'm:chr', attributes: { 'm:val': '\u0303' } }] }],
+        },
+      ],
+    };
+    const result = convertOmmlToMathml(omml, doc);
+    // The outer <math> is produced only if it has children. With m:acc dropped,
+    // there are no math children, so convertOmmlToMathml returns null.
+    expect(result).toBeNull();
+  });
+});
+
 describe('m:limLow converter', () => {
   it('converts m:limLow to <munder> with base and lower limit', () => {
     // lim_{n→∞}
diff --git a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts
index bbb47c5979..ce57a759fe 100644
--- a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts
+++ b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.ts
@@ -19,6 +19,7 @@ import {
   convertSubscript,
   convertSuperscript,
   convertSubSuperscript,
+  convertAccent,
   convertPreSubSuperscript,
   convertRadical,
   convertLowerLimit,
@@ -43,6 +44,7 @@ export const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
 const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
   // ── Implemented ──────────────────────────────────────────────────────────
   'm:r': convertMathRun,
+  'm:acc': convertAccent, // Accent (diacritical mark above base)
   'm:bar': convertBar, // Bar (overbar/underbar)
   'm:d': convertDelimiter, // Delimiter (parentheses, brackets, braces)
   'm:f': convertFraction, // Fraction (numerator/denominator)
@@ -56,7 +58,6 @@ const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
   'm:sPre': convertPreSubSuperscript, // Pre-sub-superscript (left of base)
 
   // ── Not yet implemented (community contributions welcome) ────────────────
-  'm:acc': null, // Accent (diacritical mark above base)
   'm:borderBox': null, // Border box (border around math content)
   'm:box': null, // Box (invisible grouping container)
   'm:eqArr': null, // Equation array (vertical array of equations)
diff --git a/tests/behavior/tests/importing/math-equations.spec.ts b/tests/behavior/tests/importing/math-equations.spec.ts
index f6c58495a9..a48811cf04 100644
--- a/tests/behavior/tests/importing/math-equations.spec.ts
+++ b/tests/behavior/tests/importing/math-equations.spec.ts
@@ -88,6 +88,37 @@ test.describe('math equation import and rendering', () => {
     }
   });
 
+  test('renders m:acc as <mover accent="true"> with spacing-form accent char', async ({ superdoc }) => {
+    await superdoc.loadDocument(ALL_OBJECTS_DOC);
+    await superdoc.waitForStable();
+
+    // The fixture has m:acc with m:chr m:val="U+0302" (combining circumflex).
+    // convertAccent should:
+    //   1. Produce a <mover accent="true"> wrapper
+    //   2. Emit ASCII circumflex U+005E (not the combining U+0302) since that's
+    //      what MathML Core's operator dictionary marks as a stretchy accent.
+    const accentData = await superdoc.page.evaluate(() => {
+      const mover = document.querySelector('mover[accent="true"]');
+      if (!mover) return null;
+      const mo = mover.querySelector('mo');
+      return {
+        childCount: mover.children.length,
+        baseText: mover.children[0]?.textContent,
+        accentChar: mo?.textContent,
+        accentCodepoint: mo?.textContent
+          ? 'U+' + (mo.textContent.codePointAt(0) ?? 0).toString(16).padStart(4, '0').toUpperCase()
+          : null,
+      };
+    });
+
+    expect(accentData).not.toBeNull();
+    expect(accentData!.childCount).toBe(2);
+    expect(accentData!.baseText).toBe('x');
+    // Combining circumflex (U+0302) in OMML must be rendered as ASCII circumflex (U+005E).
+    expect(accentData!.accentChar).toBe('\u005E');
+    expect(accentData!.accentCodepoint).toBe('U+005E');
+  });
+
   test('renders sub-superscript as <msubsup> with base, subscript, and superscript', async ({ superdoc }) => {
     await superdoc.loadDocument(ALL_OBJECTS_DOC);
     await superdoc.waitForStable();