Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import type { MathObjectConverter } from '../types.js';

/** Namespace URI handed to `createElementNS` for every MathML element built in this module. */
const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';

/**
 * Accent character used when the m:chr element is absent: U+0302, the
 * combining circumflex. It is stored in its combining form; the converter maps
 * it to a standalone character before emitting it.
 */
const DEFAULT_ACCENT_CHAR = '\u0302';

/**
 * Maps combining diacritical marks (which Word emits in m:chr) to standalone
 * characters that can be placed inside an `<mo>` accent.
 *
 * Why: combining marks (U+0300–U+036F, U+20D0–U+20FF) placed bare inside
 * `<mo>` render against a dotted-circle placeholder in some engines. Where a
 * character that MathML Core's operator dictionary treats as a stretchy accent
 * exists (`^`, `~`, `¯`, `←`, `→`) we map to it, so renderers can stretch the
 * accent across a wide base (e.g. a tilde over "x+1"). The remaining entries
 * map to the closest ASCII / Latin-1 character or Unicode spacing modifier
 * letter.
 *
 * Covers the common accents; any character outside this table is expected to
 * pass through unchanged.
 *
 * The table is built on a null prototype because it is indexed with arbitrary
 * m:val strings taken from the document. On a plain object literal, a value
 * such as "constructor" or "__proto__" would resolve to an inherited
 * `Object.prototype` member instead of `undefined`.
 */
const COMBINING_TO_SPACING: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    '\u0300': '\u0060', // grave → ` (U+0060)
    '\u0301': '\u00B4', // acute → ´ (U+00B4)
    '\u0302': '\u005E', // circumflex / hat → ^ (U+005E, stretchy)
    '\u0303': '\u007E', // tilde → ~ (U+007E, stretchy)
    '\u0304': '\u00AF', // macron → ¯ (U+00AF, stretchy)
    '\u0306': '\u02D8', // breve → ˘
    '\u0307': '\u02D9', // dot above → ˙
    '\u0308': '\u00A8', // diaeresis → ¨
    '\u030A': '\u02DA', // ring above → ˚
    '\u030B': '\u02DD', // double acute → ˝
    '\u030C': '\u02C7', // caron / háček → ˇ
    '\u20D6': '\u2190', // combining left arrow above → ← (U+2190, stretchy)
    '\u20D7': '\u2192', // combining right arrow above → → (U+2192, stretchy)
  },
);

/**
 * Convert m:acc (accent / diacritical mark) to MathML `<mover accent="true">`.
 *
 * OMML structure:
 *   m:acc → m:accPr? (optional: m:chr@m:val), m:e (base expression, required)
 *
 * MathML output:
 *   <mover accent="true">
 *     <mrow>base</mrow>
 *     <mo stretchy="true">accent-char</mo>
 *   </mover>
 *
 * ECMA-376 §22.1.2.20 (chr) defines three m:chr states:
 *   1. m:chr element absent         → default accent char (U+0302)
 *   2. m:chr present, m:val absent  → character is absent (render bare base)
 *   3. m:chr present, m:val = "x"   → use x as the accent character
 * An empty m:val is treated the same as an absent one.
 *
 * @param node - The m:acc element; its `elements` are searched for m:e and m:accPr.
 * @param doc - Document used to create the MathML elements.
 * @param convertChildren - Converts the children of m:e; its result is appended to the base `<mrow>`.
 * @returns The `<mover>` element; the bare base `<mrow>` when there is no accent
 *   character; or `null` when m:e is missing (invalid per the schema) so the
 *   caller can drop the malformed element.
 *
 * @spec ECMA-376 §22.1.2.1 (acc), §22.1.2.2 (accPr), §22.1.2.20 (chr)
 */
export const convertAccent: MathObjectConverter = (node, doc, convertChildren) => {
  const elements = node.elements ?? [];
  const base = elements.find((e) => e.name === 'm:e');

  // m:e is required by CT_Acc. Missing it means the input is malformed; decline
  // to render rather than emit a floating accent with no base.
  if (!base) return null;

  const accPr = elements.find((e) => e.name === 'm:accPr');
  const chr = accPr?.elements?.find((e) => e.name === 'm:chr');
  const rawVal = chr?.attributes?.['m:val'];

  // Resolve the accent character per §22.1.2.20.
  //   - chr element absent            → default U+0302
  //   - chr present, m:val absent/""  → character absent (no accent)
  //   - chr present, m:val = "x"      → "x"
  const accentChar = chr === undefined ? DEFAULT_ACCENT_CHAR : rawVal && rawVal.length > 0 ? rawVal : '';

  const baseRow = doc.createElementNS(MATHML_NS, 'mrow');
  baseRow.appendChild(convertChildren(base.elements ?? []));

  if (!accentChar) {
    // No accent character: render the base alone.
    return baseRow;
  }

  // Map combining marks to their standalone forms so MathML renderers can use
  // the stretchy accent operators. The lookup is restricted to own properties:
  // m:val comes straight from the document, and a value such as "constructor"
  // or "__proto__" would otherwise resolve to an inherited Object.prototype
  // member and be stringified into the <mo>. Unmapped characters pass through.
  const mapped = Object.prototype.hasOwnProperty.call(COMBINING_TO_SPACING, accentChar)
    ? COMBINING_TO_SPACING[accentChar]
    : undefined;
  const renderChar = mapped ?? accentChar;

  const mover = doc.createElementNS(MATHML_NS, 'mover');
  mover.setAttribute('accent', 'true');
  mover.appendChild(baseRow);

  const mo = doc.createElementNS(MATHML_NS, 'mo');
  // stretchy is a hint: renderers that honor it (e.g. MathJax, Firefox's
  // accent-stretch path) will stretch the accent across wide bases. Chrome's
  // current MathML Core implementation ignores this for accent operators, so
  // the accent renders at glyph width there — acceptable baseline behavior.
  mo.setAttribute('stretchy', 'true');
  mo.textContent = renderChar;
  mover.appendChild(mo);

  return mover;
};
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export { convertDelimiter } from './delimiter.js';
export { convertSubscript } from './subscript.js';
export { convertSuperscript } from './superscript.js';
export { convertSubSuperscript } from './sub-superscript.js';
export { convertAccent } from './accent.js';
export { convertPreSubSuperscript } from './pre-sub-superscript.js';
export { convertRadical } from './radical.js';
export { convertLowerLimit } from './lower-limit.js';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1899,6 +1899,130 @@ describe('m:func converter', () => {
});
});

describe('m:acc converter', () => {
  // One m:r run wrapping a single m:t text node.
  const mathRun = (text: string) => ({
    name: 'm:r',
    elements: [{ name: 'm:t', elements: [{ type: 'text', text }] }],
  });

  // An m:chr element carrying the given m:val.
  const chrWith = (val: string) => ({ name: 'm:chr', attributes: { 'm:val': val } });

  // Builds m:oMath > m:acc. Pass `null` for accPrElements to omit m:accPr, and
  // `null` for baseText to omit m:e entirely (extra runs are then ignored).
  const buildAcc = (accPrElements: unknown[] | null, baseText: string | null, extraBaseRuns: string[] = []) => {
    const accChildren: unknown[] = [];
    if (accPrElements !== null) {
      accChildren.push({ name: 'm:accPr', elements: accPrElements });
    }
    if (baseText !== null) {
      accChildren.push({ name: 'm:e', elements: [baseText, ...extraBaseRuns].map((t) => mathRun(t)) });
    }
    return { name: 'm:oMath', elements: [{ name: 'm:acc', elements: accChildren }] };
  };

  it('converts accent with tilde to <mover accent="true">', () => {
    const omml = buildAcc([chrWith('\u0303')], 'x');
    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();

    const mover = result!.querySelector('mover');
    expect(mover).not.toBeNull();
    expect(mover!.getAttribute('accent')).toBe('true');
    expect(mover!.children[0]!.textContent).toBe('x');

    // The combining tilde (U+0303) must come out as the ASCII tilde (U+007E),
    // which MathML Core's operator dictionary marks as a stretchy accent.
    const mo = mover!.querySelector('mo');
    expect(mo!.textContent).toBe('\u007E');
  });

  it('defaults to circumflex when m:accPr is absent (spec §22.1.2.1)', () => {
    const omml = buildAcc(null, 'a');
    const result = convertOmmlToMathml(omml, doc);
    const mover = result!.querySelector('mover');
    expect(mover).not.toBeNull();
    expect(mover!.getAttribute('accent')).toBe('true');
    // Default U+0302 (combining circumflex) is emitted as U+005E ("^").
    expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
  });

  it('defaults to circumflex when m:accPr is present but m:chr is absent (spec §22.1.2.20)', () => {
    const omml = buildAcc([{ name: 'm:ctrlPr' }], 'a');
    const result = convertOmmlToMathml(omml, doc);
    const mover = result!.querySelector('mover');
    expect(mover).not.toBeNull();
    expect(mover!.getAttribute('accent')).toBe('true');
    expect(mover!.querySelector('mo')!.textContent).toBe('\u005E');
  });

  it('renders dot accent', () => {
    const omml = buildAcc([chrWith('\u0307')], 'y');
    const result = convertOmmlToMathml(omml, doc);
    const mover = result!.querySelector('mover');
    expect(mover!.getAttribute('accent')).toBe('true');
    // Dot above has no ASCII-range form: U+0307 → U+02D9.
    expect(mover!.querySelector('mo')!.textContent).toBe('\u02D9');
  });

  it('maps combining right-arrow (U+20D7) to stretchy right arrow (U+2192)', () => {
    const omml = buildAcc([chrWith('\u20D7')], 'v');
    const result = convertOmmlToMathml(omml, doc);
    expect(result!.querySelector('mover mo')!.textContent).toBe('\u2192');
  });

  it('passes unmapped accent characters through unchanged', () => {
    // "*" is not in the combining→spacing table, so it must be emitted as-is.
    const omml = buildAcc([chrWith('*')], 'x');
    const result = convertOmmlToMathml(omml, doc);
    expect(result!.querySelector('mover mo')!.textContent).toBe('*');
  });

  // ── Spec §22.1.2.20: an m:chr with a missing or empty m:val means "no
  //    accent character", not "use the default". Only the base is rendered.
  it('renders the base alone when m:chr is present with no m:val attribute', () => {
    const omml = buildAcc([{ name: 'm:chr' }], 'x');
    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();
    // The base sits in an <mrow>; there must be no <mover> around it.
    expect(result!.querySelector('mover')).toBeNull();
    expect(result!.textContent).toBe('x');
  });

  it('renders the base alone when m:chr has an explicitly empty m:val', () => {
    const omml = buildAcc([chrWith('')], 'x');
    const result = convertOmmlToMathml(omml, doc);
    expect(result).not.toBeNull();
    expect(result!.querySelector('mover')).toBeNull();
    expect(result!.textContent).toBe('x');
  });

  it('wraps multi-run base in <mrow> so a wide base like x+1 renders as a group', () => {
    const omml = buildAcc([chrWith('\u0303')], 'x', ['+', '1']);
    const result = convertOmmlToMathml(omml, doc);
    const mover = result!.querySelector('mover');
    expect(mover).not.toBeNull();

    const baseRow = mover!.children[0]!;
    expect(baseRow.tagName.toLowerCase()).toBe('mrow');
    expect(baseRow.children.length).toBe(3);
    expect(baseRow.textContent).toBe('x+1');
  });

  it('ignores non-chr siblings in m:accPr (e.g. m:ctrlPr)', () => {
    const omml = buildAcc([{ name: 'm:ctrlPr' }, chrWith('\u0303')], 'x');
    const result = convertOmmlToMathml(omml, doc);
    const mover = result!.querySelector('mover');
    expect(mover).not.toBeNull();
    expect(mover!.children.length).toBe(2);
    expect(mover!.querySelector('mo')!.textContent).toBe('\u007E');
  });

  it('returns null when m:e is absent (invalid per CT_Acc)', () => {
    // m:acc with an m:accPr but no m:e at all.
    const omml = buildAcc([chrWith('\u0303')], null);
    const result = convertOmmlToMathml(omml, doc);
    // Dropping the m:acc leaves no math children, and the outer <math> is only
    // produced when it has children, so the whole conversion yields null.
    expect(result).toBeNull();
  });
});

describe('m:limLow converter', () => {
it('converts m:limLow to <munder> with base and lower limit', () => {
// lim_{n→∞}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
convertSubscript,
convertSuperscript,
convertSubSuperscript,
convertAccent,
convertPreSubSuperscript,
convertRadical,
convertLowerLimit,
Expand All @@ -43,6 +44,7 @@ export const MATHML_NS = 'http://www.w3.org/1998/Math/MathML';
const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
// ── Implemented ──────────────────────────────────────────────────────────
'm:r': convertMathRun,
'm:acc': convertAccent, // Accent (diacritical mark above base)
'm:bar': convertBar, // Bar (overbar/underbar)
'm:d': convertDelimiter, // Delimiter (parentheses, brackets, braces)
'm:f': convertFraction, // Fraction (numerator/denominator)
Expand All @@ -56,7 +58,6 @@ const MATH_OBJECT_REGISTRY: Record<string, MathObjectConverter | null> = {
'm:sPre': convertPreSubSuperscript, // Pre-sub-superscript (left of base)

// ── Not yet implemented (community contributions welcome) ────────────────
'm:acc': null, // Accent (diacritical mark above base)
'm:borderBox': null, // Border box (border around math content)
'm:box': null, // Box (invisible grouping container)
'm:eqArr': null, // Equation array (vertical array of equations)
Expand Down
31 changes: 31 additions & 0 deletions tests/behavior/tests/importing/math-equations.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,37 @@ test.describe('math equation import and rendering', () => {
}
});

test('renders m:acc as <mover accent="true"> with spacing-form accent char', async ({ superdoc }) => {
  await superdoc.loadDocument(ALL_OBJECTS_DOC);
  await superdoc.waitForStable();

  // The fixture's m:acc is expected to carry m:chr m:val = U+0302 (combining
  // circumflex). After conversion we expect:
  //   1. a <mover accent="true"> wrapper, and
  //   2. the ASCII circumflex U+005E inside <mo> instead of the combining
  //      U+0302, because U+005E is the form MathML Core's operator dictionary
  //      marks as a stretchy accent.
  // Everything inside the callback runs in the page, so it must stay
  // self-contained.
  const accentData = await superdoc.page.evaluate(() => {
    const accentMover = document.querySelector('mover[accent="true"]');
    if (!accentMover) return null;

    const accentText = accentMover.querySelector('mo')?.textContent;
    let codepointLabel: string | null = null;
    if (accentText) {
      const hex = (accentText.codePointAt(0) ?? 0).toString(16).padStart(4, '0').toUpperCase();
      codepointLabel = 'U+' + hex;
    }

    return {
      childCount: accentMover.children.length,
      baseText: accentMover.children[0]?.textContent,
      accentChar: accentText,
      accentCodepoint: codepointLabel,
    };
  });

  expect(accentData).not.toBeNull();
  expect(accentData!.childCount).toBe(2);
  expect(accentData!.baseText).toBe('x');
  // The OMML combining circumflex (U+0302) must be rendered as U+005E.
  expect(accentData!.accentChar).toBe('\u005E');
  expect(accentData!.accentCodepoint).toBe('U+005E');
});

test('renders sub-superscript as <msubsup> with base, subscript, and superscript', async ({ superdoc }) => {
await superdoc.loadDocument(ALL_OBJECTS_DOC);
await superdoc.waitForStable();
Expand Down
Loading