Skip to content

Commit 97fb818

Browse files
committed
Fix roundtripping of attention by encoding surroundings
Related-to: syntax-tree/unist#60.
1 parent df0d6a6 commit 97fb818

11 files changed

+359
-49
lines changed

Diff for: lib/handle/emphasis.js

+32-9
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@
44
*/
55

66
import {checkEmphasis} from '../util/check-emphasis.js'
7+
import {encodeCharacterReference} from '../util/encode-character-reference.js'
8+
import {encodeInfo} from '../util/encode-info.js'
79

810
emphasis.peek = emphasisPeek
911

10-
// To do: there are cases where emphasis cannot “form” depending on the
11-
// previous or next character of sequences.
12-
// There’s no way around that though, except for injecting zero-width stuff.
13-
// Do we need to safeguard against that?
1412
/**
1513
* @param {Emphasis} node
1614
* @param {Parents | undefined} _
@@ -22,17 +20,42 @@ export function emphasis(node, _, state, info) {
2220
const marker = checkEmphasis(state)
2321
const exit = state.enter('emphasis')
2422
const tracker = state.createTracker(info)
25-
let value = tracker.move(marker)
26-
value += tracker.move(
23+
const before = tracker.move(marker)
24+
25+
let between = tracker.move(
2726
state.containerPhrasing(node, {
28-
before: value,
2927
after: marker,
28+
before,
3029
...tracker.current()
3130
})
3231
)
33-
value += tracker.move(marker)
32+
const betweenHead = between.charCodeAt(0)
33+
const open = encodeInfo(
34+
info.before.charCodeAt(info.before.length - 1),
35+
betweenHead,
36+
marker
37+
)
38+
39+
if (open.inside) {
40+
between = encodeCharacterReference(betweenHead) + between.slice(1)
41+
}
42+
43+
const betweenTail = between.charCodeAt(between.length - 1)
44+
const close = encodeInfo(info.after.charCodeAt(0), betweenTail, marker)
45+
46+
if (close.inside) {
47+
between = between.slice(0, -1) + encodeCharacterReference(betweenTail)
48+
}
49+
50+
const after = tracker.move(marker)
51+
3452
exit()
35-
return value
53+
54+
state.attentionEncodeSurroundingInfo = {
55+
after: close.outside,
56+
before: open.outside
57+
}
58+
return before + between + after
3659
}
3760

3861
/**

Diff for: lib/handle/heading.js

+2-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* @import {Heading, Parents} from 'mdast'
44
*/
55

6+
import {encodeCharacterReference} from '../util/encode-character-reference.js'
67
import {formatHeadingAsSetext} from '../util/format-heading-as-setext.js'
78

89
/**
@@ -58,11 +59,7 @@ export function heading(node, _, state, info) {
5859

5960
if (/^[\t ]/.test(value)) {
6061
// To do: what effect has the character reference on tracking?
61-
value =
62-
'&#x' +
63-
value.charCodeAt(0).toString(16).toUpperCase() +
64-
';' +
65-
value.slice(1)
62+
value = encodeCharacterReference(value.charCodeAt(0)) + value.slice(1)
6663
}
6764

6865
value = value ? sequence + ' ' + value : sequence

Diff for: lib/handle/strong.js

+32-9
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@
44
*/
55

66
import {checkStrong} from '../util/check-strong.js'
7+
import {encodeCharacterReference} from '../util/encode-character-reference.js'
8+
import {encodeInfo} from '../util/encode-info.js'
79

810
strong.peek = strongPeek
911

10-
// To do: there are cases where emphasis cannot “form” depending on the
11-
// previous or next character of sequences.
12-
// There’s no way around that though, except for injecting zero-width stuff.
13-
// Do we need to safeguard against that?
1412
/**
1513
* @param {Strong} node
1614
* @param {Parents | undefined} _
@@ -22,17 +20,42 @@ export function strong(node, _, state, info) {
2220
const marker = checkStrong(state)
2321
const exit = state.enter('strong')
2422
const tracker = state.createTracker(info)
25-
let value = tracker.move(marker + marker)
26-
value += tracker.move(
23+
const before = tracker.move(marker + marker)
24+
25+
let between = tracker.move(
2726
state.containerPhrasing(node, {
28-
before: value,
2927
after: marker,
28+
before,
3029
...tracker.current()
3130
})
3231
)
33-
value += tracker.move(marker + marker)
32+
const betweenHead = between.charCodeAt(0)
33+
const open = encodeInfo(
34+
info.before.charCodeAt(info.before.length - 1),
35+
betweenHead,
36+
marker
37+
)
38+
39+
if (open.inside) {
40+
between = encodeCharacterReference(betweenHead) + between.slice(1)
41+
}
42+
43+
const betweenTail = between.charCodeAt(between.length - 1)
44+
const close = encodeInfo(info.after.charCodeAt(0), betweenTail, marker)
45+
46+
if (close.inside) {
47+
between = between.slice(0, -1) + encodeCharacterReference(betweenTail)
48+
}
49+
50+
const after = tracker.move(marker + marker)
51+
3452
exit()
35-
return value
53+
54+
state.attentionEncodeSurroundingInfo = {
55+
after: close.outside,
56+
before: open.outside
57+
}
58+
return before + between + after
3659
}
3760

3861
/**

Diff for: lib/index.js

+13-12
Original file line numberDiff line numberDiff line change
@@ -22,35 +22,36 @@ import {track} from './util/track.js'
2222
*
2323
* @param {Nodes} tree
2424
* Tree to serialize.
25-
* @param {Options} [options]
25+
* @param {Options | null | undefined} [options]
2626
* Configuration (optional).
2727
* @returns {string}
2828
* Serialized markdown representing `tree`.
2929
*/
30-
export function toMarkdown(tree, options = {}) {
30+
export function toMarkdown(tree, options) {
31+
const settings = options || {}
3132
/** @type {State} */
3233
const state = {
33-
enter,
34-
indentLines,
3534
associationId: association,
3635
containerPhrasing: containerPhrasingBound,
3736
containerFlow: containerFlowBound,
3837
createTracker: track,
3938
compilePattern,
40-
safe: safeBound,
41-
stack: [],
42-
unsafe: [...unsafe],
43-
join: [...join],
39+
enter,
4440
// @ts-expect-error: GFM / frontmatter are typed in `mdast` but not defined
4541
// here.
4642
handlers: {...handlers},
47-
options: {},
48-
indexStack: [],
4943
// @ts-expect-error: add `handle` in a second.
50-
handle: undefined
44+
handle: undefined,
45+
indentLines,
46+
indexStack: [],
47+
join: [...join],
48+
options: {},
49+
safe: safeBound,
50+
stack: [],
51+
unsafe: [...unsafe]
5152
}
5253

53-
configure(state, options)
54+
configure(state, settings)
5455

5556
if (state.options.tightDefinitions) {
5657
state.join.push(joinDefinition)

Diff for: lib/types.d.ts

+44
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,38 @@ export type ContainerPhrasing = (parent: PhrasingParents, info: Info) => string
392392
*/
393393
export type CreateTracker = (info: TrackFields) => Tracker
394394

395+
/**
396+
* Whether to encode things — with fields representing the surrounding of a
397+
* whole.
398+
*/
399+
export interface EncodeSurrounding {
400+
/**
401+
* Whether to encode the character after the whole.
402+
*/
403+
after: boolean
404+
405+
/**
406+
* Whether to encode the character before the whole.
407+
*/
408+
before: boolean
409+
}
410+
411+
/**
412+
* Whether to encode things — with fields representing the relationship to a
413+
* whole.
414+
*/
415+
export interface EncodeSides {
416+
/**
417+
* Whether to encode inside.
418+
*/
419+
inside: boolean
420+
421+
/**
422+
* Whether to encode outside.
423+
*/
424+
outside: boolean
425+
}
426+
395427
/**
396428
* Enter something.
397429
*
@@ -754,6 +786,18 @@ export interface State {
754786
* Get an identifier from an association to match it to others.
755787
*/
756788
associationId: AssociationId
789+
/**
790+
* Info on whether to encode the surrounding of *attention*.
791+
*
792+
* Whether attention (emphasis, strong, strikethrough) forms
793+
* depends on the characters inside and outside them.
794+
* The characters inside can be handled by *attention* itself.
795+
* However, the outside characters are either already handled,
796+
* or will be handled afterwards.
797+
* This field can be used to signal from *attention* that some parent
798+
* function (practically `containerPhrasing`) has to handle the surrounding.
799+
*/
800+
attentionEncodeSurroundingInfo: EncodeSurrounding | undefined
757801
/**
758802
* List marker currently in use.
759803
*/

Diff for: lib/util/container-phrasing.js

+40-10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* @import {PhrasingParents} from '../types.js'
44
*/
55

6+
import {encodeCharacterReference} from './encode-character-reference.js'
7+
68
/**
79
* Serialize the children of a parent that contains phrasing children.
810
*
@@ -24,6 +26,8 @@ export function containerPhrasing(parent, state, info) {
2426
const results = []
2527
let index = -1
2628
let before = info.before
29+
/** @type {string | undefined} */
30+
let encodeAfter
2731

2832
indexStack.push(-1)
2933
let tracker = state.createTracker(info)
@@ -75,17 +79,43 @@ export function containerPhrasing(parent, state, info) {
7579
tracker.move(results.join(''))
7680
}
7781

78-
results.push(
79-
tracker.move(
80-
state.handle(child, parent, state, {
81-
...tracker.current(),
82-
before,
83-
after
84-
})
85-
)
86-
)
82+
let value = state.handle(child, parent, state, {
83+
...tracker.current(),
84+
after,
85+
before
86+
})
87+
88+
// If we had to encode the first character after the previous node and it’s
89+
// still the same character,
90+
// encode it.
91+
if (encodeAfter && encodeAfter === value.slice(0, 1)) {
92+
value =
93+
encodeCharacterReference(encodeAfter.charCodeAt(0)) + value.slice(1)
94+
}
95+
96+
const encodingInfo = state.attentionEncodeSurroundingInfo
97+
state.attentionEncodeSurroundingInfo = undefined
98+
encodeAfter = undefined
99+
100+
// If we have to encode the first character before the current node and
101+
// it’s still the same character,
102+
// encode it.
103+
if (encodingInfo) {
104+
if (
105+
encodingInfo.before &&
106+
before === results[results.length - 1].slice(-1)
107+
) {
108+
results[results.length - 1] =
109+
results[results.length - 1].slice(0, -1) +
110+
encodeCharacterReference(before.charCodeAt(0))
111+
}
112+
113+
if (encodingInfo.after) encodeAfter = after
114+
}
87115

88-
before = results[results.length - 1].slice(-1)
116+
tracker.move(value)
117+
results.push(value)
118+
before = value.slice(-1)
89119
}
90120

91121
indexStack.pop()

Diff for: lib/util/encode-character-reference.js

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/**
2+
* Encode a code point as a hexadecimal character reference (such as `&#x2A;`).
3+
*
4+
* @param {number} code
5+
* Code point to encode.
6+
* @returns {string}
7+
* Encoded character reference, with uppercase hexadecimal digits.
8+
*/
9+
export function encodeCharacterReference(code) {
10+
return '&#x' + code.toString(16).toUpperCase() + ';'
11+
}

0 commit comments

Comments
 (0)