1
1
import stripAnsi from 'strip-ansi' ;
2
2
import { eastAsianWidth } from 'get-east-asian-width' ;
3
- import emojiRegex from 'emoji-regex' ;
3
+
4
/**
Logic:
- Segment the string into grapheme clusters to match how terminals render them.
- Width rules:
	1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design.
	2. Emoji clusters are double-width only when VS16 is present, the base has Emoji_Presentation (and VS15 is absent), or the cluster has multiple scalars (flags, ZWJ sequences, keycaps, tags, etc.).
	3. Otherwise, use the East Asian Width of the cluster’s first visible code point, and add the widths of any trailing Halfwidth and Fullwidth Forms within the same cluster (e.g., dakuten, handakuten, prolonged sound mark).
*/
4
12
5
13
// Module-level segmenter, constructed once and reused by the segmentation loop
// in `stringWidth` (constructed with no arguments, so it uses the defaults —
// presumably grapheme granularity; confirm against the Intl.Segmenter docs).
const segmenter = new Intl . Segmenter ( ) ;
6
14
7
- const defaultIgnorableCodePointRegex = / ^ \p{ Default_Ignorable_Code_Point} $ / u;
15
+ // Whole-cluster zero-width
16
+ const zeroWidthClusterRegex = / ^ (?: \p{ Default_Ignorable_Code_Point} | \p{ Control} | \p{ Mark} | \p{ Surrogate} ) + $ / v;
17
+
18
+ // Pick the base scalar if the cluster starts with Prepend/Format/Marks
19
+ const leadingNonPrintingRegex = / ^ [ \p{ Default_Ignorable_Code_Point} \p{ Control} \p{ Format} \p{ Mark} \p{ Surrogate} ] + / v;
20
+
21
+ // RGI emoji sequences
22
+ const rgiEmojiRegex = / ^ \p{ RGI_Emoji} $ / v;
23
+ // Default emoji presentation (single-scalar emoji without VS16)
24
+ const emojiPresentationRegex = / ^ \p{ Emoji_Presentation} $ / v;
25
+
26
+ function baseVisible ( segment ) {
27
+ return segment . replace ( leadingNonPrintingRegex , '' ) ;
28
+ }
29
+
30
+ function isZeroWidthCluster ( segment ) {
31
+ return zeroWidthClusterRegex . test ( segment ) ;
32
+ }
33
+
34
+ function isDoubleWidthEmojiCluster ( segment ) {
35
+ const visible = baseVisible ( segment ) ;
36
+ const baseScalar = visible . codePointAt ( 0 ) ;
37
+ const baseChar = String . fromCodePoint ( baseScalar ) ;
38
+ const baseIsEmojiPresentation = emojiPresentationRegex . test ( baseChar ) ;
39
+ const hasVs16 = segment . includes ( '\uFE0F' ) ;
40
+ const hasVs15 = segment . includes ( '\uFE0E' ) ;
41
+ const codePointCount = [ ...segment ] . length ;
42
+ const multiScalarMeaningful = codePointCount > 1 && ! ( codePointCount === 2 && hasVs15 && ! hasVs16 ) ;
43
+
44
+ return hasVs16 || ( baseIsEmojiPresentation && ! hasVs15 ) || multiScalarMeaningful ;
45
+ }
46
+
47
+ function trailingHalfwidthWidth ( segment , eastAsianWidthOptions ) {
48
+ let extra = 0 ;
49
+ if ( segment . length > 1 ) {
50
+ for ( const char of segment . slice ( 1 ) ) {
51
+ if ( char >= '\uFF00' && char <= '\uFFEF' ) {
52
+ extra += eastAsianWidth ( char . codePointAt ( 0 ) , eastAsianWidthOptions ) ;
53
+ }
54
+ }
55
+ }
8
56
9
- export default function stringWidth ( string , options = { } ) {
10
- if ( typeof string !== 'string' || string . length === 0 ) {
57
+ return extra ;
58
+ }
59
+
60
+ export default function stringWidth ( input , options = { } ) {
61
+ if ( typeof input !== 'string' || input . length === 0 ) {
11
62
return 0 ;
12
63
}
13
64
@@ -16,6 +67,8 @@ export default function stringWidth(string, options = {}) {
16
67
countAnsiEscapeCodes = false ,
17
68
} = options ;
18
69
70
+ let string = input ;
71
+
19
72
if ( ! countAnsiEscapeCodes ) {
20
73
string = stripAnsi ( string ) ;
21
74
}
@@ -27,55 +80,24 @@ export default function stringWidth(string, options = {}) {
27
80
let width = 0 ;
28
81
const eastAsianWidthOptions = { ambiguousAsWide : ! ambiguousIsNarrow } ;
29
82
30
- for ( const { segment : character } of segmenter . segment ( string ) ) {
31
- const codePoint = character . codePointAt ( 0 ) ;
32
-
33
- // Ignore control characters
34
- if ( codePoint <= 0x1F || ( codePoint >= 0x7F && codePoint <= 0x9F ) ) {
83
+ for ( const { segment} of segmenter . segment ( string ) ) {
84
+ // Zero-width / non-printing clusters
85
+ if ( isZeroWidthCluster ( segment ) ) {
35
86
continue ;
36
87
}
37
88
38
- // Ignore zero-width characters
39
- if (
40
- ( codePoint >= 0x20_0B && codePoint <= 0x20_0F ) // Zero-width space, non-joiner, joiner, left-to-right mark, right-to-left mark
41
- || codePoint === 0xFE_FF // Zero-width no-break space
42
- ) {
43
- continue ;
44
- }
45
-
46
- // Ignore combining characters
47
- if (
48
- ( codePoint >= 0x3_00 && codePoint <= 0x3_6F ) // Combining diacritical marks
49
- || ( codePoint >= 0x1A_B0 && codePoint <= 0x1A_FF ) // Combining diacritical marks extended
50
- || ( codePoint >= 0x1D_C0 && codePoint <= 0x1D_FF ) // Combining diacritical marks supplement
51
- || ( codePoint >= 0x20_D0 && codePoint <= 0x20_FF ) // Combining diacritical marks for symbols
52
- || ( codePoint >= 0xFE_20 && codePoint <= 0xFE_2F ) // Combining half marks
53
- ) {
54
- continue ;
55
- }
56
-
57
- // Ignore surrogate pairs
58
- if ( codePoint >= 0xD8_00 && codePoint <= 0xDF_FF ) {
59
- continue ;
60
- }
61
-
62
- // Ignore variation selectors
63
- if ( codePoint >= 0xFE_00 && codePoint <= 0xFE_0F ) {
64
- continue ;
65
- }
66
-
67
- // This covers some of the above cases, but we still keep them for performance reasons.
68
- if ( defaultIgnorableCodePointRegex . test ( character ) ) {
69
- continue ;
70
- }
71
-
72
- // TODO: Use `/\p{RGI_Emoji}/v` when targeting Node.js 20.
73
- if ( emojiRegex ( ) . test ( character ) ) {
89
+ // Emoji width logic
90
+ if ( rgiEmojiRegex . test ( segment ) && isDoubleWidthEmojiCluster ( segment ) ) {
74
91
width += 2 ;
75
92
continue ;
76
93
}
77
94
95
+ // Everything else: EAW of the cluster’s first visible scalar
96
+ const codePoint = baseVisible ( segment ) . codePointAt ( 0 ) ;
78
97
width += eastAsianWidth ( codePoint , eastAsianWidthOptions ) ;
98
+
99
+ // Add width for trailing Halfwidth and Fullwidth Forms (e.g., ゙, ゚, ー)
100
+ width += trailingHalfwidthWidth ( segment , eastAsianWidthOptions ) ;
79
101
}
80
102
81
103
return width ;
0 commit comments