@@ -32,8 +32,8 @@ const (
32
32
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
33
33
)
34
34
35
- // runeInfo is a representation for the data stored in charinfoTrie .
36
- type runeInfo struct {
35
+ // Properties provides access to normalization properties of a rune .
36
+ type Properties struct {
37
37
pos uint8 // start position in reorderBuffer; used in composition.go
38
38
size uint8 // length of UTF-8 encoding of this rune
39
39
ccc uint8 // leading canonical combining class (ccc if not decomposition)
@@ -43,7 +43,7 @@ type runeInfo struct {
43
43
}
44
44
45
45
// lookupFunc is the signature of the per-form lookup functions: given an
// input b and a position i, it returns the normalization data found there.
// lookupInfoNFC and lookupInfoNFKC are the implementations with this shape.
46
- type lookupFunc func (b input , i int ) runeInfo
46
+ type lookupFunc func (b input , i int ) Properties
47
47
48
48
// formInfo holds Form-specific functions and tables.
49
49
type formInfo struct {
@@ -75,11 +75,14 @@ func init() {
75
75
76
76
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
77
77
// unexpected behavior for the user. For example, in NFD, there is a boundary
78
- // after 'a'. However, a might combine with modifiers, so from the application's
78
+ // after 'a'. However, 'a' might combine with modifiers, so from the application's
79
79
// perspective it is not a good boundary. We will therefore always use the
80
80
// boundaries for the combining variants.
81
- func (i runeInfo ) boundaryBefore () bool {
82
- if i .ccc == 0 && ! i .combinesBackward () {
81
+
82
+ // BoundaryBefore returns true if this rune starts a new segment and
83
+ // cannot combine with any rune on the left.
84
+ func (p Properties ) BoundaryBefore () bool {
85
+ if p .ccc == 0 && ! p .combinesBackward () {
83
86
return true
84
87
}
85
88
// We assume that the CCC of the first character in a decomposition
@@ -88,8 +91,10 @@ func (i runeInfo) boundaryBefore() bool {
88
91
return false
89
92
}
90
93
91
- func (i runeInfo ) boundaryAfter () bool {
92
- return i .isInert ()
94
+ // BoundaryAfter returns true if this rune cannot combine with runes to the right
95
+ // and always denotes the end of a segment.
96
+ func (p Properties ) BoundaryAfter () bool {
97
+ return p .isInert ()
93
98
}
94
99
95
100
// We pack quick check data in 4 bits:
@@ -101,25 +106,52 @@ func (i runeInfo) boundaryAfter() bool {
101
106
// influenced by normalization.
102
107
type qcInfo uint8
103
108
104
- func (i runeInfo ) isYesC () bool { return i .flags & 0x4 == 0 }
105
- func (i runeInfo ) isYesD () bool { return i .flags & 0x1 == 0 }
109
+ func (p Properties ) isYesC () bool { return p .flags & 0x4 == 0 }
110
+ func (p Properties ) isYesD () bool { return p .flags & 0x1 == 0 }
106
111
107
- func (i runeInfo ) combinesForward () bool { return i .flags & 0x8 != 0 }
108
- func (i runeInfo ) combinesBackward () bool { return i .flags & 0x2 != 0 } // == isMaybe
109
- func (i runeInfo ) hasDecomposition () bool { return i .flags & 0x1 != 0 } // == isNoD
112
+ func (p Properties ) combinesForward () bool { return p .flags & 0x8 != 0 }
113
+ func (p Properties ) combinesBackward () bool { return p .flags & 0x2 != 0 } // == isMaybe
114
+ func (p Properties ) hasDecomposition () bool { return p .flags & 0x1 != 0 } // == isNoD
110
115
111
- func (r runeInfo ) isInert () bool {
112
- return r .flags & 0xf == 0 && r .ccc == 0
116
+ func (p Properties ) isInert () bool {
117
+ return p .flags & 0xf == 0 && p .ccc == 0
113
118
}
114
119
115
- func (r runeInfo ) decomposition () []byte {
116
- if r .index == 0 {
120
+ // Decomposition returns the decomposition for the underlying rune
121
+ // or nil if there is none.
122
+ func (p Properties ) Decomposition () []byte {
123
+ if p .index == 0 {
117
124
return nil
118
125
}
119
- p := r .index
120
- n := decomps [p ] & 0x3F
121
- p ++
122
- return decomps [p : p + uint16 (n )]
126
+ i := p .index
127
+ n := decomps [i ] & headerLenMask
128
+ i ++
129
+ return decomps [i : i + uint16 (n )]
130
+ }
131
+
132
+ // Size returns the length of UTF-8 encoding of the rune.
133
+ func (p Properties ) Size () int {
134
+ return int (p .size )
135
+ }
136
+
137
+ // CCC returns the canonical combining class of the underlying rune.
138
+ func (p Properties ) CCC () uint8 {
139
+ if p .index > firstCCCZeroExcept {
140
+ return 0
141
+ }
142
+ return p .ccc
143
+ }
144
+
145
+ // LeadCCC returns the CCC of the first rune in the decomposition.
146
+ // If there is no decomposition, LeadCCC equals CCC.
147
+ func (p Properties ) LeadCCC () uint8 {
148
+ return p .ccc
149
+ }
150
+
151
+ // TrailCCC returns the CCC of the last rune in the decomposition.
152
+ // If there is no decomposition, TrailCCC equals CCC.
153
+ func (p Properties ) TrailCCC () uint8 {
154
+ return p .tccc
123
155
}
124
156
125
157
// Recomposition
@@ -135,24 +167,40 @@ func combine(a, b rune) rune {
135
167
return recompMap [key ]
136
168
}
137
169
138
- func lookupInfoNFC (b input , i int ) runeInfo {
170
+ func lookupInfoNFC (b input , i int ) Properties {
139
171
v , sz := b .charinfoNFC (i )
140
172
return compInfo (v , sz )
141
173
}
142
174
143
- func lookupInfoNFKC (b input , i int ) runeInfo {
175
+ func lookupInfoNFKC (b input , i int ) Properties {
144
176
v , sz := b .charinfoNFKC (i )
145
177
return compInfo (v , sz )
146
178
}
147
179
180
+ // Properties returns properties for the first rune in s.
181
+ func (f Form ) Properties (s []byte ) Properties {
182
+ if f == NFC || f == NFD {
183
+ return compInfo (nfcTrie .lookup (s ))
184
+ }
185
+ return compInfo (nfkcTrie .lookup (s ))
186
+ }
187
+
188
+ // PropertiesString returns properties for the first rune in s.
189
+ func (f Form ) PropertiesString (s string ) Properties {
190
+ if f == NFC || f == NFD {
191
+ return compInfo (nfcTrie .lookupString (s ))
192
+ }
193
+ return compInfo (nfkcTrie .lookupString (s ))
194
+ }
195
+
148
196
// compInfo converts the information contained in v and sz
149
- // to a runeInfo . See the comment at the top of the file
197
+ // to a Properties . See the comment at the top of the file
150
198
// for more information on the format.
151
- func compInfo (v uint16 , sz int ) runeInfo {
199
+ func compInfo (v uint16 , sz int ) Properties {
152
200
if v == 0 {
153
- return runeInfo {size : uint8 (sz )}
201
+ return Properties {size : uint8 (sz )}
154
202
} else if v >= 0x8000 {
155
- return runeInfo {
203
+ return Properties {
156
204
size : uint8 (sz ),
157
205
ccc : uint8 (v ),
158
206
tccc : uint8 (v ),
@@ -162,7 +210,7 @@ func compInfo(v uint16, sz int) runeInfo {
162
210
// has decomposition
163
211
h := decomps [v ]
164
212
f := (qcInfo (h & headerFlagsMask ) >> 4 ) | 0x1
165
- ri := runeInfo {size : uint8 (sz ), flags : f , index : v }
213
+ ri := Properties {size : uint8 (sz ), flags : f , index : v }
166
214
if v >= firstCCC {
167
215
v += uint16 (h & headerLenMask ) + 1
168
216
ri .tccc = decomps [v ]
0 commit comments