@@ -6,28 +6,17 @@ use crate::error::Error;
6
6
use crate :: impl_const:: * ;
7
7
8
8
use core:: convert:: TryFrom ;
9
+ use icu_provider:: yoke:: { self , Yokeable , ZeroCopyFrom } ;
9
10
#[ cfg( feature = "serde" ) ]
10
11
use serde:: { Deserialize , Serialize } ;
11
12
use zerovec:: ZeroVec ;
12
13
13
- // Enums
14
-
15
- /// The width of the elements in the data array of a [`CodePointTrie`].
16
- /// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
17
- #[ derive( Clone , Copy , PartialEq ) ]
18
- #[ cfg_attr( feature = "serde" , derive( Serialize , Deserialize ) ) ]
19
- pub enum ValueWidthEnum {
20
- Bits16 = 0 ,
21
- Bits32 = 1 ,
22
- Bits8 = 2 ,
23
- }
24
-
25
14
/// The type of trie represents whether the trie has an optimization that
26
15
/// would make it small or fast.
27
16
/// See [`UCPTrieType`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
28
- #[ derive( Clone , Copy , PartialEq ) ]
17
+ #[ derive( Clone , Copy , PartialEq , Debug , Eq ) ]
29
18
#[ cfg_attr( feature = "serde" , derive( Serialize , Deserialize ) ) ]
30
- pub enum TrieTypeEnum {
19
+ pub enum TrieType {
31
20
/// Represents the "fast" type code point tries for the
32
21
/// [`TrieType`] enum. The "fast max" limit is set to `0xffff`.
33
22
Fast = 0 ,
@@ -36,51 +25,29 @@ pub enum TrieTypeEnum {
36
25
Small = 1 ,
37
26
}
38
27
39
- // ValueWidth trait
28
+ // TrieValue trait
40
29
41
30
// AsULE is AsUnalignedLittleEndian, i.e. "allowed in a zerovec"
42
31
43
- /// A trait representing the width of the values stored in the data array of a
44
- /// [`CodePointTrie`]. This trait is used as a type parameter in constructing
45
- /// a `CodePointTrie`.
46
- pub trait ValueWidth : Copy + zerovec:: ule:: AsULE + ' static {
47
- /// This enum variant represents the specific instance of `ValueWidth` such
48
- /// that the enum discriminant values matches ICU4C's enum integer value.
49
- const ENUM_VALUE : ValueWidthEnum ;
50
- /// This value is used to indicate an error in the Rust code in accessing
51
- /// a position in the trie's `data` array. In normal cases, the position in
52
- /// the `data` array will return either the correct value, or in case of a
53
- /// logical error in the trie's computation, the trie's own error value
54
- /// which is stored that in the `data` array.
32
+ /// A trait representing the values stored in the data array of a [`CodePointTrie`].
33
+ /// This trait is used as a type parameter in constructing a `CodePointTrie`. The `AsULE` bound is what allows implementing types to be stored in a `ZeroVec`.
34
+ pub trait TrieValue : Copy + Eq + PartialEq + zerovec:: ule:: AsULE + ' static {
35
+ /// Last-resort fallback value to return if we cannot read data from the trie.
36
+ ///
37
+ /// In most cases, the error value is read from the last element of the `data` array.
55
38
const DATA_GET_ERROR_VALUE : Self ;
56
- fn cast_to_widest ( self ) -> u32 ;
57
39
}
58
40
59
// `u8` trie values: the last-resort fallback used when the trie data cannot be read is `u8::MAX`.
- impl ValueWidth for u8 {
60
- const ENUM_VALUE : ValueWidthEnum = ValueWidthEnum :: Bits8 ;
41
+ impl TrieValue for u8 {
61
42
const DATA_GET_ERROR_VALUE : u8 = u8:: MAX ;
62
-
63
- fn cast_to_widest ( self ) -> u32 {
64
- self as u32
65
- }
66
43
}
67
44
68
// `u16` trie values: the last-resort fallback used when the trie data cannot be read is `u16::MAX`.
- impl ValueWidth for u16 {
69
- const ENUM_VALUE : ValueWidthEnum = ValueWidthEnum :: Bits16 ;
45
+ impl TrieValue for u16 {
70
46
const DATA_GET_ERROR_VALUE : u16 = u16:: MAX ;
71
-
72
- fn cast_to_widest ( self ) -> u32 {
73
- self as u32
74
- }
75
47
}
76
48
77
// `u32` trie values: the last-resort fallback used when the trie data cannot be read is `u32::MAX`.
- impl ValueWidth for u32 {
78
- const ENUM_VALUE : ValueWidthEnum = ValueWidthEnum :: Bits32 ;
49
+ impl TrieValue for u32 {
79
50
const DATA_GET_ERROR_VALUE : u32 = u32:: MAX ;
80
-
81
- fn cast_to_widest ( self ) -> u32 {
82
- self
83
- }
84
51
}
85
52
86
53
/// This struct represents a de-serialized CodePointTrie that was exported from
@@ -90,16 +57,18 @@ impl ValueWidth for u32 {
90
57
/// - [ICU Site design doc](http://site.icu-project.org/design/struct/utrie)
91
58
/// - [ICU User Guide section on Properties lookup](https://unicode-org.github.io/icu/userguide/strings/properties.html#lookup)
92
59
#[ cfg_attr( feature = "serde" , derive( Serialize , Deserialize ) ) ]
93
- pub struct CodePointTrie < ' trie , W : ValueWidth > {
60
+ #[ derive( Debug , Eq , PartialEq , Yokeable , ZeroCopyFrom ) ]
61
+ pub struct CodePointTrie < ' trie , T : TrieValue > {
94
62
// Fixed-length header fields of the trie (see `CodePointTrieHeader`).
header : CodePointTrieHeader ,
95
63
// Index array of `u16` entries; `get` uses it to compute a position in `data`.
#[ cfg_attr( feature = "serde" , serde( borrow) ) ]
index : ZeroVec < ' trie , u16 > ,
97
65
// Trie values; `get` returns the value found at the computed `data` position.
#[ cfg_attr( feature = "serde" , serde( borrow) ) ]
98
- data : ZeroVec < ' trie , W > ,
66
+ data : ZeroVec < ' trie , T > ,
99
67
}
100
68
101
69
/// This struct contains the fixed-length header fields of a [`CodePointTrie`].
102
70
#[ cfg_attr( feature = "serde" , derive( Serialize , Deserialize ) ) ]
71
+ #[ derive( Copy , Clone , Debug , Eq , PartialEq , Yokeable , ZeroCopyFrom ) ]
103
72
pub struct CodePointTrieHeader {
104
73
/// The code point of the start of the last range of the trie. A
105
74
/// range is defined as a partition of the code point space such that the
@@ -130,31 +99,31 @@ pub struct CodePointTrieHeader {
130
99
pub null_value : u32 ,
131
100
/// The enum value representing the type of trie, where trie type is as it
132
101
/// is defined in ICU (ex: Fast, Small).
133
- pub trie_type : TrieTypeEnum ,
102
+ pub trie_type : TrieType ,
134
103
}
135
104
136
// Converts an integer trie type code into a `TrieType`: 0 maps to `Fast` and 1 maps to `Small`.
// Any other value is rejected with `Error::FromDeserialized`.
- impl TryFrom < u8 > for TrieTypeEnum {
105
+ impl TryFrom < u8 > for TrieType {
137
106
type Error = crate :: error:: Error ;
138
107
139
- fn try_from ( trie_type_int : u8 ) -> Result < TrieTypeEnum , crate :: error:: Error > {
108
+ fn try_from ( trie_type_int : u8 ) -> Result < TrieType , crate :: error:: Error > {
140
109
match trie_type_int {
141
- 0 => Ok ( TrieTypeEnum :: Fast ) ,
142
- 1 => Ok ( TrieTypeEnum :: Small ) ,
110
+ 0 => Ok ( TrieType :: Fast ) ,
111
+ 1 => Ok ( TrieType :: Small ) ,
143
112
_ => Err ( crate :: error:: Error :: FromDeserialized {
144
113
reason : "Cannot parse value for trie_type" ,
145
114
} ) ,
146
115
}
147
116
}
148
117
}
149
118
150
- impl < ' trie , W : ValueWidth > CodePointTrie < ' trie , W > {
119
+ impl < ' trie , T : TrieValue > CodePointTrie < ' trie , T > {
151
120
/// Returns a new [`CodePointTrie`] backed by borrowed data for the `index`
152
121
/// array and `data` array, whose data values have type `T`.
153
122
pub fn try_new (
154
123
header : CodePointTrieHeader ,
155
124
index : ZeroVec < ' trie , u16 > ,
156
- data : ZeroVec < ' trie , W > ,
157
- ) -> Result < CodePointTrie < ' trie , W > , Error > {
125
+ data : ZeroVec < ' trie , T > ,
126
+ ) -> Result < CodePointTrie < ' trie , T > , Error > {
158
127
// Validation invariants are not needed here when constructing a new
159
128
// `CodePointTrie` because:
160
129
//
@@ -167,7 +136,7 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> {
167
136
// - The `ZeroVec` serializer stores the length of the array along with the
168
137
// ZeroVec data, meaning that a deserializer would also see that length info.
169
138
170
- let trie: CodePointTrie < ' trie , W > = CodePointTrie {
139
+ let trie: CodePointTrie < ' trie , T > = CodePointTrie {
171
140
header,
172
141
index,
173
142
data,
@@ -183,7 +152,7 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> {
183
152
184
153
fn internal_small_index ( & self , code_point : u32 ) -> u32 {
185
154
let mut index1_pos: u32 = code_point >> SHIFT_1 ;
186
- if self . header . trie_type == TrieTypeEnum :: Fast {
155
+ if self . header . trie_type == TrieType :: Fast {
187
156
debug_assert ! (
188
157
FAST_TYPE_FAST_INDEXING_MAX < code_point && code_point < self . header. high_start
189
158
) ;
@@ -290,14 +259,14 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> {
290
259
/// assert_eq!(0, trie.get(0x13E0)); // 'Ꮰ' as u32
291
260
/// assert_eq!(1, trie.get(0x10044)); // '𐁄' as u32
292
261
/// ```
293
- pub fn get ( & self , code_point : u32 ) -> W {
262
+ pub fn get ( & self , code_point : u32 ) -> T {
294
263
// All code points up to the fast max limit are represented
295
264
// individually in the `index` array to hold their `data` array position, and
296
265
// thus only need 2 lookups for a [CodePointTrie::get()](`crate::codepointtrie::CodePointTrie::get`).
297
266
// Code points above the "fast max" limit require 4 lookups.
298
267
let fast_max = match self . header . trie_type {
299
- TrieTypeEnum :: Fast => FAST_TYPE_FAST_INDEXING_MAX ,
300
- TrieTypeEnum :: Small => SMALL_TYPE_FAST_INDEXING_MAX ,
268
+ TrieType :: Fast => FAST_TYPE_FAST_INDEXING_MAX ,
269
+ TrieType :: Small => SMALL_TYPE_FAST_INDEXING_MAX ,
301
270
} ;
302
271
let data_pos: u32 = if code_point <= fast_max {
303
272
Self :: fast_index ( self , code_point)
@@ -308,12 +277,14 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> {
308
277
} ;
309
278
// Returns the trie value (or trie's error value).
310
279
// If we cannot read from the data array, then return the associated constant
311
- // DATA_GET_ERROR_VALUE for the instance type for W: ValueWidth .
280
+ // DATA_GET_ERROR_VALUE for the instance type for T: TrieValue .
312
281
self . data
313
282
. get ( data_pos as usize )
314
- . unwrap_or ( W :: DATA_GET_ERROR_VALUE )
283
+ . unwrap_or ( T :: DATA_GET_ERROR_VALUE )
315
284
}
285
+ }
316
286
287
+ impl < ' trie , T : TrieValue + Into < u32 > > CodePointTrie < ' trie , T > {
317
288
/// Returns the value that is associated with `code_point` for this [`CodePointTrie`]
318
289
/// as a `u32`.
319
290
///
@@ -333,7 +304,20 @@ impl<'trie, W: ValueWidth> CodePointTrie<'trie, W> {
333
304
// Note: This API method maintains consistency with the corresponding
334
305
// original ICU APIs.
335
306
pub fn get_u32 ( & self , code_point : u32 ) -> u32 {
336
- self . get ( code_point) . cast_to_widest ( )
307
+ self . get ( code_point) . into ( )
308
+ }
309
+ }
310
+
311
// Manual `Clone` impl, available only when the ULE form of `T` is itself `Clone`.
// The header is copied as-is, while `index` and `data` are cloned field by field.
+ impl < ' trie , T : TrieValue > Clone for CodePointTrie < ' trie , T >
312
+ where
313
+ <T as zerovec:: ule:: AsULE >:: ULE : Clone ,
314
+ {
315
+ fn clone ( & self ) -> Self {
316
+ CodePointTrie {
317
+ header : self . header ,
318
+ index : self . index . clone ( ) ,
319
+ data : self . data . clone ( ) ,
320
+ }
337
321
}
338
322
}
339
323
0 commit comments