@@ -2,14 +2,24 @@ package UTF8
22
33import (
44 "fmt"
5- "unicode "
5+ "math "
66 "unicode/utf16"
77 "unicode/utf8"
88
99 "github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library/Wrappers"
1010 "github.com/dafny-lang/DafnyRuntimeGo/v4/dafny"
1111)
1212
13+ // The following constants are copied from the Go utf16 lib and are used
14+ // to check the validity of the utf16 surrogate pairs.
15+ const (
16+ // 0xd800-0xdc00 encodes the high 10 bits of a pair.
17+ // 0xdc00-0xe000 encodes the low 10 bits of a pair.
18+ surr1 = 0xd800
19+ surr2 = 0xdc00
20+ surr3 = 0xe000
21+ )
22+
1323//IMP: The below extern implementations are only compatible
1424//with unicode-char:false transpiled code.
1525
@@ -19,46 +29,94 @@ import (
1929// we need to encode the result in compatible dafny utf16 string before returning
2030// the result.
2131func Decode (utf8EncodedDafnySeq dafny.Sequence ) Wrappers.Result {
22- utf8EncodedByteArray := dafny .ToByteArray (utf8EncodedDafnySeq )
23-
24- utf16Encoded := utf16 .Encode ([]rune (string (utf8EncodedByteArray )))
25- var dafnyCharArray []dafny.Char
26- for _ , c := range utf16Encoded {
27- if c == unicode .ReplacementChar {
28- err := fmt .Errorf ("Encountered Not Allowed Replacement Character: %s " , unicode .ReplacementChar )
29- return Wrappers .Companion_Result_ .Create_Failure_ (dafny .SeqOfString (err .Error ()))
30- }
31- dafnyCharArray = append (dafnyCharArray , dafny .Char (c ))
32+ res , err := DecodeFromNativeGoByteArray (dafny .ToByteArray (utf8EncodedDafnySeq ))
33+ if err != nil {
34+ return Wrappers .Companion_Result_ .Create_Failure_ (dafny .SeqOfString (err .Error ()))
3235 }
33- return Wrappers .Companion_Result_ .Default (dafny .SeqOfChars (dafnyCharArray ... ))
36+
37+ return Wrappers .Companion_Result_ .Create_Success_ (res )
3438}
3539
3640// Encode encodes utf16 encoded dafny char (rune) to utf-8 Go rune sequence.
3741// Anything we receive here is supposed to be utf16 encoded Go rune
3842// since this extern is for unicode-char:false.
3943func Encode (utf16EncodedDafnySeq dafny.Sequence ) Wrappers.Result {
40- encodedUtf16 := utf16EncodedDafnySeqToUint16 (utf16EncodedDafnySeq )
41- decodedUtf16 := utf16 .Decode (encodedUtf16 )
42- var utf8EncodedBytes []byte
43- for _ , r := range decodedUtf16 {
44- if ! utf8 .ValidRune (r ) || r == unicode .ReplacementChar {
45- return Wrappers .Companion_Result_ .Create_Failure_ (dafny .SeqOfString ("Failed to utf8 encode rune" ))
46- }
47- buf := make ([]byte , utf8 .RuneLen (r ))
48- n := utf8 .EncodeRune (buf , r )
49- utf8EncodedBytes = append (utf8EncodedBytes , buf [:n ]... )
44+ utf8EncodedBytes , err := decodeUtf16 (utf16EncodedDafnySeq )
45+ if err != nil {
46+ return Wrappers .Companion_Result_ .Create_Failure_ (dafny .SeqOfString (err .Error ()))
5047 }
5148 return Wrappers .Companion_Result_ .Create_Success_ (dafny .SeqOfBytes (utf8EncodedBytes ))
5249}
5350
54- func utf16EncodedDafnySeqToUint16 (seq dafny.Sequence ) []uint16 {
55- var r []uint16
51+ // This method is to be called from the Type Conversion layer.
52+ // We reuse the same method so that all conversions are consistent.
53+ func DecodeFromNativeGoByteArray (utf8EncodedByteArray []byte ) (dafny.Sequence , error ) {
54+ if ! utf8 .Valid (utf8EncodedByteArray ) {
55+ return nil , fmt .Errorf ("invalid utf8 encoded sequence: %v" , utf8EncodedByteArray )
56+ }
57+ utf16Encoded := utf16 .Encode ([]rune (string (utf8EncodedByteArray )))
58+ var dafnyCharArray []dafny.Char
59+ for _ , c := range utf16Encoded {
60+ dafnyCharArray = append (dafnyCharArray , dafny .Char (c ))
61+ }
62+ return dafny .SeqOfChars (dafnyCharArray ... ), nil
63+ }
64+
65+ // decode appends to buf the Unicode code point sequence represented
66+ // by the UTF-16 encoding seq, then encode the code point as utf8 and return the utf8 buffer
67+ func decodeUtf16 (seq dafny.Sequence ) ([]byte , error ) {
68+ utf8EncodedBytes := []byte {}
69+
5670 for i := dafny .Iterate (seq ); ; {
57- val , notEndOfSequence := i ()
58- if ! notEndOfSequence {
59- return r
71+ firstVal , firstValExists := i ()
72+ if ! firstValExists {
73+ // Iterator has finished, return the buffer
74+ return utf8EncodedBytes , nil
6075 } else {
61- r = append (r , uint16 (val .(dafny.Char )))
76+ var ar rune
77+
78+ // We should be able to rely on dafny that anything inside the seq is utf16 encoded
79+ // with unicode-char: false. But given the Long Psi issue, it's better to be safe.
80+ // First check if it's a dafny.Char type, then check if it's within the limits of uint16.
81+ firstChar , firstValIsAChar := firstVal .(dafny.Char )
82+ if ! firstValIsAChar || firstChar > math .MaxUint16 || firstChar < 0 {
83+ return nil , fmt .Errorf ("invalid utf16 encoded sequence: %v" , seq )
84+ }
85+
86+ // Downcast to uint16
87+ switch r1 := uint16 (firstChar ); {
88+
89+ case r1 < surr1 , surr3 <= r1 :
90+ // normal rune
91+ ar = rune (r1 )
92+
93+ case utf16 .IsSurrogate (rune (r1 )):
94+ // If firstVal is surrogate, then we need the secondVal to construct the pair
95+ secondVal , ok := i ()
96+
97+ // Same sanity check as line 84
98+ secondChar , secondValIsAChar := secondVal .(dafny.Char )
99+ if ! ok || ! secondValIsAChar || secondChar > math .MaxUint16 || secondChar < 0 {
100+ return nil , fmt .Errorf ("invalid utf16 encoded sequence: %v" , seq )
101+ }
102+
103+ // Check if the secondVal is within the valid low surrogate range
104+ if surr2 <= uint16 (secondChar ) && uint16 (secondChar ) < surr3 {
105+ // valid surrogate sequence
106+ ar = utf16 .DecodeRune (rune (r1 ), rune (uint16 (secondChar )))
107+ } else {
108+ return nil , fmt .Errorf ("invalid utf16 encoded sequence: %v" , seq )
109+ }
110+ default :
111+ return nil , fmt .Errorf ("invalid utf16 encoded sequence: %v" , seq )
112+ }
113+
114+ // Create the buffer (upto 4 bytes) to hold the utf8 rune
115+ buf := make ([]byte , utf8 .RuneLen (ar ))
116+ n := utf8 .EncodeRune (buf , ar )
117+
118+ // Append to the result
119+ utf8EncodedBytes = append (utf8EncodedBytes , buf [:n ]... )
62120 }
63121 }
64122}
0 commit comments