@@ -225,7 +225,7 @@ where
225225 return result ( self , scratch) ;
226226 }
227227 b'\\' => {
228- tri ! ( parse_escape( self , scratch) ) ;
228+ tri ! ( parse_escape( self , validate , scratch) ) ;
229229 }
230230 _ => {
231231 if validate {
@@ -465,7 +465,7 @@ impl<'a> SliceRead<'a> {
465465 b'\\' => {
466466 scratch. extend_from_slice ( & self . slice [ start..self . index ] ) ;
467467 self . index += 1 ;
468- tri ! ( parse_escape( self , scratch) ) ;
468+ tri ! ( parse_escape( self , validate , scratch) ) ;
469469 start = self . index ;
470470 }
471471 _ => {
@@ -817,6 +817,16 @@ where
817817 }
818818}
819819
820+ fn peek_or_eof < ' de , R > ( read : & mut R ) -> Result < u8 >
821+ where
822+ R : ?Sized + Read < ' de > ,
823+ {
824+ match tri ! ( read. peek( ) ) {
825+ Some ( b) => Ok ( b) ,
826+ None => error ( read, ErrorCode :: EofWhileParsingString ) ,
827+ }
828+ }
829+
820830fn error < ' de , R , T > ( read : & R , reason : ErrorCode ) -> Result < T >
821831where
822832 R : ?Sized + Read < ' de > ,
@@ -831,7 +841,11 @@ fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
831841
832842/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
833843/// the previous byte read was a backslash.
834- fn parse_escape < ' de , R : Read < ' de > > ( read : & mut R , scratch : & mut Vec < u8 > ) -> Result < ( ) > {
844+ fn parse_escape < ' de , R : Read < ' de > > (
845+ read : & mut R ,
846+ validate : bool ,
847+ scratch : & mut Vec < u8 > ,
848+ ) -> Result < ( ) > {
835849 let ch = tri ! ( next_or_eof( read) ) ;
836850
837851 match ch {
@@ -845,19 +859,67 @@ fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec<u8>) -> Resul
845859 b't' => scratch. push ( b'\t' ) ,
846860 b'u' => {
847861 let c = match tri ! ( read. decode_hex_escape( ) ) {
848- 0xDC00 ..=0xDFFF => {
849- return error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape ) ;
862+ n @ 0xDC00 ..=0xDFFF => {
863+ if validate {
864+ return error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape ) ;
865+ }
866+
867+ let utf8_bytes = [
868+ ( n >> 12 & 0x0F ) as u8 | 0b1110_0000 ,
869+ ( n >> 6 & 0x3F ) as u8 | 0b1000_0000 ,
870+ ( n & 0x3F ) as u8 | 0b1000_0000 ,
871+ ] ;
872+
873+ scratch. extend_from_slice ( & utf8_bytes) ;
874+
875+ return Ok ( ( ) ) ;
850876 }
851877
852878 // Non-BMP characters are encoded as a sequence of
853879 // two hex escapes, representing UTF-16 surrogates.
880+ // If `validate` is false and we only find a single
881+ // hex escape that is a surrogate, then we'll accept
882+ // it instead of erroring.
854883 n1 @ 0xD800 ..=0xDBFF => {
855- if tri ! ( next_or_eof( read) ) != b'\\' {
856- return error ( read, ErrorCode :: UnexpectedEndOfHexEscape ) ;
884+ if tri ! ( peek_or_eof( read) ) != b'\\' {
885+ if validate {
886+ read. discard ( ) ;
887+ return error ( read, ErrorCode :: UnexpectedEndOfHexEscape ) ;
888+ }
889+
890+ let utf8_bytes = [
891+ ( n1 >> 12 & 0x0F ) as u8 | 0b1110_0000 ,
892+ ( n1 >> 6 & 0x3F ) as u8 | 0b1000_0000 ,
893+ ( n1 & 0x3F ) as u8 | 0b1000_0000 ,
894+ ] ;
895+
896+ scratch. extend_from_slice ( & utf8_bytes) ;
897+
898+ return Ok ( ( ) ) ;
857899 }
858- if tri ! ( next_or_eof( read) ) != b'u' {
859- return error ( read, ErrorCode :: UnexpectedEndOfHexEscape ) ;
900+ read. discard ( ) ;
901+
902+ if tri ! ( peek_or_eof( read) ) != b'u' {
903+ if validate {
904+ read. discard ( ) ;
905+ return error ( read, ErrorCode :: UnexpectedEndOfHexEscape ) ;
906+ }
907+
908+ let utf8_bytes = [
909+ ( n1 >> 12 & 0x0F ) as u8 | 0b1110_0000 ,
910+ ( n1 >> 6 & 0x3F ) as u8 | 0b1000_0000 ,
911+ ( n1 & 0x3F ) as u8 | 0b1000_0000 ,
912+ ] ;
913+
914+ scratch. extend_from_slice ( & utf8_bytes) ;
915+
916+ // The \ prior to this byte started an escape sequence,
917+ // so we need to parse that now.
918+ parse_escape ( read, validate, scratch) ?;
919+
920+ return Ok ( ( ) ) ;
860921 }
922+ read. discard ( ) ;
861923
862924 let n2 = tri ! ( read. decode_hex_escape( ) ) ;
863925
0 commit comments