Skip to content

Commit dd805e9

Browse files
authored
Support unicode whitespace (#482)
* Support unicode whitespace
* Add test
1 parent 97a148a commit dd805e9

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

src/tokenizer.rs

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -653,6 +653,10 @@ impl<'a> Tokenizer<'a> {
653653
);
654654
Ok(Some(Token::Placeholder(String::from("$") + &s)))
655655
}
656+
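// Whitespace check (including Unicode whitespace characters). This arm should stay last among the specific arms: `is_whitespace()` also matches some of the characters handled above, and match arms are tried in order.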
657+
ch if ch.is_whitespace() => {
658+
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
659+
}
656660
other => self.consume_and_return(chars, Token::Char(other)),
657661
},
658662
None => Ok(None),
@@ -1254,6 +1258,21 @@ mod tests {
12541258
compare(expected, tokens);
12551259
}
12561260

1261+
#[test]
1262+
fn tokenize_unicode_whitespace() {
1263+
let sql = String::from(" \u{2003}\n");
1264+
1265+
let dialect = GenericDialect {};
1266+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1267+
let tokens = tokenizer.tokenize().unwrap();
1268+
let expected = vec![
1269+
Token::Whitespace(Whitespace::Space),
1270+
Token::Whitespace(Whitespace::Space),
1271+
Token::Whitespace(Whitespace::Newline),
1272+
];
1273+
compare(expected, tokens);
1274+
}
1275+
12571276
#[test]
12581277
fn tokenize_mismatched_quotes() {
12591278
let sql = String::from("\"foo");

0 commit comments

Comments
 (0)