Commit 1baf58a
Start new line if \r and dialect is postgres
Currently the tokenizer throws an error for:

    insert into cats_2 (petname) values ('foo'),--\r(version()||'\n');

This is because Postgres treats \r as a separate newline character; see
https://github.com/postgres/postgres/blob/master/src/backend/parser/scan.l:

> In order to make the world safe for Windows and Mac clients as well as Unix ones, we accept either \n or \r as a newline.
> A DOS-style \r\n sequence will be seen as two successive newlines, but that doesn't cause any problems.
>
> non_newline [^\n\r]
> comment ("--"{non_newline}*)

Let's make sure we start a new line if we encounter a \r when tokenizing a comment.
1 parent fe36020 commit 1baf58a
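
As a repro, here is a minimal sketch (not part of this commit) that runs the failing statement through the crate's public parsing entry point, assuming the usual sqlparser `Parser::parse_sql` / `PostgreSqlDialect` API. With this patch the \r terminates the comment, so the second VALUES row is tokenized normally:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // The literal \r ends the single-line comment under the Postgres
        // dialect, so `(version()||'\n')` after it is read as the second
        // VALUES row. (`\\n` keeps a literal backslash-n inside the SQL
        // string, matching the statement from the commit message.)
        let sql = "insert into cats_2 (petname) values ('foo'),--\r(version()||'\\n');";
        let statements = Parser::parse_sql(&PostgreSqlDialect {}, sql);
        assert!(statements.is_ok());
    }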

File tree

1 file changed: +57 -6 lines changed

src/tokenizer.rs

Lines changed: 57 additions & 6 deletions
@@ -1621,11 +1621,17 @@ impl<'a> Tokenizer<'a> {
 
     // Consume characters until newline
     fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
-        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
+        let mut comment = peeking_take_while(chars, |ch| match ch {
+            '\n' => false, // Always stop at \n
+            '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres
+            _ => true, // Keep consuming for other characters
+        });
+
         if let Some(ch) = chars.next() {
-            assert_eq!(ch, '\n');
+            assert!(ch == '\n' || ch == '\r');
             comment.push(ch);
         }
+
         comment
     }
 
@@ -2672,17 +2678,62 @@ mod tests {
 
     #[test]
     fn tokenize_comment() {
-        let sql = String::from("0--this is a comment\n1");
+        let test_cases = vec![
+            (
+                String::from("0--this is a comment\n1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\n".to_string(),
+                    }),
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+            (
+                String::from("0--this is a comment\r1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\r1".to_string(),
+                    }),
+                ],
+            ),
+            (
+                String::from("0--this is a comment\r\n1"),
+                vec![
+                    Token::Number("0".to_string(), false),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "this is a comment\r\n".to_string(),
+                    }),
+                    Token::Number("1".to_string(), false),
+                ],
+            ),
+        ];
 
         let dialect = GenericDialect {};
+
+        for (sql, expected) in test_cases {
+            let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+            compare(expected, tokens);
+        }
+    }
+
+    #[test]
+    fn tokenize_comment_postgres() {
+        let sql = String::from("1--\r0");
+
+        let dialect = PostgreSqlDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         let expected = vec![
-            Token::Number("0".to_string(), false),
+            Token::Number("1".to_string(), false),
             Token::Whitespace(Whitespace::SingleLineComment {
                 prefix: "--".to_string(),
-                comment: "this is a comment\n".to_string(),
+                comment: "\r".to_string(),
             }),
-            Token::Number("1".to_string(), false),
+            Token::Number("0".to_string(), false),
         ];
         compare(expected, tokens);
     }
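
For context, a hedged sketch (not in the commit) of how a DOS-style \r\n line ending behaves under the new rule, assuming the tokenizer's usual handling of a bare \n as Whitespace::Newline: the \r terminates and is absorbed into the comment, and the following \n becomes an ordinary newline token, matching the "two successive newlines" note in scan.l:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

    fn main() {
        let dialect = PostgreSqlDialect {};
        // "\r" ends the comment (and is pushed onto it); "\n" is then
        // tokenized on its own as newline whitespace.
        let tokens = Tokenizer::new(&dialect, "0--x\r\n1").tokenize().unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Number("0".to_string(), false),
                Token::Whitespace(Whitespace::SingleLineComment {
                    prefix: "--".to_string(),
                    comment: "x\r".to_string(),
                }),
                Token::Whitespace(Whitespace::Newline),
                Token::Number("1".to_string(), false),
            ]
        );
    }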
