Skip to content

Commit c808c4e

Browse files
authored
Correctly look for end delimiter dollar quoted string (#1650)
1 parent 3b4dc0f commit c808c4e

File tree

1 file changed

+129
-47
lines changed

1 file changed

+129
-47
lines changed

src/tokenizer.rs

Lines changed: 129 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,46 +1566,33 @@ impl<'a> Tokenizer<'a> {
15661566
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
15671567
chars.next();
15681568

1569-
'searching_for_end: loop {
1570-
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
1571-
match chars.peek() {
1572-
Some('$') => {
1573-
chars.next();
1574-
let mut maybe_s = String::from("$");
1575-
for c in value.chars() {
1576-
if let Some(next_char) = chars.next() {
1577-
maybe_s.push(next_char);
1578-
if next_char != c {
1579-
// This doesn't match the dollar quote delimiter so this
1580-
// is not the end of the string.
1581-
s.push_str(&maybe_s);
1582-
continue 'searching_for_end;
1583-
}
1584-
} else {
1585-
return self.tokenizer_error(
1586-
chars.location(),
1587-
"Unterminated dollar-quoted, expected $",
1588-
);
1569+
let mut temp = String::new();
1570+
let end_delimiter = format!("${}$", value);
1571+
1572+
loop {
1573+
match chars.next() {
1574+
Some(ch) => {
1575+
temp.push(ch);
1576+
1577+
if temp.ends_with(&end_delimiter) {
1578+
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
1579+
s.push_str(temp);
15891580
}
1590-
}
1591-
if chars.peek() == Some(&'$') {
1592-
chars.next();
1593-
maybe_s.push('$');
1594-
// maybe_s matches the end delimiter
1595-
break 'searching_for_end;
1596-
} else {
1597-
// This also doesn't match the dollar quote delimiter as there are
1598-
// more characters before the second dollar so this is not the end
1599-
// of the string.
1600-
s.push_str(&maybe_s);
1601-
continue 'searching_for_end;
1581+
break;
16021582
}
16031583
}
1604-
_ => {
1584+
None => {
1585+
if temp.ends_with(&end_delimiter) {
1586+
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
1587+
s.push_str(temp);
1588+
}
1589+
break;
1590+
}
1591+
16051592
return self.tokenizer_error(
16061593
chars.location(),
16071594
"Unterminated dollar-quoted, expected $",
1608-
)
1595+
);
16091596
}
16101597
}
16111598
}
@@ -2569,20 +2556,67 @@ mod tests {
25692556

25702557
// Tagged dollar-quoted string tokenization. In this diff the removed (`-`) lines are the
// former single-input version; the added (`+`) lines turn it into a table of
// (sql, expected tokens) pairs, each tokenized with `GenericDialect` and passed to `compare`.
#[test]
25712558
fn tokenize_dollar_quoted_string_tagged() {
2572-
let sql = String::from(
2573-
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
2574-
);
2575-
let dialect = GenericDialect {};
2576-
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2577-
let expected = vec![
2578-
Token::make_keyword("SELECT"),
2579-
Token::Whitespace(Whitespace::Space),
2580-
Token::DollarQuotedString(DollarQuotedString {
2581-
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
2582-
tag: Some("tag".into()),
2583-
}),
2559+
let test_cases = vec![
2560+
// Body holds `'$'`, `$tags like this$` and a `$$` right before the closing `$tag$`;
// all of them are expected to remain in the value.
(
2561+
String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"),
2562+
vec![
2563+
Token::make_keyword("SELECT"),
2564+
Token::Whitespace(Whitespace::Space),
2565+
Token::DollarQuotedString(DollarQuotedString {
2566+
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
2567+
tag: Some("tag".into()),
2568+
})
2569+
]
2570+
),
2571+
// `$ab` is only a prefix of the closing delimiter `$abc$`, so it is expected in the value.
(
2572+
String::from("SELECT $abc$x$ab$abc$"),
2573+
vec![
2574+
Token::make_keyword("SELECT"),
2575+
Token::Whitespace(Whitespace::Space),
2576+
Token::DollarQuotedString(DollarQuotedString {
2577+
value: "x$ab".into(),
2578+
tag: Some("abc".into()),
2579+
})
2580+
]
2581+
),
2582+
// Opening `$abc$` immediately followed by closing `$abc$`: expected value is empty.
(
2583+
String::from("SELECT $abc$$abc$"),
2584+
vec![
2585+
Token::make_keyword("SELECT"),
2586+
Token::Whitespace(Whitespace::Space),
2587+
Token::DollarQuotedString(DollarQuotedString {
2588+
value: "".into(),
2589+
tag: Some("abc".into()),
2590+
})
2591+
]
2592+
),
2593+
// No whitespace around the string: number tokens are expected directly before and after it.
(
2594+
String::from("0$abc$$abc$1"),
2595+
vec![
2596+
Token::Number("0".into(), false),
2597+
Token::DollarQuotedString(DollarQuotedString {
2598+
value: "".into(),
2599+
tag: Some("abc".into()),
2600+
}),
2601+
Token::Number("1".into(), false),
2602+
]
2603+
),
2604+
// An inner `$q$...$q$` pair inside a `$function$` string is expected to be kept verbatim.
(
2605+
String::from("$function$abc$q$data$q$$function$"),
2606+
vec![
2607+
Token::DollarQuotedString(DollarQuotedString {
2608+
value: "abc$q$data$q$".into(),
2609+
tag: Some("function".into()),
2610+
}),
2611+
]
2612+
),
25842613
];
2585-
compare(expected, tokens);
2614+
2615+
let dialect = GenericDialect {};
2616+
for (sql, expected) in test_cases {
2617+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2618+
compare(expected, tokens);
2619+
}
25862620
}
25872621

25882622
#[test]
@@ -2601,6 +2635,22 @@ mod tests {
26012635
);
26022636
}
26032637

2638+
// After `SELECT `, the input opens a `$abc$`-tagged string and then contains only `abc$`,
// which never spells the closing delimiter `$abc$`. Tokenizing is asserted to fail with the
// "Unterminated dollar-quoted" error at line 1, column 17 (one past the 16-character input).
#[test]
2639+
fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() {
2640+
let sql = String::from("SELECT $abc$abc$");
2641+
let dialect = GenericDialect {};
2642+
assert_eq!(
2643+
Tokenizer::new(&dialect, &sql).tokenize(),
2644+
Err(TokenizerError {
2645+
message: "Unterminated dollar-quoted, expected $".into(),
2646+
location: Location {
2647+
line: 1,
2648+
column: 17
2649+
}
2650+
})
2651+
);
2652+
}
2653+
26042654
#[test]
26052655
fn tokenize_dollar_placeholder() {
26062656
let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC");
@@ -2625,6 +2675,38 @@ mod tests {
26252675
);
26262676
}
26272677

2678+
// A `$nested$` sequence inside a `$tag$`-delimited string is expected to stay in the value
// verbatim; the expected token carries tag "tag" and value "dollar $nested$ string".
#[test]
2679+
fn tokenize_nested_dollar_quoted_strings() {
2680+
let sql = String::from("SELECT $tag$dollar $nested$ string$tag$");
2681+
let dialect = GenericDialect {};
2682+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2683+
let expected = vec![
2684+
Token::make_keyword("SELECT"),
2685+
Token::Whitespace(Whitespace::Space),
2686+
Token::DollarQuotedString(DollarQuotedString {
2687+
value: "dollar $nested$ string".into(),
2688+
tag: Some("tag".into()),
2689+
}),
2690+
];
2691+
compare(expected, tokens);
2692+
}
2693+
2694+
// `$$$$` is an untagged opener `$$` directly followed by the closer `$$`; the expected
// token is a dollar-quoted string with an empty value and `tag: None`.
#[test]
2695+
fn tokenize_dollar_quoted_string_untagged_empty() {
2696+
let sql = String::from("SELECT $$$$");
2697+
let dialect = GenericDialect {};
2698+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2699+
let expected = vec![
2700+
Token::make_keyword("SELECT"),
2701+
Token::Whitespace(Whitespace::Space),
2702+
Token::DollarQuotedString(DollarQuotedString {
2703+
value: "".into(),
2704+
tag: None,
2705+
}),
2706+
];
2707+
compare(expected, tokens);
2708+
}
2709+
26282710
#[test]
26292711
fn tokenize_dollar_quoted_string_untagged() {
26302712
let sql =

0 commit comments

Comments
 (0)