Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions e2e_test/batch/basic/dollar_quoted_string.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Anonymous dollar quoting: $$...$$ delimits a string literal.
query T
select $$hello$$;
----
hello

# A dollar quote with a *different* tag inside the body is plain content,
# not a nested quote: $bar$...$bar$ stays in the string verbatim.
query T
select $foo$the content with $bar$nested$bar$ usage$foo$;
----
the content with $bar$nested$bar$ usage

# The string ends at the first matching $fo$ terminator; the trailing
# `o$` after it is lexed as a separate word (it acts as the column alias).
query T
select $fo$o$not nesting just $ sign$fo$o$;
----
o$not nesting just $ sign

# $$hel$$ closes at the second $$; the remaining `lo$$` is lexed as a
# word and serves as the column alias, so only `hel` is the value.
query T
select $$hel$$lo$$;
----
hel

statement ok
drop table if exists t;

statement ok
create table t(v int);

# Dollar-quoted strings are accepted wherever a literal string is expected.
statement ok
COMMENT ON TABLE t IS $$hello$$;

# ...including as the text of a typed literal.
query T
SELECT DATE $$1999-01-01$$;
----
1999-01-01

statement ok
drop table t;
1 change: 1 addition & 0 deletions src/frontend/src/binder/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ impl Binder {
match value {
Value::Number(s) => self.bind_number(s.clone()),
Value::SingleQuotedString(s) => self.bind_string(s),
Value::DollarQuotedString(s) => self.bind_string(&s.value),
Value::CstyleEscapedString(s) => self.bind_string(&s.value),
Value::Boolean(b) => self.bind_bool(*b),
// Both null and string literal will be treated as `unknown` during type inference.
Expand Down
1 change: 1 addition & 0 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4255,6 +4255,7 @@ impl Parser<'_> {
let token = self.next_token();
match token.token {
Token::SingleQuotedString(s) => Ok(s),
Token::DollarQuotedString(s) => Ok(s.value),
_ => self.expected_at(checkpoint, "literal string"),
}
}
Expand Down
165 changes: 115 additions & 50 deletions src/sqlparser/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -663,76 +663,57 @@ impl<'a> Tokenizer<'a> {
self.next();

if let Some('$') = self.peek() {
// syntax: $$......$$
self.next();

let mut is_terminated = false;
let mut prev: Option<char> = None;

while let Some(ch) = self.peek() {
if prev == Some('$') {
if ch == '$' {
let delimiter = "$$";
while self.peek().is_some() {
if self.starts_with(delimiter) {
for _ in delimiter.chars() {
self.next();
is_terminated = true;
break;
} else {
s.push('$');
s.push(ch);
}
} else if ch != '$' {
s.push(ch);
return Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: None,
}));
}

prev = Some(ch);
self.next();
s.push(self.next().unwrap());
}
Comment on lines +669 to 681

return if self.peek().is_none() && !is_terminated {
self.error("Unterminated dollar-quoted string")
} else {
Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: None,
}))
};
self.error("Unterminated dollar-quoted string")
} else {
// syntax: $SomeTag$.....$SomeTag$
value.push_str(&self.peeking_take_while(|ch| ch.is_alphanumeric() || ch == '_'));

if let Some('$') = self.peek() {
self.next();
s.push_str(&self.peeking_take_while(|ch| ch != '$'));
if !is_valid_dollar_quote_tag(&value) {
return self.error(format!("Invalid dollar-quoted string tag \"{}\"", value));
}

match self.peek() {
Some('$') => {
self.next();
for c in value.chars() {
let next_char = self.next();
if Some(c) != next_char {
return self.error(format!(
"Unterminated dollar-quoted string at or near \"{}\"",
value
));
}
}
self.next();

if let Some('$') = self.peek() {
let delimiter = format!("${}$", value);
while self.peek().is_some() {
if self.starts_with(&delimiter) {
for _ in delimiter.chars() {
self.next();
} else {
return self.error("Unterminated dollar-quoted string, expected $");
}
return Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: Some(value),
}));
}
_ => {
return self.error("Unterminated dollar-quoted, expected $");
}
s.push(self.next().unwrap());
}
Comment on lines +695 to 707

self.error(format!(
"Unterminated dollar-quoted string at or near \"{}\"",
value
))
} else {
return Ok(Token::Parameter(value));
Ok(Token::Parameter(value))
}
}

Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: if value.is_empty() { None } else { Some(value) },
}))
}

fn error<R>(&self, message: impl Into<String>) -> Result<R, TokenizerError> {
Expand Down Expand Up @@ -1002,6 +983,16 @@ impl<'a> Tokenizer<'a> {
Ok(Some(t))
}

/// Returns `true` when the not-yet-consumed input begins with `expected`.
///
/// Pure lookahead: the tokenizer's position is left untouched.
fn starts_with(&self, expected: &str) -> bool {
    // Clone the char cursor so peeking ahead never advances `self`.
    let mut lookahead = self.chars.clone();
    expected
        .chars()
        .all(|want| lookahead.next() == Some(want))
}

/// Read from `self` until `predicate` returns `false` or EOF is hit.
/// Return the characters read as String, and keep the first non-matching
/// char available as `self.next()`.
Expand Down Expand Up @@ -1041,6 +1032,12 @@ fn is_identifier_part(ch: char) -> bool {
ch.is_ascii_alphanumeric() || ch == '$' || ch == '_'
}

/// A dollar-quote tag is valid when it is shaped like an identifier:
/// it starts with an ASCII letter or `_`, and every following character
/// is an ASCII letter, digit, or `_`. The empty string is not a valid tag.
fn is_valid_dollar_quote_tag(tag: &str) -> bool {
    let mut rest = tag.chars();
    let leading_ok = match rest.next() {
        Some(first) => first.is_ascii_alphabetic() || first == '_',
        // Empty tag: rejected.
        None => false,
    };
    leading_ok && rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1149,6 +1146,74 @@ mod tests {
compare(expected, tokens);
}

#[test]
fn tokenize_tagged_dollar_quoted_string_with_inner_different_tag() {
    // A differently-tagged dollar quote inside the body is literal
    // content, not a nested quote: only `$foo$` terminates the string.
    let sql = String::from("SELECT $foo$the content with $bar$nested$bar$ usage$foo$");
    let mut tokenizer = Tokenizer::new(&sql);
    let actual = tokenizer.tokenize_with_whitespace().unwrap();

    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::DollarQuotedString(DollarQuotedString {
                tag: Some("foo".into()),
                value: "the content with $bar$nested$bar$ usage".into(),
            }),
        ],
        actual,
    );
}

#[test]
fn tokenize_tagged_dollar_quoted_string_with_identifier_tag() {
    // Tags may contain underscores and digits, as long as the first
    // character is a letter or underscore.
    let sql = String::from("SELECT $_tag_1$hello$_tag_1$");
    let mut tokenizer = Tokenizer::new(&sql);
    let actual = tokenizer.tokenize_with_whitespace().unwrap();

    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::DollarQuotedString(DollarQuotedString {
                tag: Some("_tag_1".into()),
                value: "hello".into(),
            }),
        ],
        actual,
    );
}

#[test]
fn tokenize_dollar_quoted_string_with_invalid_tag() {
    // A tag starting with a digit is not identifier-shaped, so the
    // tokenizer must reject it with a descriptive error.
    let sql = String::from("SELECT $1tag$hello$1tag$");
    let mut tokenizer = Tokenizer::new(&sql);
    let message = tokenizer
        .tokenize_with_whitespace()
        .unwrap_err()
        .to_string();

    assert!(message.contains("Invalid dollar-quoted string tag \"1tag\""));
}

#[test]
fn tokenize_tagged_dollar_quoted_string_followed_by_alias_with_dollar() {
    // The quote closes at the first `$go$` terminator; the trailing `o$`
    // is then lexed as an ordinary word (a column alias in this position).
    let sql = String::from("SELECT $go$o$not nesting just $ sign$go$o$");
    let mut tokenizer = Tokenizer::new(&sql);
    let actual = tokenizer.tokenize_with_whitespace().unwrap();

    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::DollarQuotedString(DollarQuotedString {
                tag: Some("go".into()),
                value: "o$not nesting just $ sign".into(),
            }),
            Token::make_word("o$", None),
        ],
        actual,
    );
}

#[test]
fn tokenize_logical_xor() {
let sql =
Expand Down
21 changes: 21 additions & 0 deletions src/sqlparser/tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2245,6 +2245,27 @@ fn parse_literal_date() {
);
}

#[test]
fn parse_literal_string_with_dollar_quoted_string() {
    // A dollar-quoted string is accepted where a literal string is
    // expected, here as the text of a typed DATE literal.
    let stmts = parse_sql_statements("SELECT DATE $$1999-01-01$$").unwrap();

    let Statement::Query(query) = &stmts[0] else {
        unreachable!()
    };
    let SetExpr::Select(select) = &query.body else {
        unreachable!()
    };

    assert_eq!(
        expr_from_projection(only(&select.projection)),
        &Expr::TypedString {
            data_type: DataType::Date,
            value: "1999-01-01".into()
        },
    );
}

#[test]
fn parse_literal_time() {
let sql = "SELECT TIME '01:23:34'";
Expand Down
Loading