Skip to content

Commit

Permalink
optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
jasonnnli committed Feb 26, 2024
1 parent afc0ca3 commit 4407b66
Showing 1 changed file with 48 additions and 46 deletions.
94 changes: 48 additions & 46 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ use datafusion_expr::expr::{Alias, GroupingSet, WindowFunction};
use datafusion_expr::expr_vec_fmt;
use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs};
use datafusion_expr::{Expr, LogicalPlan};
use std::collections::{HashMap, VecDeque};
use std::collections::HashMap;
use std::iter::Peekable;
use std::str::Chars;

/// Make a best-effort attempt at resolving all columns in the expression tree
pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
Expand Down Expand Up @@ -247,43 +249,51 @@ pub(crate) fn normalize_ident(id: Ident) -> String {
}
}

/// Character iterator over a `&str` with one-item lookahead. The escape-sequence
/// helpers take it by mutable reference so they can `peek()` at the next
/// character and only consume it with `next()` when it belongs to the escape.
type PeekableChars<'a> = Peekable<Chars<'a>>;

pub(crate) fn unescape(s: &str) -> Option<String> {
let mut queue: VecDeque<_> = String::from(s).chars().collect();
let mut chars = s.chars().peekable();
let mut unescaped = String::new();

while let Some(c) = queue.pop_front() {
while let Some(c) = chars.next() {
if c != '\\' {
unescaped.push(c);
continue;
}

let ch = match queue.pop_front() {
Some(c) => match c {
'b' => '\u{0008}',
'f' => '\u{000C}',
'n' => '\n',
'r' => '\r',
't' => '\t',
'u' => unescape_unicode_16(&mut queue)?,
'U' => unescape_unicode_32(&mut queue)?,
'x' => unescape_byte(&mut queue)?,
c if c.is_digit(8) => unescape_octal(c, &mut queue)?,
c => c,
},
None => return None,
let c = match chars.next()? {
'b' => '\u{0008}',
'f' => '\u{000C}',
'n' => '\n',
'r' => '\r',
't' => '\t',
'u' => unescape_unicode_16(&mut chars)?,
'U' => unescape_unicode_32(&mut chars)?,
'x' => unescape_byte(&mut chars)?,
c if c.is_digit(8) => unescape_octal(c, &mut chars)?,
c => c,
};
unescaped.push(ch);
unescaped.push(check_null_char(c)?);
}

Some(unescaped)
}

/// Rejects the NUL character: yields `None` for `'\0'` and hands every other
/// character back unchanged inside `Some`.
#[inline]
fn check_null_char(c: char) -> Option<char> {
    match c {
        '\0' => None,
        other => Some(other),
    }
}

// Hexadecimal byte value. \xh, \xhh (h = 0–9, A–F)
fn unescape_byte(queue: &mut VecDeque<char>) -> Option<char> {
fn unescape_byte(chars: &mut PeekableChars) -> Option<char> {
let mut s = String::new();

for _ in 0..2 {
match next_hex_digit(queue) {
match next_hex_digit(chars) {
Some(c) => s.push(c),
None => break,
}
Expand All @@ -297,69 +307,60 @@ fn unescape_byte(queue: &mut VecDeque<char>) -> Option<char> {
}

#[inline]
fn next_hex_digit(queue: &mut VecDeque<char>) -> Option<char> {
match queue.front() {
Some(c) if c.is_ascii_hexdigit() => queue.pop_front(),
fn next_hex_digit(chars: &mut PeekableChars) -> Option<char> {
match chars.peek() {
Some(c) if c.is_ascii_hexdigit() => chars.next(),
_ => None,
}
}

// 16-bit hexadecimal Unicode character value. \uxxxx (x = 0–9, A–F)
fn unescape_unicode_16(queue: &mut VecDeque<char>) -> Option<char> {
unescape_unicode::<4>(queue)
fn unescape_unicode_16(chars: &mut PeekableChars) -> Option<char> {
unescape_unicode::<4>(chars)
}

// 32-bit hexadecimal Unicode character value. \Uxxxxxxxx (x = 0–9, A–F)
fn unescape_unicode_32(queue: &mut VecDeque<char>) -> Option<char> {
unescape_unicode::<8>(queue)
fn unescape_unicode_32(chars: &mut PeekableChars) -> Option<char> {
unescape_unicode::<8>(chars)
}

fn unescape_unicode<const NUM: usize>(queue: &mut VecDeque<char>) -> Option<char> {
fn unescape_unicode<const NUM: usize>(chars: &mut PeekableChars) -> Option<char> {
let mut s = String::new();
for _ in 0..NUM {
s.push(queue.pop_front()?);
}
to_char::<16>(&s).and_then(check_null_char)
}

#[inline]
fn check_null_char(c: char) -> Option<char> {
if c == '\0' {
None
} else {
Some(c)
s.push(chars.next()?);
}
to_char::<16>(&s)
}

// Octal byte value. \o, \oo, \ooo (o = 0–7)
fn unescape_octal(c: char, queue: &mut VecDeque<char>) -> Option<char> {
fn unescape_octal(c: char, chars: &mut PeekableChars) -> Option<char> {
let mut s = String::new();

match c {
'0' | '1' | '2' | '3' => {
s.push(c);
for _ in 0..2 {
match next_octal_digest(queue) {
match next_octal_digest(chars) {
Some(c) => s.push(c),
None => break,
}
}
}
'4' | '5' | '6' | '7' => {
s.push(c);
if let Some(c) = next_octal_digest(queue) {
if let Some(c) = next_octal_digest(chars) {
s.push(c);
}
}
_ => return None,
}
to_char::<8>(&s).and_then(check_null_char)
to_char::<8>(&s)
}

#[inline]
fn next_octal_digest(queue: &mut VecDeque<char>) -> Option<char> {
match queue.front() {
Some(c) if c.is_digit(8) => queue.pop_front(),
fn next_octal_digest(chars: &mut PeekableChars) -> Option<char> {
match chars.peek() {
Some(c) if c.is_digit(8) => chars.next(),
_ => None,
}
}
Expand Down Expand Up @@ -409,6 +410,7 @@ mod tests {
check_unescape(r"\x4L", Some("\u{0004}L"));
check_unescape(r"\x", Some("x"));
check_unescape(r"\xP", Some("xP"));
check_unescape(r"\x0", None);

// octal byte value
check_unescape(r"\1", Some("\u{0001}"));
Expand Down

0 comments on commit 4407b66

Please sign in to comment.