Skip to content

Commit

Permalink
update docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ParkMyCar committed Nov 14, 2023
1 parent 4cc7e98 commit 192f6cc
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 73 deletions.
57 changes: 57 additions & 0 deletions src/ore/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,63 @@ impl fmt::Display for Indent {
}
}

/// Newtype wrapper around [`String`] whose _byte_ length is guaranteed to be less than or equal to
/// the provided `MAX`.
///
/// The limit is measured in UTF-8 bytes (what [`String::len`] reports), not in characters, so a
/// string made of multi-byte characters reaches the limit sooner than its character count
/// suggests.
///
/// The wrapped string can only be obtained through [`MaxLenString::new`], and no mutable access
/// is handed out, which is what keeps the length invariant intact.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MaxLenString<const MAX: usize>(String);

impl<const MAX: usize> MaxLenString<MAX> {
    /// Creates a new [`MaxLenString`] returning an error if `s` is more than `MAX` bytes long.
    ///
    /// # Errors
    ///
    /// If `s` is longer than `MAX` bytes, the original string is handed back unchanged in the
    /// `Err` variant so the caller can report or truncate it without cloning.
    ///
    /// # Examples
    ///
    /// ```
    /// use mz_ore::str::MaxLenString;
    ///
    /// type ShortString = MaxLenString<30>;
    ///
    /// let good = ShortString::new("hello".to_string()).unwrap();
    /// assert_eq!(good.as_str(), "hello");
    ///
    /// // Note: this is only 8 characters, but each character requires 4 bytes.
    /// let too_long = "😊😊😊😊😊😊😊😊";
    /// let smol = ShortString::new(too_long.to_string());
    /// assert!(smol.is_err());
    /// ```
    pub fn new(s: String) -> Result<Self, String> {
        // `String::len` counts bytes, which is exactly the unit `MAX` is expressed in.
        if s.len() > MAX {
            return Err(s);
        }

        Ok(MaxLenString(s))
    }

    /// Consume self, returning the inner [`String`].
    pub fn into_inner(self) -> String {
        self.0
    }

    /// Returns a reference to the underlying string.
    pub fn as_str(&self) -> &str {
        // Borrow the inner `String` directly instead of leaning on deref coercion of `self`.
        self.0.as_str()
    }
}

/// Read-only access to the `str` API; there is deliberately no `DerefMut`, so the contents cannot
/// be grown through a dereference.
impl<const MAX: usize> Deref for MaxLenString<MAX> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Displays exactly the wrapped string, with no quoting or escaping.
impl<const MAX: usize> fmt::Display for MaxLenString<MAX> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
69 changes: 8 additions & 61 deletions src/sql-lexer/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,20 @@
extern crate alloc;

use std::error::Error;
use std::ops::Deref;
use std::{char, fmt};

use mz_ore::lex::LexBuf;
use mz_ore::str::StrExt;
use mz_ore::str::{MaxLenString, StrExt};
use serde::{Deserialize, Serialize};

use crate::keywords::Keyword;

// Maximum allowed identifier length in bytes.
/// Maximum allowed identifier length in bytes.
pub const MAX_IDENTIFIER_LENGTH: usize = 255;

/// Newtype that limits the length of identifiers.
pub type IdentString = MaxLenString<MAX_IDENTIFIER_LENGTH>;

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LexerError {
/// The error message.
Expand Down Expand Up @@ -76,65 +78,10 @@ impl LexerError {
}
}

/// Newtype wrapper around [`String`] whose _byte_ length is guaranteed to be less than or equal to
/// [`MAX_IDENTIFIER_LENGTH`].
///
/// The limit is measured in UTF-8 bytes (what [`String::len`] reports), not in characters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SmallString(String);

impl SmallString {
    /// Creates a new [`SmallString`] returning an error if `s` is more than
    /// [`MAX_IDENTIFIER_LENGTH`] bytes long.
    ///
    /// # Errors
    ///
    /// If `s` is longer than [`MAX_IDENTIFIER_LENGTH`] bytes, the original string is handed back
    /// unchanged in the `Err` variant.
    ///
    /// # Examples
    ///
    /// ```
    /// use mz_sql_lexer::lexer::SmallString;
    ///
    /// let good = SmallString::new("hello".to_string()).unwrap();
    /// assert_eq!(good.as_str(), "hello");
    ///
    /// // Note: this is only 64 characters, but each character requires 4 bytes.
    /// let too_long = "😊".repeat(64);
    /// let smol = SmallString::new(too_long);
    /// assert!(smol.is_err());
    /// ```
    pub fn new(s: String) -> Result<Self, String> {
        // `String::len` counts bytes, which is the unit the limit is expressed in.
        if s.len() > MAX_IDENTIFIER_LENGTH {
            return Err(s);
        }

        Ok(SmallString(s))
    }

    /// Consume self, returning the inner [`String`].
    pub fn into_inner(self) -> String {
        self.0
    }

    /// Returns a reference to the underlying string.
    pub fn as_str(&self) -> &str {
        // Borrow the inner `String` directly instead of leaning on deref coercion of `self`.
        self.0.as_str()
    }
}

/// Read-only access to the `str` API; there is deliberately no `DerefMut`, so the contents cannot
/// be grown through a dereference.
impl Deref for SmallString {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Displays exactly the wrapped string, with no quoting or escaping.
impl fmt::Display for SmallString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

#[derive(Debug, Clone, PartialEq)]
pub enum Token {
Keyword(Keyword),
Ident(SmallString),
Ident(IdentString),
String(String),
HexString(String),
Number(String),
Expand Down Expand Up @@ -279,7 +226,7 @@ fn lex_ident(buf: &mut LexBuf) -> Result<Token, LexerError> {
match word.parse() {
Ok(kw) => Ok(Token::Keyword(kw)),
Err(_) => {
let Ok(small) = SmallString::new(word.to_lowercase()) else {
let Ok(small) = IdentString::new(word.to_lowercase()) else {
bail!(
pos,
"identifier length exceeds {MAX_IDENTIFIER_LENGTH} bytes"
Expand All @@ -302,7 +249,7 @@ fn lex_quoted_ident(buf: &mut LexBuf) -> Result<Token, LexerError> {
None => bail!(pos, "unterminated quoted identifier"),
}
}
let Ok(small) = SmallString::new(s) else {
let Ok(small) = IdentString::new(s) else {
bail!(
pos,
"identifier length exceeds {MAX_IDENTIFIER_LENGTH} bytes"
Expand Down
58 changes: 46 additions & 12 deletions src/sql-parser/src/ast/defs/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use mz_ore::str::MaxLenString;
use mz_sql_lexer::keywords::Keyword;
use mz_sql_lexer::lexer::SmallString;
use std::fmt;

use crate::ast::display::{self, AstDisplay, AstFormatter};
Expand Down Expand Up @@ -70,15 +70,15 @@ impl Ident {
/// ```
/// use mz_sql_parser::ast::Ident;
///
/// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
/// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
///
/// let id = Ident::new_lossy(too_long);
///
/// // `new_lossy`` will truncate the provided string, since it's too long. Note the missing
/// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
/// // `🔴` characters.
/// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
/// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
/// ```
pub fn new_lossy<S: Into<String>>(value: S) -> Self {
let s: String = value.into();
Expand All @@ -91,7 +91,7 @@ impl Ident {
.chars()
.take_while(|c| {
byte_length += c.len_utf8();
byte_length < Self::MAX_LENGTH
byte_length <= Self::MAX_LENGTH
})
.collect();

Expand Down Expand Up @@ -177,6 +177,9 @@ impl Ident {

/// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
///
/// Note: We `soft_assert!` that the provided `suffix` is not too long, if it is, we'll
/// truncate it.
///
/// # Examples
///
/// ```
Expand All @@ -191,12 +194,43 @@ impl Ident {
/// // We truncated the original ident, removing all '🔵' chars.
/// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
/// ```
///
/// ### Too long suffix
/// If the provided suffix is too long, we'll also truncate that.
///
/// ```
/// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
/// use mz_sql_parser::{
/// ident,
/// ast::Ident,
/// };
///
/// let mut stem = ident!("hello___world");
///
/// let too_long_suffix = "\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
/// ";
///
/// stem.append_lossy(too_long_suffix);
///
/// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
/// let result = "hello___wor\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
/// ";
/// assert_eq!(stem.as_str(), result);
/// ```
pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
// Make sure our suffix at least leaves a bit of room for the original ident.
const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;

let mut suffix: String = suffix.into();
mz_ore::soft_assert!(suffix.len() <= MAX_SUFFIX_LENGTH);
mz_ore::soft_assert!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");

// Truncate the suffix as necessary.
if suffix.len() > MAX_SUFFIX_LENGTH {
Expand All @@ -205,7 +239,7 @@ impl Ident {
.chars()
.take_while(|c| {
byte_length += c.len_utf8();
byte_length < Self::MAX_LENGTH
byte_length <= MAX_SUFFIX_LENGTH
})
.collect();
}
Expand All @@ -219,7 +253,7 @@ impl Ident {
.chars()
.take_while(|c| {
byte_length += c.len_utf8();
byte_length < available_length
byte_length <= available_length
})
.collect();
}
Expand Down Expand Up @@ -257,10 +291,10 @@ impl Ident {
}
}

impl From<SmallString> for Ident {
fn from(value: SmallString) -> Self {
// Note: using unchecked here is okay because SmallString is known to be less than or equal
// to our max length.
impl From<MaxLenString<{ Ident::MAX_LENGTH }>> for Ident {
fn from(value: MaxLenString<{ Ident::MAX_LENGTH }>) -> Self {
// Note: using unchecked here is okay because the length of `MaxLenString` is guaranteed to
// be less than or equal to our MAX_LENGTH.
Ident::new_unchecked(value.into_inner())
}
}
Expand Down

0 comments on commit 192f6cc

Please sign in to comment.