From 0f600bc3f52e4685caa942050003fd82996ea5d1 Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Wed, 8 Nov 2023 17:58:54 -0500 Subject: [PATCH] Improve case insensitivity consistency (#10884) # Description Add an extension trait `IgnoreCaseExt` to nu_utils which adds some case insensitivity helpers, and use them throughout nu to improve the handling of case insensitivity. Proper case folding is done via unicase, which is already a dependency via mime_guess from nu-command. In actuality a lot of code still does `to_lowercase`, because unicase only provides immediate comparison and doesn't expose a `to_folded_case` yet. And since we do a lot of `contains`/`starts_with`/`ends_with`, it's not sufficient to just have `eq_ignore_case`. But if we get access in the future, this makes us ready to use it with a change in one place. Plus, it's clearer what the purpose is at the call site to call `to_folded_case` instead of `to_lowercase` if it's exclusively for the purpose of case insensitive comparison, even if it just does `to_lowercase` still. # User-Facing Changes - Some commands that were supposed to be case insensitive remained only insensitive to ASCII case (a-z), and now are case insensitive w.r.t. non-ASCII characters as well. 
# Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` --------- Co-authored-by: Stefan Holderbach --- Cargo.lock | 1 + .../src/completions/custom_completions.rs | 5 +- .../src/completions/file_completions.rs | 3 +- .../src/completions/variable_completions.rs | 8 +-- crates/nu-cli/src/menus/help_completions.rs | 13 ++--- crates/nu-cli/src/reedline_config.rs | 8 +-- .../src/dataframe/eager/sql_expr.rs | 2 +- .../nu-cmd-extra/src/extra/formats/to/html.rs | 3 +- crates/nu-color-config/src/nu_style.rs | 2 +- crates/nu-command/src/conversions/fill.rs | 2 +- .../nu-command/src/conversions/into/bool.rs | 4 +- .../src/conversions/into/datetime.rs | 2 +- crates/nu-command/src/date/parser.rs | 2 +- crates/nu-command/src/filters/find.rs | 5 +- crates/nu-command/src/filters/sort.rs | 41 +++++++------- crates/nu-command/src/filters/uniq.rs | 11 ++-- crates/nu-command/src/generators/generate.rs | 4 +- crates/nu-command/src/generators/unfold.rs | 4 +- crates/nu-command/src/help/help_.rs | 5 +- crates/nu-command/src/help/help_commands.rs | 2 +- .../src/platform/input/input_listen.rs | 8 +-- crates/nu-command/src/sort_utils.rs | 33 +++++------ .../src/strings/encode_decode/encoding.rs | 6 +- .../nu-command/src/strings/str_/contains.rs | 9 +-- .../nu-command/src/strings/str_/ends_with.rs | 4 +- .../src/strings/str_/starts_with.rs | 3 +- crates/nu-command/src/system/run_external.rs | 16 +++--- crates/nu-glob/src/lib.rs | 22 ++------ crates/nu-parser/src/parser.rs | 2 +- crates/nu-protocol/src/did_you_mean.rs | 2 +- crates/nu-protocol/src/value/mod.rs | 4 +- crates/nu-utils/Cargo.toml | 1 + crates/nu-utils/src/casing.rs | 55 +++++++++++++++++++ crates/nu-utils/src/lib.rs | 2 + src/config_files.rs | 4 +- 35 files changed, 176 insertions(+), 122 deletions(-) create mode 100644 crates/nu-utils/src/casing.rs diff --git a/Cargo.lock b/Cargo.lock index 4a7bc2f76f926..0f062d80f5250 100644 
--- a/Cargo.lock +++ b/Cargo.lock @@ -3141,6 +3141,7 @@ dependencies = [ "num-format", "strip-ansi-escapes", "sys-locale", + "unicase", ] [[package]] diff --git a/crates/nu-cli/src/completions/custom_completions.rs b/crates/nu-cli/src/completions/custom_completions.rs index 06aed6739b400..f8555c9d4352e 100644 --- a/crates/nu-cli/src/completions/custom_completions.rs +++ b/crates/nu-cli/src/completions/custom_completions.rs @@ -5,6 +5,7 @@ use nu_protocol::{ engine::{EngineState, Stack, StateWorkingSet}, PipelineData, Span, Type, Value, }; +use nu_utils::IgnoreCaseExt; use reedline::Suggestion; use std::collections::HashMap; use std::sync::Arc; @@ -153,8 +154,8 @@ fn filter(prefix: &[u8], items: Vec, options: &CompletionOptions) -> (true, true) => it.value.as_bytes().starts_with(prefix), (true, false) => it.value.contains(std::str::from_utf8(prefix).unwrap_or("")), (false, positional) => { - let value = it.value.to_lowercase(); - let prefix = std::str::from_utf8(prefix).unwrap_or("").to_lowercase(); + let value = it.value.to_folded_case(); + let prefix = std::str::from_utf8(prefix).unwrap_or("").to_folded_case(); if positional { value.starts_with(&prefix) } else { diff --git a/crates/nu-cli/src/completions/file_completions.rs b/crates/nu-cli/src/completions/file_completions.rs index a3eadf2938d64..fc68638fc0b54 100644 --- a/crates/nu-cli/src/completions/file_completions.rs +++ b/crates/nu-cli/src/completions/file_completions.rs @@ -6,6 +6,7 @@ use nu_protocol::{ engine::{EngineState, StateWorkingSet}, levenshtein_distance, Span, }; +use nu_utils::IgnoreCaseExt; use reedline::Suggestion; use std::path::{Path, MAIN_SEPARATOR as SEP}; use std::sync::Arc; @@ -125,7 +126,7 @@ pub fn matches(partial: &str, from: &str, options: &CompletionOptions) -> bool { if !options.case_sensitive { return options .match_algorithm - .matches_str(&from.to_ascii_lowercase(), &partial.to_ascii_lowercase()); + .matches_str(&from.to_folded_case(), &partial.to_folded_case()); } 
options.match_algorithm.matches_str(from, partial) diff --git a/crates/nu-cli/src/completions/variable_completions.rs b/crates/nu-cli/src/completions/variable_completions.rs index b82dc0dc41783..a72ddc7b98e61 100644 --- a/crates/nu-cli/src/completions/variable_completions.rs +++ b/crates/nu-cli/src/completions/variable_completions.rs @@ -44,9 +44,7 @@ impl Completer for VariableCompletion { ) -> Vec { let mut output = vec![]; let builtins = ["$nu", "$in", "$env"]; - let var_str = std::str::from_utf8(&self.var_context.0) - .unwrap_or("") - .to_lowercase(); + let var_str = std::str::from_utf8(&self.var_context.0).unwrap_or(""); let var_id = working_set.find_variable(&self.var_context.0); let current_span = reedline::Span { start: span.start - offset, @@ -57,7 +55,7 @@ impl Completer for VariableCompletion { // Completions for the given variable if !var_str.is_empty() { // Completion for $env. - if var_str.as_str() == "$env" { + if var_str == "$env" { let env_vars = self.stack.get_env_vars(&self.engine_state); // Return nested values @@ -109,7 +107,7 @@ impl Completer for VariableCompletion { } // Completions for $nu. 
- if var_str.as_str() == "$nu" { + if var_str == "$nu" { // Eval nu var if let Ok(nuval) = eval_variable( &self.engine_state, diff --git a/crates/nu-cli/src/menus/help_completions.rs b/crates/nu-cli/src/menus/help_completions.rs index bb239dae3658b..91642ff980a0c 100644 --- a/crates/nu-cli/src/menus/help_completions.rs +++ b/crates/nu-cli/src/menus/help_completions.rs @@ -1,5 +1,6 @@ use nu_engine::documentation::get_flags_section; use nu_protocol::{engine::EngineState, levenshtein_distance}; +use nu_utils::IgnoreCaseExt; use reedline::{Completer, Suggestion}; use std::fmt::Write; use std::sync::Arc; @@ -13,21 +14,19 @@ impl NuHelpCompleter { fn completion_helper(&self, line: &str, pos: usize) -> Vec { let full_commands = self.0.get_signatures_with_examples(false); + let folded_line = line.to_folded_case(); //Vec<(Signature, Vec, bool, bool)> { let mut commands = full_commands .iter() .filter(|(sig, _, _, _, _)| { - sig.name.to_lowercase().contains(&line.to_lowercase()) - || sig.usage.to_lowercase().contains(&line.to_lowercase()) + sig.name.to_folded_case().contains(&folded_line) + || sig.usage.to_folded_case().contains(&folded_line) || sig .search_terms .iter() - .any(|term| term.to_lowercase().contains(&line.to_lowercase())) - || sig - .extra_usage - .to_lowercase() - .contains(&line.to_lowercase()) + .any(|term| term.to_folded_case().contains(&folded_line)) + || sig.extra_usage.to_folded_case().contains(&folded_line) }) .collect::>(); diff --git a/crates/nu-cli/src/reedline_config.rs b/crates/nu-cli/src/reedline_config.rs index 09e924862f888..bd2bba5762f2c 100644 --- a/crates/nu-cli/src/reedline_config.rs +++ b/crates/nu-cli/src/reedline_config.rs @@ -616,7 +616,7 @@ fn add_parsed_keybinding( let modifier = match keybinding .modifier .into_string("", config) - .to_lowercase() + .to_ascii_lowercase() .as_str() { "control" => KeyModifiers::CONTROL, @@ -641,7 +641,7 @@ fn add_parsed_keybinding( let keycode = match keybinding .keycode .into_string("", config) - 
.to_lowercase() + .to_ascii_lowercase() .as_str() { "backspace" => KeyCode::Backspace, @@ -728,7 +728,7 @@ fn parse_event(value: &Value, config: &Config) -> Result, match value { Value::Record { val: record, .. } => match EventType::try_from_record(record, span)? { EventType::Send(value) => event_from_record( - value.into_string("", config).to_lowercase().as_str(), + value.into_string("", config).to_ascii_lowercase().as_str(), record, config, span, @@ -736,7 +736,7 @@ fn parse_event(value: &Value, config: &Config) -> Result, .map(Some), EventType::Edit(value) => { let edit = edit_from_record( - value.into_string("", config).to_lowercase().as_str(), + value.into_string("", config).to_ascii_lowercase().as_str(), record, config, span, diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs index ef69b7ea418b6..f23c48c87ec46 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs @@ -152,7 +152,7 @@ fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result Result { use sqlparser::ast::{FunctionArg, FunctionArgExpr}; // Function name mostly do not have name space, so it mostly take the first args - let function_name = sql_function.name.0[0].value.to_lowercase(); + let function_name = sql_function.name.0[0].value.to_ascii_lowercase(); let args = sql_function .args .iter() diff --git a/crates/nu-cmd-extra/src/extra/formats/to/html.rs b/crates/nu-cmd-extra/src/extra/formats/to/html.rs index bd9988532b7c4..4b4c967139e4c 100644 --- a/crates/nu-cmd-extra/src/extra/formats/to/html.rs +++ b/crates/nu-cmd-extra/src/extra/formats/to/html.rs @@ -7,6 +7,7 @@ use nu_protocol::{ record, Category, Config, DataSource, Example, IntoPipelineData, PipelineData, PipelineMetadata, ShellError, Signature, Spanned, SyntaxShape, Type, Value, }; +use nu_utils::IgnoreCaseExt; use rust_embed::RustEmbed; use serde::{Deserialize, 
Serialize}; use std::collections::HashMap; @@ -180,7 +181,7 @@ fn get_theme_from_asset_file( let th = asset .themes .into_iter() - .find(|n| n.name.to_lowercase() == theme_name.to_lowercase()) // case insensitive search + .find(|n| n.name.eq_ignore_case(theme_name)) // case insensitive search .unwrap_or_default(); Ok(convert_html_theme_to_hash_map(is_dark, &th)) diff --git a/crates/nu-color-config/src/nu_style.rs b/crates/nu-color-config/src/nu_style.rs index 42da9d285253b..a7a2a4a34708f 100644 --- a/crates/nu-color-config/src/nu_style.rs +++ b/crates/nu-color-config/src/nu_style.rs @@ -578,7 +578,7 @@ fn fill_modifiers(attrs: &str, style: &mut Style) { // // since we can combine styles like bold-italic, iterate through the chars // and set the bools for later use in the nu_ansi_term::Style application - for ch in attrs.to_lowercase().chars() { + for ch in attrs.chars().map(|c| c.to_ascii_lowercase()) { match ch { 'l' => style.is_blink = true, 'b' => style.is_bold = true, diff --git a/crates/nu-command/src/conversions/fill.rs b/crates/nu-command/src/conversions/fill.rs index bcc2702fc7b21..1d9c8f0d87e02 100644 --- a/crates/nu-command/src/conversions/fill.rs +++ b/crates/nu-command/src/conversions/fill.rs @@ -143,7 +143,7 @@ fn fill( let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let alignment = if let Some(arg) = alignment_arg { - match arg.to_lowercase().as_str() { + match arg.to_ascii_lowercase().as_str() { "l" | "left" => FillAlignment::Left, "r" | "right" => FillAlignment::Right, "c" | "center" | "m" | "middle" => FillAlignment::Middle, diff --git a/crates/nu-command/src/conversions/into/bool.rs b/crates/nu-command/src/conversions/into/bool.rs index a9212d6666b12..a49cb2b5936dd 100644 --- a/crates/nu-command/src/conversions/into/bool.rs +++ b/crates/nu-command/src/conversions/into/bool.rs @@ -116,13 +116,13 @@ fn into_bool( } fn string_to_boolean(s: &str, span: Span) -> Result { - match s.trim().to_lowercase().as_str() { + match 
s.trim().to_ascii_lowercase().as_str() { "true" => Ok(true), "false" => Ok(false), o => { let val = o.parse::(); match val { - Ok(f) => Ok(f.abs() >= f64::EPSILON), + Ok(f) => Ok(f != 0.0), Err(_) => Err(ShellError::CantConvert { to_type: "boolean".to_string(), from_type: "string".to_string(), diff --git a/crates/nu-command/src/conversions/into/datetime.rs b/crates/nu-command/src/conversions/into/datetime.rs index 57f1633a3f631..6272be93ec800 100644 --- a/crates/nu-command/src/conversions/into/datetime.rs +++ b/crates/nu-command/src/conversions/into/datetime.rs @@ -46,7 +46,7 @@ impl Zone { } } fn from_string(s: String) -> Self { - match s.to_lowercase().as_str() { + match s.to_ascii_lowercase().as_str() { "utc" | "u" => Self::Utc, "local" | "l" => Self::Local, _ => Self::Error, diff --git a/crates/nu-command/src/date/parser.rs b/crates/nu-command/src/date/parser.rs index 66872b2e24bc2..ad6b953396599 100644 --- a/crates/nu-command/src/date/parser.rs +++ b/crates/nu-command/src/date/parser.rs @@ -26,7 +26,7 @@ pub fn datetime_in_timezone( None => Err(ParseErrorKind::OutOfRange), }, Err(ParseErrorKind::Invalid) => { - if s.to_lowercase() == "local" { + if s.eq_ignore_ascii_case("local") { Ok(dt.with_timezone(Local::now().offset())) } else { let tz: Tz = parse_timezone_internal(s)?; diff --git a/crates/nu-command/src/filters/find.rs b/crates/nu-command/src/filters/find.rs index 08051687a5396..4f26477e0e484 100644 --- a/crates/nu-command/src/filters/find.rs +++ b/crates/nu-command/src/filters/find.rs @@ -10,6 +10,7 @@ use nu_protocol::{ record, Category, Config, Example, IntoInterruptiblePipelineData, IntoPipelineData, ListStream, PipelineData, Record, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use nu_utils::IgnoreCaseExt; #[derive(Clone)] pub struct Find; @@ -318,7 +319,9 @@ fn highlight_terms_in_record_with_search_columns( } fn contains_ignore_case(string: &str, substring: &str) -> bool { - string.to_lowercase().contains(&substring.to_lowercase()) + 
string + .to_folded_case() + .contains(&substring.to_folded_case()) } fn find_with_rest_and_highlight( diff --git a/crates/nu-command/src/filters/sort.rs b/crates/nu-command/src/filters/sort.rs index 76fc79601e17f..412d55e410582 100644 --- a/crates/nu-command/src/filters/sort.rs +++ b/crates/nu-command/src/filters/sort.rs @@ -5,6 +5,7 @@ use nu_protocol::{ record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, Record, ShellError, Signature, Span, Type, Value, }; +use nu_utils::IgnoreCaseExt; use std::cmp::Ordering; #[derive(Clone)] @@ -220,14 +221,14 @@ fn sort_record( b.0.clone() }; - // Convert to lowercase if case-insensitive + // Fold case if case-insensitive let left = if insensitive { - left_res.to_ascii_lowercase() + left_res.to_folded_case() } else { left_res }; let right = if insensitive { - right_res.to_ascii_lowercase() + right_res.to_folded_case() } else { right_res }; @@ -235,7 +236,7 @@ fn sort_record( if natural { compare_str(left, right) } else { - left.partial_cmp(&right).unwrap_or(Ordering::Equal) + left.cmp(&right) } }); @@ -262,28 +263,24 @@ pub fn sort( let span_a = a.span(); let span_b = b.span(); if insensitive { - let lowercase_left = match a { - Value::String { val, .. } => { - Value::string(val.to_ascii_lowercase(), span_a) - } + let folded_left = match a { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_a), _ => a.clone(), }; - let lowercase_right = match b { - Value::String { val, .. } => { - Value::string(val.to_ascii_lowercase(), span_b) - } + let folded_right = match b { + Value::String { val, .. 
} => Value::string(val.to_folded_case(), span_b), _ => b.clone(), }; if natural { - match (lowercase_left.as_string(), lowercase_right.as_string()) { + match (folded_left.as_string(), folded_right.as_string()) { (Ok(left), Ok(right)) => compare_str(left, right), _ => Ordering::Equal, } } else { - lowercase_left - .partial_cmp(&lowercase_right) + folded_left + .partial_cmp(&folded_right) .unwrap_or(Ordering::Equal) } } else if natural { @@ -326,23 +323,23 @@ pub fn process( let result = if insensitive { let span_left = left_res.span(); let span_right = right_res.span(); - let lowercase_left = match left_res { - Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_left), + let folded_left = match left_res { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_left), _ => left_res, }; - let lowercase_right = match right_res { - Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_right), + let folded_right = match right_res { + Value::String { val, .. 
} => Value::string(val.to_folded_case(), span_right), _ => right_res, }; if natural { - match (lowercase_left.as_string(), lowercase_right.as_string()) { + match (folded_left.as_string(), folded_right.as_string()) { (Ok(left), Ok(right)) => compare_str(left, right), _ => Ordering::Equal, } } else { - lowercase_left - .partial_cmp(&lowercase_right) + folded_left + .partial_cmp(&folded_right) .unwrap_or(Ordering::Equal) } } else { diff --git a/crates/nu-command/src/filters/uniq.rs b/crates/nu-command/src/filters/uniq.rs index c333351c8be73..26e2576690ece 100644 --- a/crates/nu-command/src/filters/uniq.rs +++ b/crates/nu-command/src/filters/uniq.rs @@ -6,6 +6,7 @@ use nu_protocol::{ record, Category, Example, IntoPipelineData, PipelineData, PipelineMetadata, ShellError, Signature, Span, Type, Value, }; +use nu_utils::IgnoreCaseExt; use std::collections::hash_map::IntoIter; use std::collections::HashMap; @@ -172,7 +173,7 @@ impl ValueCounter { ValueCounter { val, val_to_compare: if flag_ignore_case { - clone_to_lowercase(&vals_to_compare.with_span(Span::unknown())) + clone_to_folded_case(&vals_to_compare.with_span(Span::unknown())) } else { vals_to_compare.with_span(Span::unknown()) }, @@ -182,17 +183,17 @@ impl ValueCounter { } } -fn clone_to_lowercase(value: &Value) -> Value { +fn clone_to_folded_case(value: &Value) -> Value { let span = value.span(); match value { - Value::String { val: s, .. } => Value::string(s.clone().to_lowercase(), span), + Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span), Value::List { vals: vec, .. } => { - Value::list(vec.iter().map(clone_to_lowercase).collect(), span) + Value::list(vec.iter().map(clone_to_folded_case).collect(), span) } Value::Record { val: record, .. 
} => Value::record( record .iter() - .map(|(k, v)| (k.to_owned(), clone_to_lowercase(v))) + .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v))) .collect(), span, ), diff --git a/crates/nu-command/src/generators/generate.rs b/crates/nu-command/src/generators/generate.rs index 9341d787281f6..5fdf77d839ac6 100644 --- a/crates/nu-command/src/generators/generate.rs +++ b/crates/nu-command/src/generators/generate.rs @@ -150,9 +150,9 @@ used as the next argument to the closure, otherwise generation stops. let mut err = None; for (k, v) in iter { - if k.to_lowercase() == "out" { + if k.eq_ignore_ascii_case("out") { out = Some(v); - } else if k.to_lowercase() == "next" { + } else if k.eq_ignore_ascii_case("next") { next = Some(v); } else { let error = ShellError::GenericError( diff --git a/crates/nu-command/src/generators/unfold.rs b/crates/nu-command/src/generators/unfold.rs index c44bfbe1d543f..2d9349ffab9fa 100644 --- a/crates/nu-command/src/generators/unfold.rs +++ b/crates/nu-command/src/generators/unfold.rs @@ -162,9 +162,9 @@ used as the next argument to the closure, otherwise generation stops. 
let mut err = None; for (k, v) in iter { - if k.to_lowercase() == "out" { + if k.eq_ignore_ascii_case("out") { out = Some(v); - } else if k.to_lowercase() == "next" { + } else if k.eq_ignore_ascii_case("next") { next = Some(v); } else { let error = ShellError::GenericError( diff --git a/crates/nu-command/src/help/help_.rs b/crates/nu-command/src/help/help_.rs index 74351f031880c..d2a8eda894f75 100644 --- a/crates/nu-command/src/help/help_.rs +++ b/crates/nu-command/src/help/help_.rs @@ -10,6 +10,7 @@ use nu_protocol::{ span, Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; +use nu_utils::IgnoreCaseExt; #[derive(Clone)] pub struct Help; @@ -144,7 +145,7 @@ pub fn highlight_search_in_table( highlight_style: &Style, ) -> Result, ShellError> { let orig_search_string = search_string; - let search_string = search_string.to_lowercase(); + let search_string = search_string.to_folded_case(); let mut matches = vec![]; for record in table { @@ -168,7 +169,7 @@ pub fn highlight_search_in_table( } let span = val.span(); if let Value::String { val: s, .. 
} = val { - if s.to_lowercase().contains(&search_string) { + if s.to_folded_case().contains(&search_string) { *val = Value::string( highlight_search_string( s, diff --git a/crates/nu-command/src/help/help_commands.rs b/crates/nu-command/src/help/help_commands.rs index 655bb1f48c559..4c930ffa30df5 100644 --- a/crates/nu-command/src/help/help_commands.rs +++ b/crates/nu-command/src/help/help_commands.rs @@ -134,7 +134,7 @@ fn build_help_commands(engine_state: &EngineState, span: Span) -> Vec { let usage = sig.usage; let search_terms = sig.search_terms; - let command_type = format!("{:?}", decl.command_type()).to_lowercase(); + let command_type = format!("{:?}", decl.command_type()).to_ascii_lowercase(); // Build table of parameters let param_table = { diff --git a/crates/nu-command/src/platform/input/input_listen.rs b/crates/nu-command/src/platform/input/input_listen.rs index b325187055d81..95ae1121d05bb 100644 --- a/crates/nu-command/src/platform/input/input_listen.rs +++ b/crates/nu-command/src/platform/input/input_listen.rs @@ -345,9 +345,9 @@ fn get_keycode_name(head: Span, code: &KeyCode) -> (Value, Value) { let (typ, code) = match code { KeyCode::F(n) => ("f", n.to_string()), KeyCode::Char(c) => ("char", c.to_string()), - KeyCode::Media(m) => ("media", format!("{m:?}").to_lowercase()), - KeyCode::Modifier(m) => ("modifier", format!("{m:?}").to_lowercase()), - _ => ("other", format!("{code:?}").to_lowercase()), + KeyCode::Media(m) => ("media", format!("{m:?}").to_ascii_lowercase()), + KeyCode::Modifier(m) => ("modifier", format!("{m:?}").to_ascii_lowercase()), + _ => ("other", format!("{code:?}").to_ascii_lowercase()), }; (Value::string(typ, head), Value::string(code, head)) } @@ -365,7 +365,7 @@ fn parse_modifiers(head: Span, modifiers: &KeyModifiers) -> Value { let parsed_modifiers = ALL_MODIFIERS .iter() .filter(|m| modifiers.contains(**m)) - .map(|m| format!("{m:?}").to_lowercase()) + .map(|m| format!("{m:?}").to_ascii_lowercase()) .map(|string| 
Value::string(string, head)) .collect(); diff --git a/crates/nu-command/src/sort_utils.rs b/crates/nu-command/src/sort_utils.rs index 95cb7dd33725a..35c0681493b95 100644 --- a/crates/nu-command/src/sort_utils.rs +++ b/crates/nu-command/src/sort_utils.rs @@ -1,6 +1,7 @@ use alphanumeric_sort::compare_str; use nu_engine::column::nonexistent_column; use nu_protocol::{ShellError, Span, Value}; +use nu_utils::IgnoreCaseExt; use std::cmp::Ordering; // This module includes sorting functionality that is useful in sort-by and elsewhere. @@ -125,28 +126,24 @@ pub fn sort( if insensitive { let span_a = a.span(); let span_b = b.span(); - let lowercase_left = match a { - Value::String { val, .. } => { - Value::string(val.to_ascii_lowercase(), span_a) - } + let folded_left = match a { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_a), _ => a.clone(), }; - let lowercase_right = match b { - Value::String { val, .. } => { - Value::string(val.to_ascii_lowercase(), span_b) - } + let folded_right = match b { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_b), _ => b.clone(), }; if natural { - match (lowercase_left.as_string(), lowercase_right.as_string()) { + match (folded_left.as_string(), folded_right.as_string()) { (Ok(left), Ok(right)) => compare_str(left, right), _ => Ordering::Equal, } } else { - lowercase_left - .partial_cmp(&lowercase_right) + folded_left + .partial_cmp(&folded_right) .unwrap_or(Ordering::Equal) } } else if natural { @@ -189,23 +186,23 @@ pub fn compare( let result = if insensitive { let span_left = left_res.span(); let span_right = right_res.span(); - let lowercase_left = match left_res { - Value::String { val, .. } => Value::string(val.to_ascii_lowercase(), span_left), + let folded_left = match left_res { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_left), _ => left_res, }; - let lowercase_right = match right_res { - Value::String { val, .. 
} => Value::string(val.to_ascii_lowercase(), span_right), + let folded_right = match right_res { + Value::String { val, .. } => Value::string(val.to_folded_case(), span_right), _ => right_res, }; if natural { - match (lowercase_left.as_string(), lowercase_right.as_string()) { + match (folded_left.as_string(), folded_right.as_string()) { (Ok(left), Ok(right)) => compare_str(left, right), _ => Ordering::Equal, } } else { - lowercase_left - .partial_cmp(&lowercase_right) + folded_left + .partial_cmp(&folded_right) .unwrap_or(Ordering::Equal) } } else if natural { diff --git a/crates/nu-command/src/strings/encode_decode/encoding.rs b/crates/nu-command/src/strings/encode_decode/encoding.rs index 25bdfeab2df54..abfeaba1cf10b 100644 --- a/crates/nu-command/src/strings/encode_decode/encoding.rs +++ b/crates/nu-command/src/strings/encode_decode/encoding.rs @@ -28,7 +28,7 @@ pub fn decode( bytes: &[u8], ) -> Result { // Workaround for a bug in the Encodings Specification. - let encoding = if encoding_name.item.to_lowercase() == "utf16" { + let encoding = if encoding_name.item.eq_ignore_ascii_case("utf16") { parse_encoding(encoding_name.span, "utf-16") } else { parse_encoding(encoding_name.span, &encoding_name.item) @@ -45,7 +45,7 @@ pub fn encode( ignore_errors: bool, ) -> Result { // Workaround for a bug in the Encodings Specification. - let encoding = if encoding_name.item.to_lowercase() == "utf16" { + let encoding = if encoding_name.item.eq_ignore_ascii_case("utf16") { parse_encoding(encoding_name.span, "utf-16") } else { parse_encoding(encoding_name.span, &encoding_name.item) @@ -69,7 +69,7 @@ pub fn encode( fn parse_encoding(span: Span, label: &str) -> Result<&'static Encoding, ShellError> { // Workaround for a bug in the Encodings Specification. 
- let label = if label.to_lowercase() == "utf16" { + let label = if label.eq_ignore_ascii_case("utf16") { "utf-16" } else { label diff --git a/crates/nu-command/src/strings/str_/contains.rs b/crates/nu-command/src/strings/str_/contains.rs index fceac1aad373b..426bf01a2126b 100644 --- a/crates/nu-command/src/strings/str_/contains.rs +++ b/crates/nu-command/src/strings/str_/contains.rs @@ -7,6 +7,7 @@ use nu_protocol::record; use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use nu_utils::IgnoreCaseExt; #[derive(Clone)] pub struct SubCommand; @@ -153,11 +154,11 @@ fn action( match case_insensitive { true => { if *not_contain { - !val.to_lowercase() - .contains(substring.to_lowercase().as_str()) + !val.to_folded_case() + .contains(substring.to_folded_case().as_str()) } else { - val.to_lowercase() - .contains(substring.to_lowercase().as_str()) + val.to_folded_case() + .contains(substring.to_folded_case().as_str()) } } false => { diff --git a/crates/nu-command/src/strings/str_/ends_with.rs b/crates/nu-command/src/strings/str_/ends_with.rs index dcea8d1a9d01e..39501fb33bfec 100644 --- a/crates/nu-command/src/strings/str_/ends_with.rs +++ b/crates/nu-command/src/strings/str_/ends_with.rs @@ -5,6 +5,7 @@ use nu_protocol::ast::CellPath; use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::Category; use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value}; +use nu_utils::IgnoreCaseExt; struct Arguments { substring: String, @@ -98,7 +99,8 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { match input { Value::String { val: s, .. 
} => { let ends_with = if args.case_insensitive { - s.to_lowercase().ends_with(&args.substring.to_lowercase()) + s.to_folded_case() + .ends_with(&args.substring.to_folded_case()) } else { s.ends_with(&args.substring) }; diff --git a/crates/nu-command/src/strings/str_/starts_with.rs b/crates/nu-command/src/strings/str_/starts_with.rs index bb8c36d2f11bc..c24230f6d5de7 100644 --- a/crates/nu-command/src/strings/str_/starts_with.rs +++ b/crates/nu-command/src/strings/str_/starts_with.rs @@ -6,6 +6,7 @@ use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::Category; use nu_protocol::Spanned; use nu_protocol::{Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value}; +use nu_utils::IgnoreCaseExt; struct Arguments { substring: String, @@ -111,7 +112,7 @@ fn action( match input { Value::String { val: s, .. } => { let starts_with = if *case_insensitive { - s.to_lowercase().starts_with(&substring.to_lowercase()) + s.to_folded_case().starts_with(&substring.to_folded_case()) } else { s.starts_with(substring) }; diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index b59b7afa9d8e7..682e35cea45c1 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -9,6 +9,7 @@ use nu_protocol::{ SyntaxShape, Type, Value, }; use nu_system::ForegroundProcess; +use nu_utils::IgnoreCaseExt; use os_pipe::PipeReader; use pathdiff::diff_paths; use std::collections::HashMap; @@ -223,10 +224,10 @@ impl ExternalCommand { const CMD_INTERNAL_COMMANDS: [&str; 9] = [ "ASSOC", "CLS", "ECHO", "FTYPE", "MKLINK", "PAUSE", "START", "VER", "VOL", ]; - let command_name_upper = self.name.item.to_uppercase(); + let command_name = &self.name.item; let looks_like_cmd_internal = CMD_INTERNAL_COMMANDS .iter() - .any(|&cmd| command_name_upper == cmd); + .any(|&cmd| command_name.eq_ignore_ascii_case(cmd)); if looks_like_cmd_internal { let (cmd, new_reader) = 
self.create_process(&input, true, head)?; @@ -252,9 +253,10 @@ impl ExternalCommand { which::which_in(&self.name.item, Some(path_with_cwd), cwd) { if let Some(file_name) = which_path.file_name() { - let file_name_upper = - file_name.to_string_lossy().to_uppercase(); - if file_name_upper != command_name_upper { + if !file_name + .to_string_lossy() + .eq_ignore_case(command_name) + { // which-rs found an executable file with a slightly different name // than the one the user tried. Let's try running it let mut new_command = self.clone(); @@ -767,11 +769,11 @@ fn trim_expand_and_apply_arg( /// Given an invalid command name, try to suggest an alternative fn suggest_command(attempted_command: &str, engine_state: &EngineState) -> Option { let commands = engine_state.get_signatures(false); - let command_name_lower = attempted_command.to_lowercase(); + let command_folded_case = attempted_command.to_folded_case(); let search_term_match = commands.iter().find(|sig| { sig.search_terms .iter() - .any(|term| term.to_lowercase() == command_name_lower) + .any(|term| term.to_folded_case() == command_folded_case) }); match search_term_match { Some(sig) => Some(sig.name.clone()), diff --git a/crates/nu-glob/src/lib.rs b/crates/nu-glob/src/lib.rs index ea7892d1e53cd..4c5d04d45c860 100644 --- a/crates/nu-glob/src/lib.rs +++ b/crates/nu-glob/src/lib.rs @@ -1016,29 +1016,19 @@ fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptio CharRange(start, end) => { // FIXME: work with non-ascii chars properly (issue #1347) if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { - let start = start.to_ascii_lowercase(); - let end = end.to_ascii_lowercase(); - - let start_up = start - .to_uppercase() - .next() - .expect("internal error: getting start uppercase"); - let end_up = end - .to_uppercase() - .next() - .expect("internal error: getting end uppercase"); - // only allow case insensitive matching when // both start and end are within a-z 
or A-Z - if start != start_up && end != end_up { + if start.is_ascii_alphabetic() && end.is_ascii_alphabetic() { + let start = start.to_ascii_lowercase(); + let end = end.to_ascii_lowercase(); let c = c.to_ascii_lowercase(); - if c >= start && c <= end { + if (start..=end).contains(&c) { return true; } } } - if c >= start && c <= end { + if (start..=end).contains(&c) { return true; } } @@ -1279,7 +1269,7 @@ mod test { fn test_range_pattern() { let pat = Pattern::new("a[0-9]b").unwrap(); for i in 0..10 { - assert!(pat.matches(&format!("a{}b", i))); + assert!(pat.matches(&format!("a{}b", i)), "a{i}b =~ a[0-9]b"); } assert!(!pat.matches("a_b")); diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 8b002f3a0bc12..84e1a119789b6 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -2199,7 +2199,7 @@ pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expressi } match parse_unit_value(bytes, span, FILESIZE_UNIT_GROUPS, Type::Filesize, |x| { - x.to_uppercase() + x.to_ascii_uppercase() }) { Some(Ok(expr)) => expr, Some(Err(mk_err_for)) => { diff --git a/crates/nu-protocol/src/did_you_mean.rs b/crates/nu-protocol/src/did_you_mean.rs index 1e97d1dc13c2a..793d26becdb21 100644 --- a/crates/nu-protocol/src/did_you_mean.rs +++ b/crates/nu-protocol/src/did_you_mean.rs @@ -8,7 +8,7 @@ where crate::lev_distance::find_best_match_for_name_with_substrings(&possibilities, input, None) .map(|s| s.to_string()); if let Some(suggestion) = &suggestion { - if suggestion.len() == 1 && suggestion.to_lowercase() != input.to_lowercase() { + if suggestion.len() == 1 && !suggestion.eq_ignore_ascii_case(input) { return None; } } diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index c317a31331e7a..7576c0a0af4ee 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -20,8 +20,8 @@ pub use custom_value::CustomValue; use fancy_regex::Regex; pub 
use from_value::FromValue; pub use lazy_record::LazyRecord; -use nu_utils::get_system_locale; use nu_utils::locale::get_system_locale_string; +use nu_utils::{get_system_locale, IgnoreCaseExt}; use num_format::ToFormattedString; pub use range::*; pub use record::Record; @@ -1008,7 +1008,7 @@ impl Value { // Make reverse iterate to avoid duplicate column leads to first value, actually last value is expected. if let Some(found) = val.iter().rev().find(|x| { if insensitive { - x.0.to_lowercase() == column_name.to_lowercase() + x.0.eq_ignore_case(column_name) } else { x.0 == column_name } diff --git a/crates/nu-utils/Cargo.toml b/crates/nu-utils/Cargo.toml index f4fd18839a022..9360c71697a42 100644 --- a/crates/nu-utils/Cargo.toml +++ b/crates/nu-utils/Cargo.toml @@ -22,6 +22,7 @@ lscolors = { version = "0.15", default-features = false, features = ["nu-ansi-te num-format = { version = "0.4" } strip-ansi-escapes = "0.2.0" sys-locale = "0.3" +unicase = "2.7.0" [target.'cfg(windows)'.dependencies] crossterm_winapi = "0.9" diff --git a/crates/nu-utils/src/casing.rs b/crates/nu-utils/src/casing.rs new file mode 100644 index 0000000000000..6d22d6860b939 --- /dev/null +++ b/crates/nu-utils/src/casing.rs @@ -0,0 +1,55 @@ +use std::cmp::Ordering; +use unicase::UniCase; + +pub trait IgnoreCaseExt { + /// Returns a [case folded] equivalent of this string, as a new String. + /// + /// Case folding is primarily based on lowercase mapping, but includes + /// additional changes to the source text to help make case folding + /// language-invariant and consistent. Case folded text should be used + /// solely for processing and generally should not be stored or displayed. + /// + /// Note: this method might only do [`str::to_lowercase`] instead of a + /// full case fold, depending on how Nu is compiled. You should still + /// prefer using this method for generating case-insensitive strings, + /// though, as it expresses intent much better than `to_lowercase`. 
+ /// + /// [case folded]: + fn to_folded_case(&self) -> String; + + /// Checks that two strings are a case-insensitive match. + /// + /// Essentially `to_folded_case(a) == to_folded_case(b)`, but without + /// allocating and copying string temporaries. Because case folding involves + /// Unicode table lookups, it can sometimes be more efficient to use + /// `to_folded_case` to case fold once and then compare those strings. + fn eq_ignore_case(&self, other: &str) -> bool; + + /// Compares two strings case-insensitively. + /// + /// Essentially `to_folded_case(a).cmp(&to_folded_case(b))`, but without + /// allocating and copying string temporaries. Because case folding involves + /// Unicode table lookups, it can sometimes be more efficient to use + /// `to_folded_case` to case fold once and then compare those strings. + /// + /// Note that this *only* ignores case, comparing the folded strings without + /// any other collation data or locale, so the sort order may be surprising + /// outside of ASCII characters.
+ fn cmp_ignore_case(&self, other: &str) -> Ordering; +} + +impl IgnoreCaseExt for str { + fn to_folded_case(&self) -> String { + // we only do to_lowercase, as unicase doesn't expose its case fold yet + // (seanmonstar/unicase#61) and we don't want to pull in another table + self.to_lowercase() + } + + fn eq_ignore_case(&self, other: &str) -> bool { + UniCase::new(self) == UniCase::new(other) + } + + fn cmp_ignore_case(&self, other: &str) -> Ordering { + UniCase::new(self).cmp(&UniCase::new(other)) + } +} diff --git a/crates/nu-utils/src/lib.rs b/crates/nu-utils/src/lib.rs index fae1a42c9d576..6a0c23679f4ab 100644 --- a/crates/nu-utils/src/lib.rs +++ b/crates/nu-utils/src/lib.rs @@ -1,3 +1,4 @@ +mod casing; pub mod ctrl_c; mod deansi; pub mod locale; @@ -9,6 +10,7 @@ pub use utils::{ stderr_write_all_and_flush, stdout_write_all_and_flush, }; +pub use casing::IgnoreCaseExt; pub use deansi::{ strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely, }; diff --git a/src/config_files.rs b/src/config_files.rs index f5acfd53052d1..6bcd314612fd9 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -70,8 +70,8 @@ pub(crate) fn read_config_file( get_default_config() }; - match answer.to_lowercase().trim() { - "y" | "" => { + match answer.trim() { + "y" | "Y" | "" => { if let Ok(mut output) = File::create(&config_path) { if write!(output, "{config_file}").is_ok() { let config_type = if is_env_config {