From 9b6231bba10e62c4d6f1921e9e3bf7f489faa732 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 7 Jan 2025 12:37:58 -0800 Subject: [PATCH] fixes --- components/locale/README.md | 2 +- .../locale/benches/locale_canonicalizer.rs | 4 ++-- components/locale/src/canonicalizer.rs | 9 ++++----- components/locale/src/lib.rs | 2 +- .../locale/tests/locale_canonicalizer.rs | 2 +- components/segmenter/src/grapheme.rs | 1 + components/segmenter/src/line.rs | 18 +++++++++--------- components/segmenter/src/sentence.rs | 4 ++-- components/segmenter/src/word.rs | 18 +++++++++--------- ffi/capi/src/locale_directionality.rs | 14 ++++++++++++-- provider/source/src/segmenter/lstm.rs | 6 +++++- tutorials/c-tiny/segmenter/test.c | 2 +- 12 files changed, 48 insertions(+), 34 deletions(-) diff --git a/components/locale/README.md b/components/locale/README.md index 229aa3df452..80f92c8af98 100644 --- a/components/locale/README.md +++ b/components/locale/README.md @@ -25,7 +25,7 @@ This minimize method returns a new Locale that is the result of running the use icu::locale::Locale; use icu::locale::{LocaleCanonicalizer, TransformResult}; -let lc = LocaleCanonicalizer::new(); +let lc = LocaleCanonicalizer::new_extended(); let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" .parse() diff --git a/components/locale/benches/locale_canonicalizer.rs b/components/locale/benches/locale_canonicalizer.rs index ecfbeeba8b4..11d8668196a 100644 --- a/components/locale/benches/locale_canonicalizer.rs +++ b/components/locale/benches/locale_canonicalizer.rs @@ -8,7 +8,7 @@ use icu_locale::LocaleExpander; use icu_locale_core::Locale; fn canonicalize_bench(c: &mut Criterion) { - let lc = LocaleCanonicalizer::new(); + let lc = LocaleCanonicalizer::new_common(); let mut group = c.benchmark_group("uncanonicalized"); @@ -38,7 +38,7 @@ fn canonicalize_bench(c: &mut Criterion) { } fn canonicalize_noop_bench(c: &mut Criterion) { - let lc = LocaleCanonicalizer::new(); + let lc = LocaleCanonicalizer::new_common(); let mut group = c.benchmark_group("canonicalized"); diff --git a/components/locale/src/canonicalizer.rs b/components/locale/src/canonicalizer.rs index cfb388c1bb3..955944714d7 100644 --- a/components/locale/src/canonicalizer.rs +++ b/components/locale/src/canonicalizer.rs @@ -28,7 +28,7 @@ use tinystr::TinyAsciiStr; /// use icu::locale::Locale; /// use icu::locale::{LocaleCanonicalizer, TransformResult}; /// -/// let lc = LocaleCanonicalizer::new(); +/// let lc = LocaleCanonicalizer::new_extended(); /// /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); @@ -202,7 +202,6 @@ impl LocaleCanonicalizer { /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] - #[allow(clippy::new_without_default)] // Deliberate choice, see #5554 pub const fn new_common() -> Self { Self::new_with_expander(LocaleExpander::new_common()) } @@ -217,7 +216,7 @@ impl LocaleCanonicalizer { ] ); - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_common)] pub fn try_new_common_unstable

(provider: &P) -> Result where P: DataProvider @@ -251,7 +250,7 @@ impl LocaleCanonicalizer { ] ); - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_extended)] pub fn try_new_extended_unstable

(provider: &P) -> Result where P: DataProvider @@ -319,7 +318,7 @@ impl> LocaleCanonicalizer { /// ``` /// use icu::locale::{Locale, LocaleCanonicalizer, TransformResult}; /// - /// let lc = LocaleCanonicalizer::new(); + /// let lc = LocaleCanonicalizer::new_extended(); /// /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); diff --git a/components/locale/src/lib.rs b/components/locale/src/lib.rs index 28917f2d414..fc7488ea83a 100644 --- a/components/locale/src/lib.rs +++ b/components/locale/src/lib.rs @@ -25,7 +25,7 @@ //! use icu::locale::Locale; //! use icu::locale::{LocaleCanonicalizer, TransformResult}; //! -//! let lc = LocaleCanonicalizer::new(); +//! let lc = LocaleCanonicalizer::new_extended(); //! //! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" //! .parse() diff --git a/components/locale/tests/locale_canonicalizer.rs b/components/locale/tests/locale_canonicalizer.rs index 0b7e194a184..aa91070fcf3 100644 --- a/components/locale/tests/locale_canonicalizer.rs +++ b/components/locale/tests/locale_canonicalizer.rs @@ -58,7 +58,7 @@ fn test_minimize() { #[test] fn test_canonicalize() { - let lc = LocaleCanonicalizer::new(); + let lc = LocaleCanonicalizer::new_extended(); let testcases: Vec = serde_json::from_str(include_str!("fixtures/canonicalize.json")) diff --git a/components/segmenter/src/grapheme.rs b/components/segmenter/src/grapheme.rs index ec80de3a9b5..d70c4b66daa 100644 --- a/components/segmenter/src/grapheme.rs +++ b/components/segmenter/src/grapheme.rs @@ -138,6 +138,7 @@ impl GraphemeClusterSegmenter { /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] + #[allow(clippy::new_without_default)] // Deliberate choice, see #5554 pub fn new() -> Self { Self { payload: DataPayload::from_static_ref( diff --git a/components/segmenter/src/line.rs b/components/segmenter/src/line.rs index 43457e8107c..c225ee79bf3 100644 --- a/components/segmenter/src/line.rs +++ b/components/segmenter/src/line.rs @@ -398,9 +398,9 @@ impl LineSegmenter { Self::new_auto_with_options(Default::default()) } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)] #[cfg(feature = "auto")] - pub fn try_new_auto_unstable(provider: &D) -> Result + pub fn try_new_root_auto_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -428,8 +428,8 @@ impl LineSegmenter { } #[cfg(feature = "lstm")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)] - pub fn try_new_lstm_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)] + pub fn try_new_root_lstm_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -455,8 +455,8 @@ impl LineSegmenter { Self::new_dictionary_with_options(Default::default()) } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)] - pub fn try_new_dictionary_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)] + pub fn try_new_root_dictionary_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -471,7 +471,7 @@ impl LineSegmenter { /// /// The current behavior, which is subject to change, is to use the LSTM model when available. /// - /// See also [`Self::new_auto`]. + /// See also [`Self::new_root_auto`]. /// /// ✨ *Enabled with the `compiled_data` and `auto` Cargo features.* /// @@ -515,7 +515,7 @@ impl LineSegmenter { /// The LSTM, or Long Term Short Memory, is a machine learning model. It is smaller than /// the full dictionary but more expensive during segmentation (inference). /// - /// See also [`Self::new_dictionary`]. + /// See also [`Self::new_root_lstm`]. /// /// ✨ *Enabled with the `compiled_data` and `lstm` Cargo features.* /// @@ -569,7 +569,7 @@ impl LineSegmenter { /// The dictionary model uses a list of words to determine appropriate breakpoints. It is /// faster than the LSTM model but requires more data. /// - /// See also [`Self::new_dictionary`]. + /// See also [`Self::new_root_dictionary`]. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// diff --git a/components/segmenter/src/sentence.rs b/components/segmenter/src/sentence.rs index a5c7e932082..b9a2b8e47ac 100644 --- a/components/segmenter/src/sentence.rs +++ b/components/segmenter/src/sentence.rs @@ -128,8 +128,8 @@ impl SentenceSegmenter { } } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] - pub fn try_new_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root)] + pub fn try_new_root_unstable(provider: &D) -> Result where D: DataProvider + ?Sized, { diff --git a/components/segmenter/src/word.rs b/components/segmenter/src/word.rs index 6c84a70dc82..b5ef1992d96 100644 --- a/components/segmenter/src/word.rs +++ b/components/segmenter/src/word.rs @@ -222,8 +222,8 @@ impl WordSegmenter { } #[cfg(feature = "auto")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)] - pub fn try_new_auto_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)] + pub fn try_new_root_auto_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -252,7 +252,7 @@ impl WordSegmenter { ); #[cfg(feature = "auto")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)] pub fn try_new_auto_with_options_unstable( provider: &D, options: WordBreakOptions, @@ -334,8 +334,8 @@ impl WordSegmenter { } #[cfg(feature = "lstm")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)] - pub fn try_new_lstm_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)] + pub fn try_new_root_lstm_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -363,7 +363,7 @@ impl WordSegmenter { ); #[cfg(feature = "lstm")] - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)] pub fn try_new_lstm_with_options_unstable( provider: &D, options: WordBreakOptions, @@ -437,8 +437,8 @@ impl WordSegmenter { } } - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)] - pub fn try_new_dictionary_unstable(provider: &D) -> Result + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)] + pub fn try_new_root_dictionary_unstable(provider: &D) -> Result where D: DataProvider + DataProvider @@ -465,7 +465,7 @@ impl WordSegmenter { ] ); - #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)] + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)] pub fn try_new_dictionary_with_options_unstable( provider: &D, options: WordBreakOptions, diff --git a/ffi/capi/src/locale_directionality.rs b/ffi/capi/src/locale_directionality.rs index d899f965be1..02df7922fc0 100644 --- a/ffi/capi/src/locale_directionality.rs +++ b/ffi/capi/src/locale_directionality.rs @@ -51,7 +51,12 @@ pub mod ffi { ))) } /// Construct a new LocaleDirectionality instance using compiled data. - #[diplomat::rust_link(icu::locale::LocaleDirectionality::new_with_expander, FnInStruct)] + #[diplomat::rust_link(icu::locale::LocaleDirectionality::new_extended, FnInStruct)] + #[diplomat::rust_link( + icu::locale::LocaleDirectionality::new_with_expander, + FnInStruct, + hidden + )] #[diplomat::attr(auto, named_constructor = "extended")] #[cfg(feature = "compiled_data")] pub fn create_extended() -> Box { @@ -61,7 +66,12 @@ pub mod ffi { } /// Construct a new LocaleDirectionality instance using a particular data source. - #[diplomat::rust_link(icu::locale::LocaleDirectionality::new_with_expander, FnInStruct)] + #[diplomat::rust_link(icu::locale::LocaleDirectionality::new_extended, FnInStruct)] + #[diplomat::rust_link( + icu::locale::LocaleDirectionality::new_with_expander, + FnInStruct, + hidden + )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "extended_with_provider")] #[cfg(feature = "buffer_provider")] pub fn create_extended_with_provider( diff --git a/provider/source/src/segmenter/lstm.rs b/provider/source/src/segmenter/lstm.rs index 140b42f78a4..e91a3e0b1bd 100644 --- a/provider/source/src/segmenter/lstm.rs +++ b/provider/source/src/segmenter/lstm.rs @@ -246,7 +246,11 @@ mod tests { provider.as_any_provider(), ); - let segmenter = LineSegmenter::try_new_lstm_with_any_provider(&provider).unwrap(); + let segmenter = LineSegmenter::try_new_lstm_with_options_with_any_provider( + &provider, + Default::default(), + ) + .unwrap(); const TEST_STR: &str = "ภาษาไทยภาษาไทย"; let utf16: Vec = TEST_STR.encode_utf16().collect(); diff --git a/tutorials/c-tiny/segmenter/test.c b/tutorials/c-tiny/segmenter/test.c index e6f3a7687f2..21cbc79d7fd 100644 --- a/tutorials/c-tiny/segmenter/test.c +++ b/tutorials/c-tiny/segmenter/test.c @@ -8,7 +8,7 @@ #include int main(int argc, char *argv[]) { - LineSegmenter* segmenter = icu4x_LineSegmenter_create_auto_mv1(); + LineSegmenter* segmenter = icu4x_LineSegmenter_create_root_auto_mv1(); char output[40];