Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth committed Jan 7, 2025
1 parent a17ee6f commit 7f186fe
Show file tree
Hide file tree
Showing 12 changed files with 49 additions and 35 deletions.
2 changes: 1 addition & 1 deletion components/locale/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions components/locale/benches/locale_canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use icu_locale::LocaleExpander;
use icu_locale_core::Locale;

fn canonicalize_bench(c: &mut Criterion) {
let lc = LocaleCanonicalizer::new();
let lc = LocaleCanonicalizer::new_common();

let mut group = c.benchmark_group("uncanonicalized");

Expand Down Expand Up @@ -38,7 +38,7 @@ fn canonicalize_bench(c: &mut Criterion) {
}

fn canonicalize_noop_bench(c: &mut Criterion) {
let lc = LocaleCanonicalizer::new();
let lc = LocaleCanonicalizer::new_common();

let mut group = c.benchmark_group("canonicalized");

Expand Down
9 changes: 4 additions & 5 deletions components/locale/src/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use tinystr::TinyAsciiStr;
/// use icu::locale::Locale;
/// use icu::locale::{LocaleCanonicalizer, TransformResult};
///
/// let lc = LocaleCanonicalizer::new();
/// let lc = LocaleCanonicalizer::new_extended();
///
/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
Expand Down Expand Up @@ -202,7 +202,6 @@ impl LocaleCanonicalizer<LocaleExpander> {
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
#[allow(clippy::new_without_default)] // Deliberate choice, see #5554
pub const fn new_common() -> Self {
// Delegates to `new_with_expander`, handing it the expander produced by
// `LocaleExpander::new_common()`.
Self::new_with_expander(LocaleExpander::new_common())
}
Expand All @@ -217,7 +216,7 @@ impl LocaleCanonicalizer<LocaleExpander> {
]
);

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_common)]
pub fn try_new_common_unstable<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<AliasesV2Marker>
Expand Down Expand Up @@ -251,7 +250,7 @@ impl LocaleCanonicalizer<LocaleExpander> {
]
);

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_extended)]
pub fn try_new_extended_unstable<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<AliasesV2Marker>
Expand Down Expand Up @@ -319,7 +318,7 @@ impl<Expander: AsRef<LocaleExpander>> LocaleCanonicalizer<Expander> {
/// ```
/// use icu::locale::{Locale, LocaleCanonicalizer, TransformResult};
///
/// let lc = LocaleCanonicalizer::new();
/// let lc = LocaleCanonicalizer::new_extended();
///
/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
Expand Down
2 changes: 1 addition & 1 deletion components/locale/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
//! use icu::locale::Locale;
//! use icu::locale::{LocaleCanonicalizer, TransformResult};
//!
//! let lc = LocaleCanonicalizer::new();
//! let lc = LocaleCanonicalizer::new_extended();
//!
//! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc"
//! .parse()
Expand Down
2 changes: 1 addition & 1 deletion components/locale/tests/locale_canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn test_minimize() {

#[test]
fn test_canonicalize() {
let lc = LocaleCanonicalizer::new();
let lc = LocaleCanonicalizer::new_extended();

let testcases: Vec<fixtures::CanonicalizationTest> =
serde_json::from_str(include_str!("fixtures/canonicalize.json"))
Expand Down
1 change: 1 addition & 0 deletions components/segmenter/src/grapheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ impl GraphemeClusterSegmenter {
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
#[allow(clippy::new_without_default)] // Deliberate choice, see #5554
pub fn new() -> Self {
Self {
payload: DataPayload::from_static_ref(
Expand Down
20 changes: 10 additions & 10 deletions components/segmenter/src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ impl LineSegmenter {
Self::new_auto_with_options(Default::default())
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)]
#[cfg(feature = "auto")]
pub fn try_new_auto_unstable<D>(provider: &D) -> Result<Self, DataError>
pub fn try_new_root_auto_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<LineBreakDataV2Marker>
+ DataProvider<LstmForWordLineAutoV1Marker>
Expand Down Expand Up @@ -428,8 +428,8 @@ impl LineSegmenter {
}

#[cfg(feature = "lstm")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)]
pub fn try_new_lstm_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)]
pub fn try_new_root_lstm_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<LineBreakDataV2Marker>
+ DataProvider<LstmForWordLineAutoV1Marker>
Expand All @@ -455,8 +455,8 @@ impl LineSegmenter {
Self::new_dictionary_with_options(Default::default())
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)]
pub fn try_new_dictionary_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)]
pub fn try_new_root_dictionary_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<LineBreakDataV2Marker>
+ DataProvider<DictionaryForWordLineExtendedV1Marker>
Expand All @@ -471,7 +471,7 @@ impl LineSegmenter {
///
/// The current behavior, which is subject to change, is to use the LSTM model when available.
///
/// See also [`Self::new_auto`].
/// See also [`Self::new_root_auto`].
///
/// ✨ *Enabled with the `compiled_data` and `auto` Cargo features.*
///
Expand Down Expand Up @@ -515,7 +515,7 @@ impl LineSegmenter {
/// The LSTM, or Long Short-Term Memory, is a machine learning model. It is smaller than
/// the full dictionary but more expensive during segmentation (inference).
///
/// See also [`Self::new_dictionary`].
/// See also [`Self::new_root_lstm`].
///
/// ✨ *Enabled with the `compiled_data` and `lstm` Cargo features.*
///
Expand Down Expand Up @@ -569,7 +569,7 @@ impl LineSegmenter {
/// The dictionary model uses a list of words to determine appropriate breakpoints. It is
/// faster than the LSTM model but requires more data.
///
/// See also [`Self::new_dictionary`].
/// See also [`Self::new_root_dictionary`].
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
Expand Down Expand Up @@ -1522,7 +1522,7 @@ mod tests {

#[test]
fn linebreak() {
let segmenter = LineSegmenter::try_new_dictionary_unstable(&crate::provider::Baked)
let segmenter = LineSegmenter::try_new_root_dictionary_unstable(&crate::provider::Baked)
.expect("Data exists");

let mut iter = segmenter.segment_str("hello world");
Expand Down
4 changes: 2 additions & 2 deletions components/segmenter/src/sentence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ impl SentenceSegmenter {
}
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
pub fn try_new_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root)]
pub fn try_new_root_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<SentenceBreakDataV2Marker> + ?Sized,
{
Expand Down
18 changes: 9 additions & 9 deletions components/segmenter/src/word.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ impl WordSegmenter {
}

#[cfg(feature = "auto")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)]
pub fn try_new_auto_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)]
pub fn try_new_root_auto_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<WordBreakDataV2Marker>
+ DataProvider<WordBreakDataOverrideV1Marker>
Expand Down Expand Up @@ -252,7 +252,7 @@ impl WordSegmenter {
);

#[cfg(feature = "auto")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_auto)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_auto)]
pub fn try_new_auto_with_options_unstable<D>(
provider: &D,
options: WordBreakOptions,
Expand Down Expand Up @@ -334,8 +334,8 @@ impl WordSegmenter {
}

#[cfg(feature = "lstm")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)]
pub fn try_new_lstm_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)]
pub fn try_new_root_lstm_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<WordBreakDataV2Marker>
+ DataProvider<WordBreakDataOverrideV1Marker>
Expand Down Expand Up @@ -363,7 +363,7 @@ impl WordSegmenter {
);

#[cfg(feature = "lstm")]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_lstm)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_lstm)]
pub fn try_new_lstm_with_options_unstable<D>(
provider: &D,
options: WordBreakOptions,
Expand Down Expand Up @@ -437,8 +437,8 @@ impl WordSegmenter {
}
}

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)]
pub fn try_new_dictionary_unstable<D>(provider: &D) -> Result<Self, DataError>
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)]
pub fn try_new_root_dictionary_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: DataProvider<WordBreakDataV2Marker>
+ DataProvider<WordBreakDataOverrideV1Marker>
Expand All @@ -465,7 +465,7 @@ impl WordSegmenter {
]
);

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_dictionary)]
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_root_dictionary)]
pub fn try_new_dictionary_with_options_unstable<D>(
provider: &D,
options: WordBreakOptions,
Expand Down
14 changes: 12 additions & 2 deletions ffi/capi/src/locale_directionality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ pub mod ffi {
)))
}
/// Construct a new LocaleDirectionality instance using compiled data.
#[diplomat::rust_link(icu::locale::LocaleDirectionality::new_with_expander, FnInStruct)]
#[diplomat::rust_link(icu::locale::LocaleDirectionality::new_extended, FnInStruct)]
#[diplomat::rust_link(
icu::locale::LocaleDirectionality::new_with_expander,
FnInStruct,
hidden
)]
#[diplomat::attr(auto, named_constructor = "extended")]
#[cfg(feature = "compiled_data")]
pub fn create_extended() -> Box<LocaleDirectionality> {
Expand All @@ -61,7 +66,12 @@ pub mod ffi {
}

/// Construct a new LocaleDirectionality instance using a particular data source.
#[diplomat::rust_link(icu::locale::LocaleDirectionality::new_with_expander, FnInStruct)]
#[diplomat::rust_link(icu::locale::LocaleDirectionality::new_extended, FnInStruct)]
#[diplomat::rust_link(
icu::locale::LocaleDirectionality::new_with_expander,
FnInStruct,
hidden
)]
#[diplomat::attr(supports = fallible_constructors, named_constructor = "extended_with_provider")]
#[cfg(feature = "buffer_provider")]
pub fn create_extended_with_provider(
Expand Down
6 changes: 5 additions & 1 deletion provider/source/src/segmenter/lstm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,11 @@ mod tests {
provider.as_any_provider(),
);

let segmenter = LineSegmenter::try_new_lstm_with_any_provider(&provider).unwrap();
let segmenter = LineSegmenter::try_new_lstm_with_options_with_any_provider(
&provider,
Default::default(),
)
.unwrap();

const TEST_STR: &str = "ภาษาไทยภาษาไทย";
let utf16: Vec<u16> = TEST_STR.encode_utf16().collect();
Expand Down
2 changes: 1 addition & 1 deletion tutorials/c-tiny/segmenter/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <stdio.h>

int main(int argc, char *argv[]) {
LineSegmenter* segmenter = icu4x_LineSegmenter_create_auto_mv1();
LineSegmenter* segmenter = icu4x_LineSegmenter_create_root_auto_mv1();


char output[40];
Expand Down

0 comments on commit 7f186fe

Please sign in to comment.