diff --git a/Sources/Lang.php b/Sources/Lang.php index 6fb284cac52..8d852deac38 100644 --- a/Sources/Lang.php +++ b/Sources/Lang.php @@ -622,6 +622,90 @@ public static function numberFormat(int|float|string $number, ?int $decimals = n self::$thousands_separator, ); } + + /** + * Mapper for old Language names to locales. + * This is the full array. Languages can map to: + * - null: No translation, language is removed and no UTF-8 upgrade possible. + * - Same locales: Merging language into the same locale. + * + * This MAY NOT map the same name to multiple locales. + * This is used to support Upgrading from SMF 2.1 and below. + * + * @return array Key/Value array with Key being the old name and value the locale. + */ + public static function oldLanguageMap(): array + { + /** + * Some notes: + * - english_pirate isn't a real language, so we use the _x_ to mark it as a 'private language'. + * - Various languages had 'informal', these are not a locale, so we typically just mapped these to the 'root' + * */ + return [ + 'albanian' => 'sq_AL', + 'arabic' => 'ar_001', // 001 is the region code for the whole world, so this means modern standard Arabic. + 'bulgarian' => 'bg_BG', + 'cambodian' => 'km_KH', + 'catalan' => 'ca_ES', + 'chinese-simplified' => 'zh_Hans', // Hans indicates the script rather than a country. + 'chinese-traditional' => 'zh_Hant', // Hant indicates the script rather than a country. 
+ 'croatian' => 'hr_HR', + 'czech' => 'cs_CZ', + 'czech_informal' => 'cs', + 'danish' => 'da_DK', + 'dutch' => 'nl_NL', + 'english' => 'en_US', + 'english_british' => 'en_GB', + 'english_pirate' => 'en_x_pirate', + 'esperanto' => 'eo', + 'finnish' => 'fi_FI', + 'french' => 'fr_FR', + 'galician' => 'gl_ES', + 'german' => 'de_DE', + 'german_informal' => 'de', + 'greek' => 'el_GR', + 'hebrew' => 'he_IL', + 'hungarian' => 'hu_HU', + 'indonesian' => 'id_ID', + 'italian' => 'it_IT', + 'japanese' => 'ja_JP', + 'lithuanian' => 'lt_LT', + 'macedonian' => 'mk_MK', + 'malay' => 'ms_MY', + 'norwegian' => 'nb_NO', + 'persian' => 'fa_IR', + 'polish' => 'pl_PL', + 'portuguese_brazilian' => 'pt_BR', + 'portuguese_pt' => 'pt_PT', + 'romanian' => 'ro_RO', + 'russian' => 'ru_RU', + 'serbian_cyrillic' => 'sr_Cyrl', // Cyrl indicates Cyrillic script. + 'serbian_latin' => 'sr_Latn', // Latn indicates Latin script. + 'slovak' => 'sk_SK', + 'slovenian' => 'sl_SI', + 'spanish_es' => 'es_ES', + 'spanish_latin' => 'es_419', // 419 is the region code for Latin America. + 'swedish' => 'sv_SE', + 'thai' => 'th_TH', + 'turkish' => 'tr_TR', + 'ukrainian' => 'uk_UA', + 'urdu' => 'ur_PK', + 'vietnamese' => 'vi_VN', + ]; + } + + /** + * Given an old language name, this will return the locale name. + * This is used to support upgrading from SMF 2.1 and below. + * This is also used to support compatibility for customizations. + * + * @param string $lang Language name + * @return ?string Locale is returned if found, null otherwise. + */ + public static function getLocaleFromLanguageName(string $lang): ?string + { + return self::oldLanguageMap()[$lang] ?? null; + } } // Export properties to global namespace for backward compatibility. diff --git a/other/upgrade.php b/other/upgrade.php index 2a03e4b80ac..2e6baa557d8 100644 --- a/other/upgrade.php +++ b/other/upgrade.php @@ -3278,6 +3278,17 @@ function ConvertUtf8() 'yoruba' => 'UTF-8', ]; + // Map in the new locales. 
We do it like this, because we want to try our best to capture + // the correct charset no matter what the status of the language upgrade is. + foreach ($lang_charsets as $key => $value) { + // This could be more efficient, but it's upgrade logic. + $locale = Lang::getLocaleFromLanguageName($key); + + if ($locale !== null) { + $lang_charsets[$locale] = $value; + } + } + // Default to ISO-8859-1 unless we detected another supported charset $upcontext['charset_detected'] = (isset($lang_charsets[Config::$language], $charsets[strtr(strtolower($upcontext['charset_detected']), ['utf' => 'UTF', 'iso' => 'ISO'])])) ? $lang_charsets[Config::$language] : 'ISO-8859-1';