Skip to content

Commit

Permalink
Add some logic so the upgrader can know how to handle the new locale …
Browse files Browse the repository at this point in the history
…for UTF-8 migration.
  • Loading branch information
jdarwood007 committed Jan 27, 2024
1 parent 13ae296 commit d20300b
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
84 changes: 84 additions & 0 deletions Sources/Lang.php
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,90 @@ public static function numberFormat(int|float|string $number, ?int $decimals = n
self::$thousands_separator,
);
}

/**
 * Lookup table translating legacy language names into locale identifiers.
 *
 * This is the complete table. By contract an entry may map to:
 *  - null: there is no translation, so the language is removed and no
 *    UTF-8 upgrade is possible for it (no current entry does this).
 *  - A locale shared with another entry: both languages are merged into
 *    that one locale.
 *
 * A single legacy name MAY NOT map to more than one locale.
 * This exists to support upgrading from SMF 2.1 and below.
 *
 * @return array Key/value pairs: key is the old language name, value is its locale.
 */
public static function oldLanguageMap(): array
{
	// Conventions used in the table below:
	// - Region subtags are omitted where a script subtag (Hans, Hant, Cyrl,
	//   Latn) is what actually distinguishes the language pack.
	// - The old 'informal' variants are not locales in their own right, so
	//   they are typically mapped to the bare 'root' language code.
	// - english_pirate is not a real language; the _x_ segment marks it as a
	//   'private language'.
	$locales_by_old_name = [
		'albanian' => 'sq_AL',
		// 001 is the region code for the whole world, i.e. Modern Standard Arabic.
		'arabic' => 'ar_001',
		'bulgarian' => 'bg_BG',
		'cambodian' => 'km_KH',
		'catalan' => 'ca_ES',
		// Hans and Hant name the script rather than a country.
		'chinese-simplified' => 'zh_Hans',
		'chinese-traditional' => 'zh_Hant',
		'croatian' => 'hr_HR',
		'czech' => 'cs_CZ',
		'czech_informal' => 'cs',
		'danish' => 'da_DK',
		'dutch' => 'nl_NL',
		'english' => 'en_US',
		'english_british' => 'en_GB',
		'english_pirate' => 'en_x_pirate',
		'esperanto' => 'eo',
		'finnish' => 'fi_FI',
		'french' => 'fr_FR',
		'galician' => 'gl_ES',
		'german' => 'de_DE',
		'german_informal' => 'de',
		'greek' => 'el_GR',
		'hebrew' => 'he_IL',
		'hungarian' => 'hu_HU',
		'indonesian' => 'id_ID',
		'italian' => 'it_IT',
		'japanese' => 'ja_JP',
		'lithuanian' => 'lt_LT',
		'macedonian' => 'mk_MK',
		'malay' => 'ms_MY',
		'norwegian' => 'nb_NO',
		'persian' => 'fa_IR',
		'polish' => 'pl_PL',
		'portuguese_brazilian' => 'pt_BR',
		'portuguese_pt' => 'pt_PT',
		'romanian' => 'ro_RO',
		'russian' => 'ru_RU',
		// Cyrl is the Cyrillic script, Latn is the Latin script.
		'serbian_cyrillic' => 'sr_Cyrl',
		'serbian_latin' => 'sr_Latn',
		'slovak' => 'sk_SK',
		'slovenian' => 'sl_SI',
		'spanish_es' => 'es_ES',
		// 419 is the region code for Latin America.
		'spanish_latin' => 'es_419',
		'swedish' => 'sv_SE',
		'thai' => 'th_TH',
		'turkish' => 'tr_TR',
		'ukrainian' => 'uk_UA',
		'urdu' => 'ur_PK',
		'vietnamese' => 'vi_VN',
	];

	return $locales_by_old_name;
}

/**
 * Translates a legacy language name into its locale identifier.
 *
 * The lookup is an exact-key match against the table returned by
 * self::oldLanguageMap().
 * This is used to support upgrading from SMF 2.1 and below, and to keep
 * customizations that still use the old names working.
 *
 * @param string $lang Old-style language name, e.g. 'english_british'.
 * @return ?string The matching locale, or null when the name is not in the
 *    table (or is mapped to null there).
 */
public static function getLocaleFromLanguageName(string $lang): ?string
{
	$known_languages = self::oldLanguageMap();

	// isset() is false both for a missing key and for a key holding null,
	// so either case falls through to the null return below.
	if (isset($known_languages[$lang])) {
		return $known_languages[$lang];
	}

	return null;
}
}

// Export properties to global namespace for backward compatibility.
Expand Down
11 changes: 11 additions & 0 deletions other/upgrade.php
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,17 @@ function ConvertUtf8()
'yoruba' => 'UTF-8',
];

// Map in the new locales. We do it like this, because we want to try our best to capture
// the correct charset no matter what the status of the language upgrade is.
foreach ($lang_charsets as $key => $value) {
// This could be more efficient, but it's just upgrade logic.
$locale = Lang::getLocaleFromLanguageName($key);

if ($locale !== null) {
$lang_charsets[$locale] = $value;
}
}

// Default to ISO-8859-1 unless we detected another supported charset
$upcontext['charset_detected'] = (isset($lang_charsets[Config::$language], $charsets[strtr(strtolower($upcontext['charset_detected']), ['utf' => 'UTF', 'iso' => 'ISO'])])) ? $lang_charsets[Config::$language] : 'ISO-8859-1';

Expand Down

0 comments on commit d20300b

Please sign in to comment.