diff --git a/Sources/Unicode/CaseFold.php b/Sources/Unicode/CaseFold.php index 24d097936e..52befa30df 100644 --- a/Sources/Unicode/CaseFold.php +++ b/Sources/Unicode/CaseFold.php @@ -815,10 +815,12 @@ function utf8_casefold_simple_maps() "\xE1\xBF\x8A" => "\xE1\xBD\xB4", "\xE1\xBF\x8B" => "\xE1\xBD\xB5", "\xE1\xBF\x8C" => "\xE1\xBF\x83", + "\xE1\xBF\x93" => "\xCE\x90", "\xE1\xBF\x98" => "\xE1\xBF\x90", "\xE1\xBF\x99" => "\xE1\xBF\x91", "\xE1\xBF\x9A" => "\xE1\xBD\xB6", "\xE1\xBF\x9B" => "\xE1\xBD\xB7", + "\xE1\xBF\xA3" => "\xCE\xB0", "\xE1\xBF\xA8" => "\xE1\xBF\xA0", "\xE1\xBF\xA9" => "\xE1\xBF\xA1", "\xE1\xBF\xAA" => "\xE1\xBD\xBA", @@ -1195,6 +1197,7 @@ function utf8_casefold_simple_maps() "\xEA\xAE\xBD" => "\xE1\x8F\xAD", "\xEA\xAE\xBE" => "\xE1\x8F\xAE", "\xEA\xAE\xBF" => "\xE1\x8F\xAF", + "\xEF\xAC\x85" => "\xEF\xAC\x86", "\xEF\xBC\xA1" => "\xEF\xBD\x81", "\xEF\xBC\xA2" => "\xEF\xBD\x82", "\xEF\xBC\xA3" => "\xEF\xBD\x83", diff --git a/Sources/Unicode/Idna.php b/Sources/Unicode/Idna.php index 5535d743f6..b4da7789d9 100644 --- a/Sources/Unicode/Idna.php +++ b/Sources/Unicode/Idna.php @@ -831,7 +831,7 @@ function idna_maps() "\xE1\xBA\x94" => "\xE1\xBA\x95", "\xE1\xBA\x9A" => "\x61\xCA\xBE", "\xE1\xBA\x9B" => "\xE1\xB9\xA1", - "\xE1\xBA\x9E" => "\x73\x73", + "\xE1\xBA\x9E" => "\xC3\x9F", "\xE1\xBA\xA0" => "\xE1\xBA\xA1", "\xE1\xBA\xA2" => "\xE1\xBA\xA3", "\xE1\xBA\xA4" => "\xE1\xBA\xA5", @@ -6369,8 +6369,6 @@ function idna_regex() '\x{2101}' . '\x{2105}' . '\x{2106}' . - '\x{2260}' . - '\x{226E}-\x{226F}' . '\x{2474}' . '\x{2475}' . '\x{2476}' . @@ -6911,7 +6909,8 @@ function idna_regex() '\x{3130}' . '\x{3164}' . '\x{318F}' . - '\x{31E4}-\x{31EF}' . + '\x{31E4}-\x{31EE}' . + '\x{31EF}' . '\x{321F}' . '\x{33C2}' . '\x{33C7}' . @@ -7353,7 +7352,8 @@ function idna_regex() '\x{2B73A}-\x{2B73F}' . '\x{2B81E}-\x{2B81F}' . '\x{2CEA2}-\x{2CEAF}' . - '\x{2EBE1}-\x{2F7FF}' . + '\x{2EBE1}-\x{2EBEF}' . + '\x{2EE5E}-\x{2F7FF}' . '\x{2F868}' . '\x{2F874}' . '\x{2F91F}' . diff --git a/Sources/Unicode/Metadata.php b/Sources/Unicode/Metadata.php index 2ccf662812..1ca474c0c2 100644 --- a/Sources/Unicode/Metadata.php +++ b/Sources/Unicode/Metadata.php @@ -17,6 +17,6 @@ die('No direct access...'); if (!defined('SMF_UNICODE_VERSION')) - define('SMF_UNICODE_VERSION', '15.0.0.0'); + define('SMF_UNICODE_VERSION', '15.1.0.0'); ?> \ No newline at end of file diff --git a/Sources/Unicode/RegularExpressions.php b/Sources/Unicode/RegularExpressions.php index a34987d6f0..055e4be3fa 100644 --- a/Sources/Unicode/RegularExpressions.php +++ b/Sources/Unicode/RegularExpressions.php @@ -800,13 +800,12 @@ function utf8_regex_properties() '\x{2E9A}' . '\x{2EF4}-\x{2EFF}' . '\x{2FD6}-\x{2FEF}' . - '\x{2FFC}-\x{2FFF}' . '\x{3040}' . '\x{3097}-\x{3098}' . '\x{3100}-\x{3104}' . '\x{3130}' . '\x{318F}' . - '\x{31E4}-\x{31EF}' . + '\x{31E4}-\x{31EE}' . '\x{321F}' . '\x{A48D}-\x{A48F}' . '\x{A4C7}-\x{A4CF}' . @@ -1229,7 +1228,8 @@ function utf8_regex_properties() '\x{2B73A}-\x{2B73F}' . '\x{2B81E}-\x{2B81F}' . '\x{2CEA2}-\x{2CEAF}' . - '\x{2EBE1}-\x{2F7FF}' . + '\x{2EBE1}-\x{2EBEF}' . + '\x{2EE5E}-\x{2F7FF}' . '\x{2FA1E}-\x{2FFFF}' . '\x{3134B}-\x{3134F}' . '\x{323B0}-\x{E0000}' . @@ -1692,6 +1692,7 @@ function utf8_regex_properties() '\x{2B740}-\x{2B81D}' . '\x{2B820}-\x{2CEA1}' . '\x{2CEB0}-\x{2EBE0}' . + '\x{2EBF0}-\x{2EE5D}' . '\x{2F800}-\x{2FA1D}' . '\x{30000}-\x{3134A}' . '\x{31350}-\x{323AF}', @@ -1737,9 +1738,7 @@ function utf8_regex_variation_selectors() '\x{231A}-\x{231B}' . '\x{2328}' . '\x{23CF}' . - '\x{23E9}-\x{23EA}' . - '\x{23ED}-\x{23EF}' . - '\x{23F1}-\x{23F3}' . + '\x{23E9}-\x{23F3}' . '\x{23F8}-\x{23FA}' . '\x{24C2}' . '\x{25AA}-\x{25AB}' . @@ -1777,7 +1776,7 @@ function utf8_regex_variation_selectors() '\x{26BD}-\x{26BE}' . '\x{26C4}-\x{26C5}' . '\x{26C8}' . - '\x{26CF}' . + '\x{26CE}-\x{26CF}' . '\x{26D1}' . '\x{26D3}-\x{26D4}' . '\x{26E9}-\x{26EA}' . @@ -1785,21 +1784,27 @@ function utf8_regex_variation_selectors() '\x{26F7}-\x{26FA}' . '\x{26FD}' . '\x{2702}' . - '\x{2708}-\x{2709}' . - '\x{270C}-\x{270D}' . + '\x{2705}' . + '\x{2708}-\x{270D}' . '\x{270F}' . '\x{2712}' . '\x{2714}' . '\x{2716}' . '\x{271D}' . '\x{2721}' . + '\x{2728}' . '\x{2733}-\x{2734}' . '\x{2744}' . '\x{2747}' . - '\x{2753}' . + '\x{274C}' . + '\x{274E}' . + '\x{2753}-\x{2755}' . '\x{2757}' . '\x{2763}-\x{2764}' . + '\x{2795}-\x{2797}' . '\x{27A1}' . + '\x{27B0}' . + '\x{27BF}' . '\x{2934}-\x{2935}' . '\x{2B05}-\x{2B07}' . '\x{2B1B}-\x{2B1C}' . @@ -3183,17 +3188,9 @@ function utf8_regex_joining_type() '\x{0711}' . '\x{0730}-\x{074A}', ), - 'Adlam' => array( - 'Join_Causing' => - '\x{0640}', - 'Dual_Joining' => - '\x{1E900}-\x{1E943}', - 'Transparent' => - '\x{1E944}-\x{1E94A}' . - '\x{1E94B}', - ), 'Tirhuta' => array( 'Dual_Joining' => + '\x{A840}-\x{A871}' . '\x{A840}-\x{A871}', 'Transparent' => '\x{0951}-\x{0957}' . @@ -3202,6 +3199,15 @@ function utf8_regex_joining_type() '\x{114BF}-\x{114C0}' . '\x{114C2}-\x{114C3}', ), + 'Adlam' => array( + 'Join_Causing' => + '\x{0640}', + 'Dual_Joining' => + '\x{1E900}-\x{1E943}', + 'Transparent' => + '\x{1E944}-\x{1E94A}' . + '\x{1E94B}', + ), 'Nko' => array( 'Join_Causing' => '\x{07FA}', @@ -3536,6 +3542,7 @@ function utf8_regex_indic() '\x{0D54}-\x{0D63}' . '\x{0D66}-\x{0D7F}' . '\x{1CDA}' . + '\x{1CF2}' . '\x{A838}', 'Letter' => '\x{0D04}-\x{0D0C}' . @@ -3545,7 +3552,8 @@ function utf8_regex_indic() '\x{0D4E}' . '\x{0D54}-\x{0D56}' . '\x{0D5F}-\x{0D61}' . - '\x{0D7A}-\x{0D7F}', + '\x{0D7A}-\x{0D7F}' . + '\x{1CF2}', 'Nonspacing_Combining_Mark' => '\x{0951}-\x{0952}' . '\x{0D3B}-\x{0D3C}' . @@ -3669,13 +3677,15 @@ function utf8_regex_indic() '\x{0DD8}-\x{0DDF}' . '\x{0DE6}-\x{0DEF}' . '\x{0DF2}-\x{0DF4}' . + '\x{1CF2}' . '\x{111E1}-\x{111F4}', 'Letter' => '\x{0D85}-\x{0D96}' . '\x{0D9A}-\x{0DB1}' . '\x{0DB3}-\x{0DBB}' . '\x{0DBD}' . - '\x{0DC0}-\x{0DC6}', + '\x{0DC0}-\x{0DC6}' . + '\x{1CF2}', 'Nonspacing_Combining_Mark' => '\x{0DCA}', 'Nonspacing_Mark' => @@ -3970,6 +3980,9 @@ function utf8_regex_indic() '\x{1CD9}' . '\x{1CDD}' . '\x{1CE0}' . + '\x{A838}' . + '\x{A83B}' . + '\x{A83D}' . '\x{11180}-\x{111DF}', 'Letter' => '\x{11183}-\x{111B2}' . @@ -4130,8 +4143,8 @@ function utf8_regex_indic() '\x{0964}-\x{0965}' . '\x{1CF2}' . '\x{A838}-\x{A839}' . - '\x{A83D}' . - '\x{A83F}-\x{A840}' . + '\x{A83E}' . + '\x{A840}' . '\x{11480}-\x{114C7}' . '\x{114D0}-\x{114D9}', 'Letter' => @@ -4260,8 +4273,8 @@ function utf8_regex_indic() 'All' => '\x{0964}-\x{0965}' . '\x{A838}-\x{A839}' . - '\x{A83C}' . - '\x{A83E}-\x{A83F}' . + '\x{A83D}' . + '\x{A83F}' . '\x{11680}-\x{116B9}' . '\x{116C0}-\x{116C9}', 'Letter' => diff --git a/Sources/tasks/UpdateUnicode.php b/Sources/tasks/UpdateUnicode.php index bd1b6317f2..89dc669502 100644 --- a/Sources/tasks/UpdateUnicode.php +++ b/Sources/tasks/UpdateUnicode.php @@ -1497,7 +1497,7 @@ private function build_regex_variation_selectors() foreach ($this->funcs['utf8_regex_variation_selectors']['data'] as $variation_selector => $class_string) { - $this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = preg_split('/(?<=})(?=\\\x{)/', $class_string); + $this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string)); } krsort($this->funcs['utf8_regex_variation_selectors']['data']); @@ -1815,7 +1815,7 @@ private function build_regex_joining_type() return $a['stats']['age'] - $b['stats']['age']; } }); - foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => $joining_types) + foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => &$joining_types) { unset($this->funcs['utf8_regex_joining_type']['data'][$char_script]['stats'], $joining_types['stats']); @@ -1826,7 +1826,7 @@ private function build_regex_joining_type() continue; } - foreach ($joining_types as $joining_type => $value) + foreach ($joining_types as $joining_type => &$value) { sort($value); } @@ -2001,7 +2001,7 @@ private function build_regex_indic() } } - $this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = preg_split('/(?<=})(?=\\\x{)/', $class_string); + $this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string)); } ksort($this->funcs['utf8_regex_indic']['data'][$char_script]);