diff options
Diffstat (limited to 'vendor/symfony/polyfill-intl-normalizer/Normalizer.php')
| -rw-r--r-- | vendor/symfony/polyfill-intl-normalizer/Normalizer.php | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/vendor/symfony/polyfill-intl-normalizer/Normalizer.php b/vendor/symfony/polyfill-intl-normalizer/Normalizer.php new file mode 100644 index 0000000..81704ab --- /dev/null +++ b/vendor/symfony/polyfill-intl-normalizer/Normalizer.php | |||
| @@ -0,0 +1,310 @@ | |||
| 1 | <?php | ||
| 2 | |||
| 3 | /* | ||
| 4 | * This file is part of the Symfony package. | ||
| 5 | * | ||
| 6 | * (c) Fabien Potencier <fabien@symfony.com> | ||
| 7 | * | ||
| 8 | * For the full copyright and license information, please view the LICENSE | ||
| 9 | * file that was distributed with this source code. | ||
| 10 | */ | ||
| 11 | |||
| 12 | namespace Symfony\Polyfill\Intl\Normalizer; | ||
| 13 | |||
| 14 | /** | ||
| 15 | * Normalizer is a PHP fallback implementation of the Normalizer class provided by the intl extension. | ||
| 16 | * | ||
| 17 | * It has been validated with Unicode 6.3 Normalization Conformance Test. | ||
| 18 | * See http://www.unicode.org/reports/tr15/ for detailed info about Unicode normalizations. | ||
| 19 | * | ||
| 20 | * @author Nicolas Grekas <p@tchwork.com> | ||
| 21 | * | ||
| 22 | * @internal | ||
| 23 | */ | ||
class Normalizer
{
    public const FORM_D = \Normalizer::FORM_D;
    public const FORM_KD = \Normalizer::FORM_KD;
    public const FORM_C = \Normalizer::FORM_C;
    public const FORM_KC = \Normalizer::FORM_KC;
    public const NFD = \Normalizer::NFD;
    public const NFKD = \Normalizer::NFKD;
    public const NFC = \Normalizer::NFC;
    public const NFKC = \Normalizer::NFKC;

    /** Lazily loaded 'canonicalComposition' table (see getData()). */
    private static $C;

    /** Lazily loaded 'canonicalDecomposition' table (see getData()). */
    private static $D;

    /** Lazily loaded 'compatibilityDecomposition' table (see getData()). */
    private static $KD;

    /** Lazily loaded 'combiningClass' table (see getData()). */
    private static $cC;

    /** Maps the high nibble of a UTF-8 lead byte to the byte length of the sequence. */
    private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

    /** Byte mask handed to strspn() to measure runs of ASCII bytes. */
    private static $ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";

    /**
     * Tells whether $s is already in the requested normalization form.
     *
     * @param string $s    The string to check
     * @param int    $form One of the NFD, NFKD, NFC or NFKC constants
     *
     * @return bool False for an unsupported form or when normalize() yields a different string
     *              (which includes invalid UTF-8, for which normalize() returns false)
     */
    public static function isNormalized(string $s, int $form = self::FORM_C)
    {
        if (!\in_array($form, [self::NFD, self::NFKD, self::NFC, self::NFKC], true)) {
            return false;
        }

        // Fast path: the string consists only of bytes listed in the ASCII mask.
        if (!isset($s[strspn($s, self::$ASCII)])) {
            return true;
        }

        // Fast path for NFC: valid UTF-8 with no code point above U+02FF is accepted as-is.
        if (self::NFC === $form && preg_match('//u', $s) && !preg_match('/[^\x00-\x{2FF}]/u', $s)) {
            return true;
        }

        return self::normalize($s, $form) === $s;
    }

    /**
     * Normalizes $s to the requested form.
     *
     * @param string $s    The string to normalize
     * @param int    $form One of the NFD, NFKD, NFC or NFKC constants
     *                     (\Normalizer::NONE, when defined, returns $s unchanged)
     *
     * @return string|false The normalized string; false when $s is not valid UTF-8,
     *                      or for an unknown form on PHP < 8.0
     *
     * @throws \ValueError For an unknown form on PHP >= 8.0
     */
    public static function normalize(string $s, int $form = self::FORM_C)
    {
        if (!preg_match('//u', $s)) {
            return false;
        }

        switch ($form) {
            case self::NFC: $C = true; $K = false; break;
            case self::NFD: $C = false; $K = false; break;
            case self::NFKC: $C = true; $K = true; break;
            case self::NFKD: $C = false; $K = true; break;
            default:
                if (\defined('Normalizer::NONE') && \Normalizer::NONE === $form) {
                    return $s;
                }

                if (80000 > \PHP_VERSION_ID) {
                    return false;
                }

                throw new \ValueError('normalizer_normalize(): Argument #2 ($form) must be a a valid normalization form');
        }

        if ('' === $s) {
            return '';
        }

        if ($K && null === self::$KD) {
            self::$KD = self::getData('compatibilityDecomposition');
        }

        if (null === self::$D) {
            self::$D = self::getData('canonicalDecomposition');
            self::$cC = self::getData('combiningClass');
        }

        // The helpers index strings byte by byte; when mbstring overloads the string
        // functions, switch to the 8bit encoding for the duration of the work.
        $mbEncoding = null;
        if (2 /* MB_OVERLOAD_STRING */ & (int) \ini_get('mbstring.func_overload')) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('8bit');
        }

        try {
            $r = self::decompose($s, $K);

            if ($C) {
                if (null === self::$C) {
                    self::$C = self::getData('canonicalComposition');
                }

                $r = self::recompose($r);
            }
        } finally {
            // Always restore the caller's encoding, even if a helper throws.
            if (null !== $mbEncoding) {
                mb_internal_encoding($mbEncoding);
            }
        }

        return $r;
    }

    /**
     * Composes an already decomposed, non-empty UTF-8 string.
     *
     * Pairs found in the composition table are merged; Hangul jamo sequences
     * are composed arithmetically instead of through the table.
     *
     * @param string $s Output of decompose(); must not be empty ($s[0] is read)
     *
     * @return string The composed string
     */
    private static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;

        $result = $tail = '';

        $i = $s[0] < "\x80" ? 1 : $ulenMask[$s[0] & "\xF0"];
        $len = \strlen($s);

        // $lastUchr is the current starter candidate; $tail collects combining
        // chars that followed it without composing.
        $lastUchr = substr($s, 0, $i);
        // 256 acts as a sentinel: "$lastUcls < $ucls" can then never hold, so a
        // leading combining char is never used as a composition base
        // (presumably 256 exceeds every value in the combining class table).
        $lastUcls = isset($combClass[$lastUchr]) ? 256 : 0;

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($tail) {
                    $lastUchr .= $tail;
                    $tail = '';
                }

                // Copy the ASCII run in one go, keeping its last byte as the new $lastUchr.
                if ($j = strspn($s, $ASCII, $i + 1)) {
                    $lastUchr .= substr($s, $i, $j);
                    $i += $j;
                }

                $result .= $lastUchr;
                $lastUchr = $s[$i];
                $lastUcls = 0;
                ++$i;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);

            // Anything but "leading jamo U+1100..U+1112 followed by vowel jamo
            // U+1161..U+1175, with no pending combining char" goes through the table.
            if ($lastUchr < "\xE1\x84\x80" || "\xE1\x84\x92" < $lastUchr
                || $uchr < "\xE1\x85\xA1" || "\xE1\x85\xB5" < $uchr
                || $lastUcls) {
                // Table lookup and combining chars composition

                $ucls = $combClass[$uchr] ?? 0;

                if (isset($compMap[$lastUchr.$uchr]) && (!$lastUcls || $lastUcls < $ucls)) {
                    $lastUchr = $compMap[$lastUchr.$uchr];
                } elseif ($lastUcls = $ucls) {
                    // Assignment intended: remember the class of the blocked combining char.
                    $tail .= $uchr;
                } else {
                    if ($tail) {
                        $lastUchr .= $tail;
                        $tail = '';
                    }

                    $result .= $lastUchr;
                    $lastUchr = $uchr;
                }
            } else {
                // Hangul chars

                $L = \ord($lastUchr[2]) - 0x80;
                $V = \ord($uchr[2]) - 0xA1;
                $T = 0;

                $uchr = substr($s, $i + $ulen, 3);

                // Trailing consonants are U+11A8..U+11C2. The lower bound is exclusive:
                // U+11A7 is only the arithmetic base (T index 0) and must not be
                // consumed, otherwise it would vanish from the output.
                if ("\xE1\x86\xA7" < $uchr && $uchr <= "\xE1\x87\x82") {
                    $T = \ord($uchr[2]) - 0xA7;
                    // The range spans two second bytes (\x86 and \x87); wrap the index.
                    if ($T < 0) {
                        $T += 0x40;
                    }
                    $ulen += 3;
                }

                // Syllable code point, then its 3-byte UTF-8 encoding.
                $L = 0xAC00 + ($L * 21 + $V) * 28 + $T;
                $lastUchr = \chr(0xE0 | $L >> 12).\chr(0x80 | $L >> 6 & 0x3F).\chr(0x80 | $L & 0x3F);
            }

            $i += $ulen;
        }

        return $result.$lastUchr.$tail;
    }

    /**
     * Decomposes a UTF-8 string and reorders runs of combining chars by combining class.
     *
     * @param string $s The valid UTF-8 string to decompose
     * @param bool   $c Whether to apply the compatibility table before the canonical one
     *
     * @return string The decomposed string
     */
    private static function decompose($s, $c)
    {
        $result = '';

        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        // Without compatibility mode every lookup falls through to the canonical table.
        $compatMap = $c ? self::$KD : [];

        // Pending combining chars, keyed by combining class so ksort() orders them.
        $pending = [];
        $i = 0;
        $len = \strlen($s);

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($pending) {
                    ksort($pending);
                    $result .= implode('', $pending);
                    $pending = [];
                }

                $j = 1 + strspn($s, $ASCII, $i + 1);
                $result .= substr($s, $i, $j);
                $i += $j;
                continue;
            }

            $ulen = $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);
            $i += $ulen;

            // U+AC00..U+D7A3 are precomposed Hangul syllables, handled arithmetically below.
            if ($uchr < "\xEA\xB0\x80" || "\xED\x9E\xA3" < $uchr) {
                // Table lookup

                if ($uchr !== $j = $compatMap[$uchr] ?? ($decompMap[$uchr] ?? $uchr)) {
                    $uchr = $j;

                    $j = \strlen($uchr);
                    $ulen = $uchr[0] < "\x80" ? 1 : $ulenMask[$uchr[0] & "\xF0"];

                    if ($ulen != $j) {
                        // Put trailing chars in $s

                        // Keep only the first char of the mapping and write the rest back
                        // into $s just before the read position, so it is processed next.
                        $j -= $ulen;
                        $i -= $j;

                        if (0 > $i) {
                            // Not enough consumed bytes to overwrite: grow $s at the front.
                            $s = str_repeat(' ', -$i).$s;
                            $len -= $i;
                            $i = 0;
                        }

                        while ($j--) {
                            $s[$i + $j] = $uchr[$ulen + $j];
                        }

                        $uchr = substr($uchr, 0, $ulen);
                    }
                }
                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting

                    if (!isset($pending[$combClass[$uchr]])) {
                        $pending[$combClass[$uchr]] = '';
                    }
                    $pending[$combClass[$uchr]] .= $uchr;
                    continue;
                }
            } else {
                // Hangul chars

                // Decode the 3-byte sequence into a syllable index relative to U+AC00
                // (0xAC80 = 0xAC00 plus the 0x80 continuation bias of the last byte).
                $uchr = unpack('C*', $uchr);
                $j = (($uchr[1] - 224) << 12) + (($uchr[2] - 128) << 6) + $uchr[3] - 0xAC80;

                $uchr = "\xE1\x84".\chr(0x80 + (int) ($j / 588))
                       ."\xE1\x85".\chr(0xA1 + (int) (($j % 588) / 28));

                if ($j %= 28) {
                    // Trailing consonant; indexes >= 25 continue under the \x87 second byte.
                    $uchr .= $j < 25
                        ? ("\xE1\x86".\chr(0xA7 + $j))
                        : ("\xE1\x87".\chr(0x67 + $j));
                }
            }
            if ($pending) {
                ksort($pending);
                $result .= implode('', $pending);
                $pending = [];
            }

            $result .= $uchr;
        }

        if ($pending) {
            ksort($pending);
            $result .= implode('', $pending);
        }

        return $result;
    }

    /**
     * Loads a data table from Resources/unidata.
     *
     * @param string $file Base name of the data file, without the ".php" extension
     *
     * @return mixed Whatever the data file returns, or false when the file does not exist
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/unidata/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}
