diff options
Diffstat (limited to 'vendor/symfony/string/AbstractUnicodeString.php')
| -rw-r--r-- | vendor/symfony/string/AbstractUnicodeString.php | 664 |
1 files changed, 664 insertions, 0 deletions
diff --git a/vendor/symfony/string/AbstractUnicodeString.php b/vendor/symfony/string/AbstractUnicodeString.php new file mode 100644 index 0000000..2cb2917 --- /dev/null +++ b/vendor/symfony/string/AbstractUnicodeString.php | |||
| @@ -0,0 +1,664 @@ | |||
| 1 | <?php | ||
| 2 | |||
| 3 | /* | ||
| 4 | * This file is part of the Symfony package. | ||
| 5 | * | ||
| 6 | * (c) Fabien Potencier <fabien@symfony.com> | ||
| 7 | * | ||
| 8 | * For the full copyright and license information, please view the LICENSE | ||
| 9 | * file that was distributed with this source code. | ||
| 10 | */ | ||
| 11 | |||
| 12 | namespace Symfony\Component\String; | ||
| 13 | |||
| 14 | use Symfony\Component\String\Exception\ExceptionInterface; | ||
| 15 | use Symfony\Component\String\Exception\InvalidArgumentException; | ||
| 16 | use Symfony\Component\String\Exception\RuntimeException; | ||
| 17 | |||
| 18 | /** | ||
| 19 | * Represents a string of abstract Unicode characters. | ||
| 20 | * | ||
| 21 | * Unicode defines 3 types of "characters" (bytes, code points and grapheme clusters). | ||
| 22 | * This class is the abstract type to use as a type-hint when the logic you want to | ||
| 23 | * implement is Unicode-aware but doesn't care about code points vs grapheme clusters. | ||
| 24 | * | ||
| 25 | * @author Nicolas Grekas <p@tchwork.com> | ||
| 26 | * | ||
| 27 | * @throws ExceptionInterface | ||
| 28 | */ | ||
| 29 | abstract class AbstractUnicodeString extends AbstractString | ||
| 30 | { | ||
| 31 | public const NFC = \Normalizer::NFC; | ||
| 32 | public const NFD = \Normalizer::NFD; | ||
| 33 | public const NFKC = \Normalizer::NFKC; | ||
| 34 | public const NFKD = \Normalizer::NFKD; | ||
| 35 | |||
| 36 | // all ASCII characters sorted by typical frequency of occurrence | ||
| 37 | private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; | ||
| 38 | |||
| 39 | // the subset of folded case mappings that is not in lower case mappings | ||
| 40 | private const FOLD_FROM = ['İ', 'µ', 'ſ', "\xCD\x85", 'ς', 'ϐ', 'ϑ', 'ϕ', 'ϖ', 'ϰ', 'ϱ', 'ϵ', 'ẛ', "\xE1\xBE\xBE", 'ß', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'և', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ẞ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'ᾼ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'ῌ', 'ῒ', 'ῖ', 'ῗ', 'ῢ', 'ῤ', 'ῦ', 'ῧ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'ῼ', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ']; | ||
| 41 | private const FOLD_TO = ['i̇', 'μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', 'ṡ', 'ι', 'ss', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'եւ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'aʾ', 'ss', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὰι', 'αι', 'άι', 'ᾶ', 'ᾶι', 'αι', 'ὴι', 'ηι', 'ήι', 'ῆ', 'ῆι', 'ηι', 'ῒ', 'ῖ', 'ῗ', 'ῢ', 'ῤ', 'ῦ', 'ῧ', 'ὼι', 'ωι', 'ώι', 'ῶ', 'ῶι', 'ωι', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'st', 'st', 'մն', 'մե', 'մի', 'վն', 'մխ']; | ||
| 42 | |||
| 43 | // the subset of https://github.com/unicode-org/cldr/blob/master/common/transforms/Latin-ASCII.xml that is not in NFKD | ||
| 44 | private const TRANSLIT_FROM = ['Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', 'ħ', 'ı', 'ĸ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'ʼn', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ', 'ƀ', 'Ɓ', 'Ƃ', 'ƃ', 'Ƈ', 'ƈ', 'Ɖ', 'Ɗ', 'Ƌ', 'ƌ', 'Ɛ', 'Ƒ', 'ƒ', 'Ɠ', 'ƕ', 'Ɩ', 'Ɨ', 'Ƙ', 'ƙ', 'ƚ', 'Ɲ', 'ƞ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'ƫ', 'Ƭ', 'ƭ', 'Ʈ', 'Ʋ', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'DŽ', 'Dž', 'dž', 'Ǥ', 'ǥ', 'ȡ', 'Ȥ', 'ȥ', 'ȴ', 'ȵ', 'ȶ', 'ȷ', 'ȸ', 'ȹ', 'Ⱥ', 'Ȼ', 'ȼ', 'Ƚ', 'Ⱦ', 'ȿ', 'ɀ', 'Ƀ', 'Ʉ', 'Ɇ', 'ɇ', 'Ɉ', 'ɉ', 'Ɍ', 'ɍ', 'Ɏ', 'ɏ', 'ɓ', 'ɕ', 'ɖ', 'ɗ', 'ɛ', 'ɟ', 'ɠ', 'ɡ', 'ɢ', 'ɦ', 'ɧ', 'ɨ', 'ɪ', 'ɫ', 'ɬ', 'ɭ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɶ', 'ɼ', 'ɽ', 'ɾ', 'ʀ', 'ʂ', 'ʈ', 'ʉ', 'ʋ', 'ʏ', 'ʐ', 'ʑ', 'ʙ', 'ʛ', 'ʜ', 'ʝ', 'ʟ', 'ʠ', 'ʣ', 'ʥ', 'ʦ', 'ʪ', 'ʫ', 'ᴀ', 'ᴁ', 'ᴃ', 'ᴄ', 'ᴅ', 'ᴆ', 'ᴇ', 'ᴊ', 'ᴋ', 'ᴌ', 'ᴍ', 'ᴏ', 'ᴘ', 'ᴛ', 'ᴜ', 'ᴠ', 'ᴡ', 'ᴢ', 'ᵫ', 'ᵬ', 'ᵭ', 'ᵮ', 'ᵯ', 'ᵰ', 'ᵱ', 'ᵲ', 'ᵳ', 'ᵴ', 'ᵵ', 'ᵶ', 'ᵺ', 'ᵻ', 'ᵽ', 'ᵾ', 'ᶀ', 'ᶁ', 'ᶂ', 'ᶃ', 'ᶄ', 'ᶅ', 'ᶆ', 'ᶇ', 'ᶈ', 'ᶉ', 'ᶊ', 'ᶌ', 'ᶍ', 'ᶎ', 'ᶏ', 'ᶑ', 'ᶒ', 'ᶓ', 'ᶖ', 'ᶙ', 'ẚ', 'ẜ', 'ẝ', 'ẞ', 'Ỻ', 'ỻ', 'Ỽ', 'ỽ', 'Ỿ', 'ỿ', '©', '®', '₠', '₢', '₣', '₤', '₧', '₺', '₹', 'ℌ', '℞', '㎧', '㎮', '㏆', '㏗', '㏞', '㏟', '¼', '½', '¾', '⅓', '⅔', '⅕', '⅖', '⅗', '⅘', '⅙', '⅚', '⅛', '⅜', '⅝', '⅞', '⅟', '〇', '‘', '’', '‚', '‛', '“', '”', '„', '‟', '′', '″', '〝', '〞', '«', '»', '‹', '›', '‐', '‑', '‒', '–', '—', '―', '︱', '︲', '﹘', '‖', '⁄', '⁅', '⁆', '⁎', '、', '。', '〈', '〉', '《', '》', '〔', '〕', '〘', '〙', '〚', '〛', '︑', '︒', '︹', '︺', '︽', '︾', '︿', '﹀', '﹑', '﹝', '﹞', '⦅', '⦆', '。', '、', '×', '÷', '−', '∕', '∖', '∣', '∥', '≪', '≫', '⦅', '⦆']; | ||
| 45 | private const TRANSLIT_TO = ['AE', 'D', 'O', 'TH', 'ss', 'ae', 'd', 'o', 'th', 'D', 'd', 'H', 'h', 'i', 'q', 'L', 'l', 'L', 'l', '\'n', 'N', 'n', 'OE', 'oe', 'T', 't', 'b', 'B', 'B', 'b', 'C', 'c', 'D', 'D', 'D', 'd', 'E', 'F', 'f', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'N', 'n', 'OI', 'oi', 'P', 'p', 't', 'T', 't', 'T', 'V', 'Y', 'y', 'Z', 'z', 'DZ', 'Dz', 'dz', 'G', 'g', 'd', 'Z', 'z', 'l', 'n', 't', 'j', 'db', 'qp', 'A', 'C', 'c', 'L', 'T', 's', 'z', 'B', 'U', 'E', 'e', 'J', 'j', 'R', 'r', 'Y', 'y', 'b', 'c', 'd', 'd', 'e', 'j', 'g', 'g', 'G', 'h', 'h', 'i', 'I', 'l', 'l', 'l', 'm', 'n', 'n', 'N', 'OE', 'r', 'r', 'r', 'R', 's', 't', 'u', 'v', 'Y', 'z', 'z', 'B', 'G', 'H', 'j', 'L', 'q', 'dz', 'dz', 'ts', 'ls', 'lz', 'A', 'AE', 'B', 'C', 'D', 'D', 'E', 'J', 'K', 'L', 'M', 'O', 'P', 'T', 'U', 'V', 'W', 'Z', 'ue', 'b', 'd', 'f', 'm', 'n', 'p', 'r', 'r', 's', 't', 'z', 'th', 'I', 'p', 'U', 'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'v', 'x', 'z', 'a', 'd', 'e', 'e', 'i', 'u', 'a', 's', 's', 'SS', 'LL', 'll', 'V', 'v', 'Y', 'y', '(C)', '(R)', 'CE', 'Cr', 'Fr.', 'L.', 'Pts', 'TL', 'Rs', 'x', 'Rx', 'm/s', 'rad/s', 'C/kg', 'pH', 'V/m', 'A/m', ' 1/4', ' 1/2', ' 3/4', ' 1/3', ' 2/3', ' 1/5', ' 2/5', ' 3/5', ' 4/5', ' 1/6', ' 5/6', ' 1/8', ' 3/8', ' 5/8', ' 7/8', ' 1/', '0', '\'', '\'', ',', '\'', '"', '"', ',,', '"', '\'', '"', '"', '"', '<<', '>>', '<', '>', '-', '-', '-', '-', '-', '-', '-', '-', '-', '||', '/', '[', ']', '*', ',', '.', '<', '>', '<<', '>>', '[', ']', '[', ']', '[', ']', ',', '.', '[', ']', '<<', '>>', '<', '>', ',', '[', ']', '((', '))', '.', ',', '*', '/', '-', '/', '\\', '|', '||', '<<', '>>', '((', '))']; | ||
| 46 | |||
| 47 | private static array $transliterators = []; | ||
| 48 | private static array $tableZero; | ||
| 49 | private static array $tableWide; | ||
| 50 | |||
/**
 * Builds an instance from a list of integer code points.
 *
 * Every value is first reduced modulo 0x200000, then hand-encoded as a
 * 1- to 4-byte UTF-8 sequence depending on its magnitude. The concatenated
 * bytes are passed to the constructor of the called class.
 */
public static function fromCodePoints(int ...$codes): static
{
    $bytes = '';

    foreach ($codes as $cp) {
        $cp %= 0x200000;

        $bytes .= match (true) {
            // 1 byte: 0xxxxxxx
            $cp < 0x80 => \chr($cp),
            // 2 bytes: 110xxxxx 10xxxxxx
            $cp < 0x800 => \chr(0xC0 | ($cp >> 6)).\chr(0x80 | ($cp & 0x3F)),
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $cp < 0x10000 => \chr(0xE0 | ($cp >> 12)).\chr(0x80 | (($cp >> 6) & 0x3F)).\chr(0x80 | ($cp & 0x3F)),
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            default => \chr(0xF0 | ($cp >> 18)).\chr(0x80 | (($cp >> 12) & 0x3F)).\chr(0x80 | (($cp >> 6) & 0x3F)).\chr(0x80 | ($cp & 0x3F)),
        };
    }

    return new static($bytes);
}
| 69 | |||
/**
 * Generic UTF-8 to ASCII transliteration.
 *
 * Install the intl extension for best results.
 *
 * The caller's rules are wrapped in a fixed pipeline: "nfd" first, then the
 * given rules, then "latin-ascii", "any-latin/bgn" (only when the
 * transliterator functions exist), "nfkd" and "[:nonspacing mark:] remove".
 * One rule is consumed per loop iteration; the loop stops as soon as what is
 * left to process is pure ASCII. Once the rules are exhausted (or an empty
 * rule is met), remaining non-ASCII characters go through iconv when it is
 * available, or are replaced by "?" otherwise.
 *
 * @param string[]|\Transliterator[]|\Closure[] $rules See "*-Latin" rules from Transliterator::listIDs()
 *
 * @throws InvalidArgumentException When a rule is not a known transliterator id
 * @throws \LogicException          When iconv cannot transliterate at all
 */
public function ascii(array $rules = []): self
{
    $result = clone $this;
    $pending = $result->string;
    $result->string = '';

    $hasIntl = \function_exists('transliterator_transliterate');

    array_unshift($rules, 'nfd');
    $rules[] = 'latin-ascii';

    if ($hasIntl) {
        $rules[] = 'any-latin/bgn';
    }

    $rules[] = 'nfkd';
    $rules[] = '[:nonspacing mark:] remove';

    while (true) {
        $asciiLen = strspn($pending, self::ASCII);

        if (\strlen($pending) - 1 <= $asciiLen) {
            break;
        }

        // Flush the leading ASCII run, except its last byte, which stays with
        // the non-ASCII remainder (presumably so a base letter is processed
        // together with the combining marks that follow it).
        --$asciiLen;
        if ($asciiLen > 0) {
            $result->string .= substr($pending, 0, $asciiLen);
            $pending = substr($pending, $asciiLen);
        }

        $rule = array_shift($rules);
        if (!$rule) {
            $rules = []; // An empty rule interrupts the next ones
        }

        if ($rule instanceof \Transliterator) {
            $pending = $rule->transliterate($pending);
            continue;
        }

        if ($rule instanceof \Closure) {
            $pending = $rule($pending);
            continue;
        }

        if (!$rule) {
            // No rule left: last-resort conversion of whatever is still non-ASCII.
            if (!\function_exists('iconv')) {
                $pending = preg_replace('/[^\x00-\x7F]/u', '?', $pending);
                continue;
            }

            $pending = @preg_replace_callback('/[^\x00-\x7F]/u', static function ($m) {
                $ascii = (string) iconv('UTF-8', 'ASCII//TRANSLIT', $m[0]);

                if ('' === $ascii && '' === iconv('UTF-8', 'ASCII//TRANSLIT', '²')) {
                    throw new \LogicException(sprintf('"%s" requires a translit-able iconv implementation, try installing "gnu-libiconv" if you\'re using Alpine Linux.', static::class));
                }

                if (1 < \strlen($ascii)) {
                    return ltrim($ascii, '\'`"^~');
                }

                return '' !== $ascii ? $ascii : '?';
            }, $pending);
            continue;
        }

        // From here on the rule is a string identifier, matched case-insensitively.
        $rule = strtolower($rule);

        if ('nfd' === $rule) {
            if (!normalizer_is_normalized($pending, self::NFD)) {
                $pending = normalizer_normalize($pending, self::NFD);
            }
        } elseif ('nfkd' === $rule) {
            if (!normalizer_is_normalized($pending, self::NFKD)) {
                $pending = normalizer_normalize($pending, self::NFKD);
            }
        } elseif ('[:nonspacing mark:] remove' === $rule) {
            $pending = preg_replace('/\p{Mn}++/u', '', $pending);
        } elseif ('latin-ascii' === $rule) {
            $pending = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $pending);
        } elseif ('de-ascii' === $rule) {
            $pending = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $pending);
            $pending = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $pending);
        } elseif ($hasIntl) {
            $transliterator = self::$transliterators[$rule] ??= \Transliterator::create($rule);

            if (null === $transliterator) {
                // "any-latin/bgn" may be unavailable: retry with plain "any-latin".
                if ('any-latin/bgn' === $rule) {
                    $rule = 'any-latin';
                    $transliterator = self::$transliterators[$rule] ??= \Transliterator::create($rule);
                }

                if (null === $transliterator) {
                    throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
                }

                self::$transliterators['any-latin/bgn'] = $transliterator;
            }

            $pending = $transliterator->transliterate($pending);
        }
    }

    $result->string .= $pending;

    return $result;
}
| 154 | |||
/**
 * Returns a camelCase copy of the string.
 *
 * Runs of characters that are neither letters nor ASCII digits are collapsed
 * to a single space. Then each character at a word boundary that is not
 * followed by an uppercase letter is re-cased: the first such match is
 * lowercased, later ones are title-cased. Finally the spaces are removed.
 */
public function camel(): static
{
    $spaced = preg_replace('/[^\pL0-9]++/u', ' ', $this->string);

    $matchCount = 0;
    $recased = preg_replace_callback('/\b.(?!\p{Lu})/u', static function ($m) use (&$matchCount) {
        if (1 !== ++$matchCount) {
            return mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
        }

        // Very first match: lower-case it, mapping the dotted capital I explicitly.
        return 'İ' === $m[0] ? 'i̇' : mb_strtolower($m[0], 'UTF-8');
    }, $spaced);

    $camel = clone $this;
    $camel->string = str_replace(' ', '', $recased);

    return $camel;
}
| 166 | |||
/**
 * Returns the code points of the one-character slice taken at the given offset.
 *
 * An empty array is returned when that slice is empty.
 *
 * @return int[]
 */
public function codePointsAt(int $offset): array
{
    $chunk = $this->slice($offset, 1)->string;

    if ('' === $chunk) {
        return [];
    }

    return array_map(
        static fn ($char) => mb_ord($char, 'UTF-8'),
        preg_split('//u', $chunk, -1, \PREG_SPLIT_NO_EMPTY)
    );
}
| 186 | |||
/**
 * Returns a case-folded copy of the string.
 *
 * With $compat enabled and the Normalizer::NFKC_CF constant defined, folding
 * is delegated to that normalization form. Otherwise the string is normalized
 * (NFKC when $compat is true, NFC when false), the FOLD_FROM entries are
 * replaced by their FOLD_TO counterparts and the result is lowercased.
 */
public function folded(bool $compat = true): static
{
    $folded = clone $this;

    if ($compat && \defined('Normalizer::NFKC_CF')) {
        $folded->string = normalizer_normalize($folded->string, \Normalizer::NFKC_CF);

        return $folded;
    }

    $form = $compat ? \Normalizer::NFKC : \Normalizer::NFC;
    $folded->string = normalizer_normalize($folded->string, $form);

    $mapped = str_replace(self::FOLD_FROM, self::FOLD_TO, $folded->string);
    $folded->string = mb_strtolower($mapped, 'UTF-8');

    return $folded;
}
| 200 | |||
/**
 * Joins the given strings, using the current string as the glue.
 *
 * When $lastGlue is given and there is more than one string, the last string
 * is attached with $lastGlue instead of the regular glue.
 *
 * @throws InvalidArgumentException When the joined result is not valid UTF-8
 */
public function join(array $strings, ?string $lastGlue = null): static
{
    $joined = clone $this;

    $ending = '';
    if (null !== $lastGlue && \count($strings) > 1) {
        $ending = $lastGlue.array_pop($strings);
    }

    $joined->string = implode($this->string, $strings).$ending;

    if (!preg_match('//u', $joined->string)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    return $joined;
}
| 214 | |||
/**
 * Returns a lowercased copy of the string.
 *
 * The dotted capital I is replaced by "i" followed by a combining dot
 * before mb_strtolower() is applied.
 */
public function lower(): static
{
    $lowered = clone $this;

    $prepared = str_replace('İ', 'i̇', $lowered->string);
    $lowered->string = mb_strtolower($prepared, 'UTF-8');

    return $lowered;
}
| 222 | |||
/**
 * Lowercases the string with the locale's "Lower" transliterator, falling
 * back to lower() when no transliterator is available for that locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeLower(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Lower');

    if (null === $transliterator) {
        return $this->lower();
    }

    $lowered = clone $this;
    $lowered->string = $transliterator->transliterate($lowered->string);

    return $lowered;
}
| 237 | |||
/**
 * Matches the string against a regular expression and returns the matches.
 *
 * preg_match_all() is used when $flags contains PREG_PATTERN_ORDER or
 * PREG_SET_ORDER, preg_match() otherwise. The "u" modifier is always
 * appended, "i" is appended first when the string ignores case, and
 * PREG_UNMATCHED_AS_NULL is always added to the flags.
 *
 * @throws InvalidArgumentException When PHP raises a warning while matching (e.g. a malformed pattern)
 * @throws RuntimeException         When the PCRE engine reports a failure
 */
public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
    $wantsAll = (bool) ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags);
    $pattern = ($this->ignoreCase ? $regexp.'i' : $regexp).'u';
    $pregFlags = $flags | \PREG_UNMATCHED_AS_NULL;

    // Turn any warning emitted by PCRE into an exception for the duration of the call.
    set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));

    try {
        $outcome = $wantsAll
            ? \preg_match_all($pattern, $this->string, $matches, $pregFlags, $offset)
            : \preg_match($pattern, $this->string, $matches, $pregFlags, $offset);

        if (false === $outcome) {
            throw new RuntimeException('Matching failed with error: '.preg_last_error_msg());
        }
    } finally {
        restore_error_handler();
    }

    return $matches;
}
| 258 | |||
/**
 * Returns a copy of the string in the requested Unicode normalization form.
 *
 * The string is only re-normalized when it is not already in that form.
 *
 * @param int $form One of self::NFC, self::NFD, self::NFKC or self::NFKD
 *
 * @throws InvalidArgumentException When $form is not one of the four supported forms
 */
public function normalize(int $form = self::NFC): static
{
    $supported = [self::NFC, self::NFD, self::NFKC, self::NFKD];

    if (!\in_array($form, $supported, true)) {
        throw new InvalidArgumentException('Unsupported normalization form.');
    }

    $normalized = clone $this;

    if (!normalizer_is_normalized($normalized->string, $form)) {
        $normalized->string = normalizer_normalize($normalized->string, $form);
    }

    return $normalized;
}
| 270 | |||
/**
 * Pads the string on both sides with $padStr up to $length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padBoth(int $length, string $padStr = ' '): static
{
    $usable = '' !== $padStr && preg_match('//u', $padStr);

    if (!$usable) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class before delegating to pad().
    $filler = clone $this;
    $filler->string = $padStr;

    return $this->pad($length, $filler, \STR_PAD_BOTH);
}
| 282 | |||
/**
 * Pads the end of the string with $padStr up to $length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padEnd(int $length, string $padStr = ' '): static
{
    $usable = '' !== $padStr && preg_match('//u', $padStr);

    if (!$usable) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class before delegating to pad().
    $filler = clone $this;
    $filler->string = $padStr;

    return $this->pad($length, $filler, \STR_PAD_RIGHT);
}
| 294 | |||
/**
 * Pads the start of the string with $padStr up to $length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padStart(int $length, string $padStr = ' '): static
{
    $usable = '' !== $padStr && preg_match('//u', $padStr);

    if (!$usable) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class before delegating to pad().
    $filler = clone $this;
    $filler->string = $padStr;

    return $this->pad($length, $filler, \STR_PAD_LEFT);
}
| 306 | |||
/**
 * Replaces every match of a regular expression, with a string or via a callback.
 *
 * The "u" modifier is always appended to the pattern, and "i" is appended
 * first when the string ignores case. A string $to is used as a
 * preg_replace() replacement (so the name of a function is NOT called);
 * any other callable - array callable, closure or invokable object - is run
 * through preg_replace_callback() and must return a valid UTF-8 string.
 *
 * @throws InvalidArgumentException When $to, or what the callback returns, is not valid UTF-8,
 *                                  or when PHP raises a warning while replacing
 * @throws RuntimeException         When the PCRE engine reports a failure
 */
public function replaceMatches(string $fromRegexp, string|callable $to): static
{
    if ($this->ignoreCase) {
        $fromRegexp .= 'i';
    }

    // Given the string|callable type, anything that is not a string is a
    // callable; this also covers invokable objects, not just arrays and closures.
    if (!\is_string($to)) {
        $replace = 'preg_replace_callback';
        $to = static function (array $m) use ($to): string {
            $to = $to($m);

            if ('' !== $to && (!\is_string($to) || !preg_match('//u', $to))) {
                throw new InvalidArgumentException('Replace callback must return a valid UTF-8 string.');
            }

            return $to;
        };
    } elseif ('' !== $to && !preg_match('//u', $to)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    } else {
        $replace = 'preg_replace';
    }

    // Turn any warning emitted by PCRE into an exception for the duration of the call.
    set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));

    try {
        if (null === $string = $replace($fromRegexp.'u', $to, $this->string)) {
            $lastError = preg_last_error();

            // Report the failure with the name of the matching PREG_*_ERROR constant.
            foreach (get_defined_constants(true)['pcre'] as $k => $v) {
                if ($lastError === $v && str_ends_with($k, '_ERROR')) {
                    throw new RuntimeException('Matching failed with '.$k.'.');
                }
            }

            throw new RuntimeException('Matching failed with unknown error code.');
        }
    } finally {
        restore_error_handler();
    }

    $str = clone $this;
    $str->string = $string;

    return $str;
}
| 353 | |||
/**
 * Returns a copy of the string with its \X units (extended grapheme clusters) in reverse order.
 */
public function reverse(): static
{
    $reversed = clone $this;

    $clusters = preg_split('/(\X)/u', $reversed->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);
    $reversed->string = implode('', array_reverse($clusters));

    return $reversed;
}
| 361 | |||
/**
 * Returns a snake_case copy of the string.
 *
 * The string is camel-cased first; an underscore is then inserted before the
 * last capital of an uppercase run that is followed by a lowercase letter,
 * and between a lowercase letter or digit and a capital. The result is lowercased.
 */
public function snake(): static
{
    $snake = $this->camel();

    $boundaries = [
        '/(\p{Lu}+)(\p{Lu}\p{Ll})/u',
        '/([\p{Ll}0-9])(\p{Lu})/u',
    ];
    $separated = preg_replace($boundaries, '\1_\2', $snake->string);
    $snake->string = mb_strtolower($separated, 'UTF-8');

    return $snake;
}
| 369 | |||
/**
 * Returns a copy with the character at a word boundary title-cased.
 *
 * @param bool $allWords When false (default) only the first match is converted; when true every match is
 */
public function title(bool $allWords = false): static
{
    $capitalize = static fn (array $m): string => mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');

    $titled = clone $this;
    $titled->string = preg_replace_callback('/\b./u', $capitalize, $titled->string, $allWords ? -1 : 1);

    return $titled;
}
| 380 | |||
/**
 * Title-cases the string with the locale's "Title" transliterator, falling
 * back to title() when no transliterator is available for that locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeTitle(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Title');

    if (null === $transliterator) {
        return $this->title();
    }

    $titled = clone $this;
    $titled->string = $transliterator->transliterate($titled->string);

    return $titled;
}
| 395 | |||
/**
 * Strips the given characters from both ends of the string.
 *
 * @param string $chars The set of characters to strip; validated as UTF-8 unless it is the default set
 *
 * @throws InvalidArgumentException When a custom $chars is not valid UTF-8
 */
public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // The quoted characters are used as the content of a regex character class.
    $class = preg_quote($chars);

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{^['.$class.']++|['.$class.']++$}uD', '', $trimmed->string);

    return $trimmed;
}
| 408 | |||
/**
 * Strips the given characters from the end of the string.
 *
 * @param string $chars The set of characters to strip; validated as UTF-8 unless it is the default set
 *
 * @throws InvalidArgumentException When a custom $chars is not valid UTF-8
 */
public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // The quoted characters are used as the content of a regex character class.
    $class = preg_quote($chars);

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{['.$class.']++$}uD', '', $trimmed->string);

    return $trimmed;
}
| 421 | |||
/**
 * Removes one of the given prefixes from the start of the string.
 *
 * Case-sensitive strings are handled by the parent implementation. When the
 * string ignores case, the prefixes are quoted and combined into a single
 * case-insensitive alternation anchored at the start.
 *
 * @param string|iterable|AbstractString $prefix One prefix or a collection of candidate prefixes
 */
public function trimPrefix($prefix): static
{
    if (!$this->ignoreCase) {
        return parent::trimPrefix($prefix);
    }

    if ($prefix instanceof \Traversable) {
        $prefix = iterator_to_array($prefix, false);
    } elseif ($prefix instanceof parent) {
        $prefix = $prefix->string;
    }

    $quoted = array_map('preg_quote', (array) $prefix);
    $alternatives = implode('|', $quoted);

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{^(?:'.$alternatives.')}iuD', '', $this->string);

    return $trimmed;
}
| 441 | |||
/**
 * Strips the given characters from the start of the string.
 *
 * @param string $chars The set of characters to strip; validated as UTF-8 unless it is the default set
 *
 * @throws InvalidArgumentException When a custom $chars is not valid UTF-8
 */
public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    $isDefaultSet = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" === $chars;

    if (!$isDefaultSet && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    // The quoted characters are used as the content of a regex character class.
    $class = preg_quote($chars);

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{^['.$class.']++}uD', '', $trimmed->string);

    return $trimmed;
}
| 454 | |||
/**
 * Removes one of the given suffixes from the end of the string.
 *
 * Case-sensitive strings are handled by the parent implementation. When the
 * string ignores case, the suffixes are quoted and combined into a single
 * case-insensitive alternation anchored at the end.
 *
 * @param string|iterable|AbstractString $suffix One suffix or a collection of candidate suffixes
 */
public function trimSuffix($suffix): static
{
    if (!$this->ignoreCase) {
        return parent::trimSuffix($suffix);
    }

    if ($suffix instanceof \Traversable) {
        $suffix = iterator_to_array($suffix, false);
    } elseif ($suffix instanceof parent) {
        $suffix = $suffix->string;
    }

    $quoted = array_map('preg_quote', (array) $suffix);
    $alternatives = implode('|', $quoted);

    $trimmed = clone $this;
    $trimmed->string = preg_replace('{(?:'.$alternatives.')$}iuD', '', $this->string);

    return $trimmed;
}
| 474 | |||
/**
 * Returns an uppercased copy of the string, using mb_strtoupper().
 */
public function upper(): static
{
    $uppered = clone $this;
    $uppered->string = mb_strtoupper($this->string, 'UTF-8');

    return $uppered;
}
| 482 | |||
/**
 * Uppercases the string with the locale's "Upper" transliterator, falling
 * back to upper() when no transliterator is available for that locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeUpper(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Upper');

    if (null === $transliterator) {
        return $this->upper();
    }

    $uppered = clone $this;
    $uppered->string = $transliterator->transliterate($uppered->string);

    return $uppered;
}
| 497 | |||
/**
 * Returns the display width of the widest line of the string.
 *
 * NUL, ENQ and BEL bytes are dropped and "\r\n" / "\r" are turned into "\n"
 * before the string is split on "\n". Each line is measured with wcswidth().
 *
 * @param bool $ignoreAnsiDecoration When true, escape sequences and control characters are removed
 *                                   from each line before measuring. When false, all control
 *                                   characters are removed from the whole string before it is split
 */
public function width(bool $ignoreAnsiDecoration = true): int
{
    $text = str_replace(["\x00", "\x05", "\x07"], '', $this->string);

    if (str_contains($text, "\r")) {
        $text = str_replace(["\r\n", "\r"], "\n", $text);
    }

    if (!$ignoreAnsiDecoration) {
        $text = preg_replace('/[\p{Cc}\x7F]++/u', '', $text);
    }

    $widest = 0;

    foreach (explode("\n", $text) as $line) {
        if ($ignoreAnsiDecoration) {
            $line = preg_replace('/(?:\x1B(?:
                \[ [\x30-\x3F]*+ [\x20-\x2F]*+ [\x40-\x7E]
                | [P\]X^_] .*? \x1B\\\\
                | [\x41-\x7E]
            )|[\p{Cc}\x7F]++)/xu', '', $line);
        }

        $widest = max($widest, $this->wcswidth($line));
    }

    return $widest;
}
| 529 | |||
/**
 * Pads the string to $len characters using the characters of $pad.
 *
 * A clone is returned unchanged when the string is already at least $len
 * long. For STR_PAD_BOTH the free space is split in two, the extra
 * character (if any) going to the right-hand side.
 *
 * @param int  $len  Target length
 * @param self $pad  Padding characters, wrapped in a string instance
 * @param int  $type One of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 *
 * @throws InvalidArgumentException When $type is none of the three constants
 */
private function pad(int $len, self $pad, int $type): static
{
    $currentLen = $this->length();

    if ($len <= $currentLen) {
        return clone $this;
    }

    $padLen = $pad->length();
    $freeLen = $len - $currentLen;

    // Builds $count characters of padding: whole repetitions of the pad
    // string, followed by a partial slice for the remainder.
    $fill = static function (int $count) use ($pad, $padLen): string {
        $remainder = $count % $padLen;

        return str_repeat($pad->string, intdiv($count, $padLen)).($remainder ? $pad->slice(0, $remainder) : '');
    };

    return match ($type) {
        \STR_PAD_RIGHT => $this->append($fill($freeLen)),
        \STR_PAD_LEFT => $this->prepend($fill($freeLen)),
        // right side gets ceil(free / 2), left side gets floor(free / 2)
        \STR_PAD_BOTH => $this->append($fill(intdiv($freeLen + 1, 2)))->prepend($fill(intdiv($freeLen, 2))),
        default => throw new InvalidArgumentException('Invalid padding type.'),
    };
}
| 565 | |||
/**
 * Computes the display width of a string, one code point at a time.
 *
 * A fixed list of invisible code points counts for nothing; a C0/C1 control
 * character or DEL makes the whole call return -1. Any other code point is
 * looked up in the "zero" range table (width 0), then in the "wide" range
 * table (width 2), and counts for 1 when found in neither. Both tables are
 * loaded from Resources/data on first use.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 */
private function wcswidth(string $string): int
{
    // Binary search for $cp in a sorted list of [first, last] code point ranges.
    $inRanges = static function (int $cp, array $ranges): bool {
        $high = \count($ranges) - 1;

        if ($cp < $ranges[0][0] || $cp > $ranges[$high][1]) {
            return false;
        }

        $low = 0;
        while ($low <= $high) {
            $middle = ($low + $high) >> 1;

            if ($cp > $ranges[$middle][1]) {
                $low = $middle + 1;
            } elseif ($cp < $ranges[$middle][0]) {
                $high = $middle - 1;
            } else {
                return true;
            }
        }

        return false;
    };

    $total = 0;

    foreach (preg_split('//u', $string, -1, \PREG_SPLIT_NO_EMPTY) as $char) {
        $cp = mb_ord($char, 'UTF-8');

        $isInvisible = 0 === $cp // NULL
            || 0x034F === $cp // COMBINING GRAPHEME JOINER
            || ($cp >= 0x200B && $cp <= 0x200F) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || ($cp >= 0x2028 && $cp <= 0x202E) // LINE SEPARATOR to RIGHT-TO-LEFT OVERRIDE
            || ($cp >= 0x2060 && $cp <= 0x2063); // WORD JOINER to INVISIBLE SEPARATOR

        if ($isInvisible) {
            continue;
        }

        // Non printable characters: C0 controls, DEL and C1 controls
        if ($cp < 32 || ($cp >= 0x07F && $cp < 0x0A0)) {
            return -1;
        }

        self::$tableZero ??= require __DIR__.'/Resources/data/wcswidth_table_zero.php';

        if ($inRanges($cp, self::$tableZero)) {
            continue;
        }

        self::$tableWide ??= require __DIR__.'/Resources/data/wcswidth_table_wide.php';

        $total += $inRanges($cp, self::$tableWide) ? 2 : 1;
    }

    return $total;
}
| 635 | |||
/**
 * Returns the transliterator registered under "<locale>-<id>", or null when there is none.
 *
 * Results, including failed lookups, are cached in self::$transliterators.
 * When the full locale yields nothing and contains an underscore, the part
 * before the underscore is tried as a parent locale (nl_BE -> nl) and the
 * outcome is cached under both keys.
 *
 * Null is returned straight away when the Transliterator class is missing
 * (no intl extension), so that callers can use their non-locale fallback.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 * @param string $id     Transform name appended to the locale (callers in this class pass "Lower", "Title" or "Upper")
 */
private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
{
    // Without ext-intl the class does not exist and calling it would be a fatal error.
    if (!class_exists(\Transliterator::class)) {
        return null;
    }

    $rule = $locale.'-'.$id;
    if (\array_key_exists($rule, self::$transliterators)) {
        return self::$transliterators[$rule];
    }

    if (null !== $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule)) {
        return $transliterator;
    }

    // Try to find a parent locale (nl_BE -> nl)
    if (false === $i = strpos($locale, '_')) {
        return null;
    }

    $parentRule = substr_replace($locale, '-'.$id, $i);

    // Parent locale was already cached, return and store as current locale
    if (\array_key_exists($parentRule, self::$transliterators)) {
        return self::$transliterators[$rule] = self::$transliterators[$parentRule];
    }

    // Create transliterator based on parent locale and cache the result on both initial and parent locale values
    $transliterator = \Transliterator::create($parentRule);

    return self::$transliterators[$rule] = self::$transliterators[$parentRule] = $transliterator;
}
| 664 | } | ||
