summaryrefslogtreecommitdiff
path: root/vendor/symfony/string/AbstractUnicodeString.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/symfony/string/AbstractUnicodeString.php')
-rw-r--r--vendor/symfony/string/AbstractUnicodeString.php664
1 files changed, 664 insertions, 0 deletions
diff --git a/vendor/symfony/string/AbstractUnicodeString.php b/vendor/symfony/string/AbstractUnicodeString.php
new file mode 100644
index 0000000..2cb2917
--- /dev/null
+++ b/vendor/symfony/string/AbstractUnicodeString.php
@@ -0,0 +1,664 @@
1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\String;
13
14use Symfony\Component\String\Exception\ExceptionInterface;
15use Symfony\Component\String\Exception\InvalidArgumentException;
16use Symfony\Component\String\Exception\RuntimeException;
17
18/**
19 * Represents a string of abstract Unicode characters.
20 *
21 * Unicode defines 3 types of "characters" (bytes, code points and grapheme clusters).
22 * This class is the abstract type to use as a type-hint when the logic you want to
23 * implement is Unicode-aware but doesn't care about code points vs grapheme clusters.
24 *
25 * @author Nicolas Grekas <p@tchwork.com>
26 *
27 * @throws ExceptionInterface
28 */
29abstract class AbstractUnicodeString extends AbstractString
30{
31 public const NFC = \Normalizer::NFC;
32 public const NFD = \Normalizer::NFD;
33 public const NFKC = \Normalizer::NFKC;
34 public const NFKD = \Normalizer::NFKD;
35
36 // all ASCII letters sorted by typical frequency of occurrence
37 private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
38
39 // the subset of folded case mappings that is not in lower case mappings
40 private const FOLD_FROM = ['İ', 'µ', 'ſ', "\xCD\x85", 'ς', 'ϐ', 'ϑ', 'ϕ', 'ϖ', 'ϰ', 'ϱ', 'ϵ', 'ẛ', "\xE1\xBE\xBE", 'ß', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'և', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ẞ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'ᾼ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'ῌ', 'ῒ', 'ῖ', 'ῗ', 'ῢ', 'ῤ', 'ῦ', 'ῧ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'ῼ', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'ſt', 'st', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ'];
41 private const FOLD_TO = ['i̇', 'μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', 'ṡ', 'ι', 'ss', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'եւ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'aʾ', 'ss', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὰι', 'αι', 'άι', 'ᾶ', 'ᾶι', 'αι', 'ὴι', 'ηι', 'ήι', 'ῆ', 'ῆι', 'ηι', 'ῒ', 'ῖ', 'ῗ', 'ῢ', 'ῤ', 'ῦ', 'ῧ', 'ὼι', 'ωι', 'ώι', 'ῶ', 'ῶι', 'ωι', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'st', 'st', 'մն', 'մե', 'մի', 'վն', 'մխ'];
42
43 // the subset of https://github.com/unicode-org/cldr/blob/master/common/transforms/Latin-ASCII.xml that is not in NFKD
44 private const TRANSLIT_FROM = ['Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', 'ħ', 'ı', 'ĸ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'ʼn', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ', 'ƀ', 'Ɓ', 'Ƃ', 'ƃ', 'Ƈ', 'ƈ', 'Ɖ', 'Ɗ', 'Ƌ', 'ƌ', 'Ɛ', 'Ƒ', 'ƒ', 'Ɠ', 'ƕ', 'Ɩ', 'Ɨ', 'Ƙ', 'ƙ', 'ƚ', 'Ɲ', 'ƞ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'ƫ', 'Ƭ', 'ƭ', 'Ʈ', 'Ʋ', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'DŽ', 'Dž', 'dž', 'Ǥ', 'ǥ', 'ȡ', 'Ȥ', 'ȥ', 'ȴ', 'ȵ', 'ȶ', 'ȷ', 'ȸ', 'ȹ', 'Ⱥ', 'Ȼ', 'ȼ', 'Ƚ', 'Ⱦ', 'ȿ', 'ɀ', 'Ƀ', 'Ʉ', 'Ɇ', 'ɇ', 'Ɉ', 'ɉ', 'Ɍ', 'ɍ', 'Ɏ', 'ɏ', 'ɓ', 'ɕ', 'ɖ', 'ɗ', 'ɛ', 'ɟ', 'ɠ', 'ɡ', 'ɢ', 'ɦ', 'ɧ', 'ɨ', 'ɪ', 'ɫ', 'ɬ', 'ɭ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɶ', 'ɼ', 'ɽ', 'ɾ', 'ʀ', 'ʂ', 'ʈ', 'ʉ', 'ʋ', 'ʏ', 'ʐ', 'ʑ', 'ʙ', 'ʛ', 'ʜ', 'ʝ', 'ʟ', 'ʠ', 'ʣ', 'ʥ', 'ʦ', 'ʪ', 'ʫ', 'ᴀ', 'ᴁ', 'ᴃ', 'ᴄ', 'ᴅ', 'ᴆ', 'ᴇ', 'ᴊ', 'ᴋ', 'ᴌ', 'ᴍ', 'ᴏ', 'ᴘ', 'ᴛ', 'ᴜ', 'ᴠ', 'ᴡ', 'ᴢ', 'ᵫ', 'ᵬ', 'ᵭ', 'ᵮ', 'ᵯ', 'ᵰ', 'ᵱ', 'ᵲ', 'ᵳ', 'ᵴ', 'ᵵ', 'ᵶ', 'ᵺ', 'ᵻ', 'ᵽ', 'ᵾ', 'ᶀ', 'ᶁ', 'ᶂ', 'ᶃ', 'ᶄ', 'ᶅ', 'ᶆ', 'ᶇ', 'ᶈ', 'ᶉ', 'ᶊ', 'ᶌ', 'ᶍ', 'ᶎ', 'ᶏ', 'ᶑ', 'ᶒ', 'ᶓ', 'ᶖ', 'ᶙ', 'ẚ', 'ẜ', 'ẝ', 'ẞ', 'Ỻ', 'ỻ', 'Ỽ', 'ỽ', 'Ỿ', 'ỿ', '©', '®', '₠', '₢', '₣', '₤', '₧', '₺', '₹', 'ℌ', '℞', '㎧', '㎮', '㏆', '㏗', '㏞', '㏟', '¼', '½', '¾', '⅓', '⅔', '⅕', '⅖', '⅗', '⅘', '⅙', '⅚', '⅛', '⅜', '⅝', '⅞', '⅟', '〇', '‘', '’', '‚', '‛', '“', '”', '„', '‟', '′', '″', '〝', '〞', '«', '»', '‹', '›', '‐', '‑', '‒', '–', '—', '―', '︱', '︲', '﹘', '‖', '⁄', '⁅', '⁆', '⁎', '、', '。', '〈', '〉', '《', '》', '〔', '〕', '〘', '〙', '〚', '〛', '︑', '︒', '︹', '︺', '︽', '︾', '︿', '﹀', '﹑', '﹝', '﹞', '⦅', '⦆', '。', '、', '×', '÷', '−', '∕', '∖', '∣', '∥', '≪', '≫', '⦅', '⦆'];
45 private const TRANSLIT_TO = ['AE', 'D', 'O', 'TH', 'ss', 'ae', 'd', 'o', 'th', 'D', 'd', 'H', 'h', 'i', 'q', 'L', 'l', 'L', 'l', '\'n', 'N', 'n', 'OE', 'oe', 'T', 't', 'b', 'B', 'B', 'b', 'C', 'c', 'D', 'D', 'D', 'd', 'E', 'F', 'f', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'N', 'n', 'OI', 'oi', 'P', 'p', 't', 'T', 't', 'T', 'V', 'Y', 'y', 'Z', 'z', 'DZ', 'Dz', 'dz', 'G', 'g', 'd', 'Z', 'z', 'l', 'n', 't', 'j', 'db', 'qp', 'A', 'C', 'c', 'L', 'T', 's', 'z', 'B', 'U', 'E', 'e', 'J', 'j', 'R', 'r', 'Y', 'y', 'b', 'c', 'd', 'd', 'e', 'j', 'g', 'g', 'G', 'h', 'h', 'i', 'I', 'l', 'l', 'l', 'm', 'n', 'n', 'N', 'OE', 'r', 'r', 'r', 'R', 's', 't', 'u', 'v', 'Y', 'z', 'z', 'B', 'G', 'H', 'j', 'L', 'q', 'dz', 'dz', 'ts', 'ls', 'lz', 'A', 'AE', 'B', 'C', 'D', 'D', 'E', 'J', 'K', 'L', 'M', 'O', 'P', 'T', 'U', 'V', 'W', 'Z', 'ue', 'b', 'd', 'f', 'm', 'n', 'p', 'r', 'r', 's', 't', 'z', 'th', 'I', 'p', 'U', 'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'v', 'x', 'z', 'a', 'd', 'e', 'e', 'i', 'u', 'a', 's', 's', 'SS', 'LL', 'll', 'V', 'v', 'Y', 'y', '(C)', '(R)', 'CE', 'Cr', 'Fr.', 'L.', 'Pts', 'TL', 'Rs', 'x', 'Rx', 'm/s', 'rad/s', 'C/kg', 'pH', 'V/m', 'A/m', ' 1/4', ' 1/2', ' 3/4', ' 1/3', ' 2/3', ' 1/5', ' 2/5', ' 3/5', ' 4/5', ' 1/6', ' 5/6', ' 1/8', ' 3/8', ' 5/8', ' 7/8', ' 1/', '0', '\'', '\'', ',', '\'', '"', '"', ',,', '"', '\'', '"', '"', '"', '<<', '>>', '<', '>', '-', '-', '-', '-', '-', '-', '-', '-', '-', '||', '/', '[', ']', '*', ',', '.', '<', '>', '<<', '>>', '[', ']', '[', ']', '[', ']', ',', '.', '[', ']', '<<', '>>', '<', '>', ',', '[', ']', '((', '))', '.', ',', '*', '/', '-', '/', '\\', '|', '||', '<<', '>>', '((', '))'];
46
47 private static array $transliterators = [];
48 private static array $tableZero;
49 private static array $tableWide;
50
/**
 * Creates a string by UTF-8 encoding the given code points, in the given order.
 *
 * Each value is first reduced modulo 0x200000, then written as a 1 to 4 byte sequence
 * depending on its magnitude. The resulting bytes are handed to the constructor as-is.
 */
public static function fromCodePoints(int ...$codes): static
{
    $bytes = '';

    foreach ($codes as $codePoint) {
        $codePoint %= 0x200000;

        // 1 byte: 0xxxxxxx
        if ($codePoint < 0x80) {
            $bytes .= \chr($codePoint);

            continue;
        }

        // 2 bytes: 110xxxxx 10xxxxxx
        if ($codePoint < 0x800) {
            $bytes .= \chr(0xC0 | ($codePoint >> 6))
                .\chr(0x80 | ($codePoint & 0x3F));

            continue;
        }

        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if ($codePoint < 0x10000) {
            $bytes .= \chr(0xE0 | ($codePoint >> 12))
                .\chr(0x80 | (($codePoint >> 6) & 0x3F))
                .\chr(0x80 | ($codePoint & 0x3F));

            continue;
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $bytes .= \chr(0xF0 | ($codePoint >> 18))
            .\chr(0x80 | (($codePoint >> 12) & 0x3F))
            .\chr(0x80 | (($codePoint >> 6) & 0x3F))
            .\chr(0x80 | ($codePoint & 0x3F));
    }

    return new static($bytes);
}
69
/**
 * Generic UTF-8 to ASCII transliteration.
 *
 * The given rules are wrapped in a fixed pipeline: "nfd" always runs first, then the
 * caller's rules, then "latin-ascii", "any-latin/bgn" (only when the
 * transliterator_transliterate() function exists), "nfkd" and
 * "[:nonspacing mark:] remove". One rule is consumed per pass, for as long as
 * non-ASCII data remains. When the rules are exhausted, or an empty rule is met,
 * the remaining non-ASCII characters go through iconv, or become "?" when iconv
 * is not available.
 *
 * Install the intl extension for best results.
 *
 * @param string[]|\Transliterator[]|\Closure[] $rules See "*-Latin" rules from Transliterator::listIDs()
 *
 * @throws InvalidArgumentException When a named rule cannot be turned into a transliterator
 * @throws \LogicException          When the iconv implementation is unable to transliterate
 */
public function ascii(array $rules = []): self
{
    $str = clone $this;
    $pending = $str->string;
    $str->string = '';

    array_unshift($rules, 'nfd');
    array_push($rules, 'latin-ascii');

    if (\function_exists('transliterator_transliterate')) {
        array_push($rules, 'any-latin/bgn');
    }

    array_push($rules, 'nfkd', '[:nonspacing mark:] remove');

    while (true) {
        $asciiLen = strspn($pending, self::ASCII);

        if (\strlen($pending) - 1 <= $asciiLen) {
            break;
        }

        // Move the leading ASCII run to the result, except for its last byte
        // which stays in front of the data still to be processed
        if (0 < --$asciiLen) {
            $str->string .= substr($pending, 0, $asciiLen);
            $pending = substr($pending, $asciiLen);
        }

        $rule = array_shift($rules);

        if (!$rule) {
            $rules = []; // An empty rule interrupts the next ones
        }

        if ($rule instanceof \Transliterator) {
            $pending = $rule->transliterate($pending);

            continue;
        }

        if ($rule instanceof \Closure) {
            $pending = $rule($pending);

            continue;
        }

        if (!$rule) {
            // No rule left: last-resort conversion of whatever is still not ASCII
            if (!\function_exists('iconv')) {
                $pending = preg_replace('/[^\x00-\x7F]/u', '?', $pending);

                continue;
            }

            $pending = @preg_replace_callback('/[^\x00-\x7F]/u', static function ($m) {
                $ascii = (string) iconv('UTF-8', 'ASCII//TRANSLIT', $m[0]);

                if ('' === $ascii) {
                    if ('' === iconv('UTF-8', 'ASCII//TRANSLIT', '²')) {
                        throw new \LogicException(sprintf('"%s" requires a translit-able iconv implementation, try installing "gnu-libiconv" if you\'re using Alpine Linux.', static::class));
                    }

                    return '?';
                }

                return 1 < \strlen($ascii) ? ltrim($ascii, '\'`"^~') : $ascii;
            }, $pending);

            continue;
        }

        $rule = strtolower($rule);

        switch ($rule) {
            case 'nfd':
                if (!normalizer_is_normalized($pending, self::NFD)) {
                    $pending = normalizer_normalize($pending, self::NFD);
                }
                break;

            case 'nfkd':
                if (!normalizer_is_normalized($pending, self::NFKD)) {
                    $pending = normalizer_normalize($pending, self::NFKD);
                }
                break;

            case '[:nonspacing mark:] remove':
                $pending = preg_replace('/\p{Mn}++/u', '', $pending);
                break;

            case 'latin-ascii':
                $pending = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $pending);
                break;

            case 'de-ascii':
                $pending = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $pending);
                $pending = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $pending);
                break;

            default:
                // Any other name is an ICU transliterator ID; it is skipped when the intl functions are missing
                if (!\function_exists('transliterator_transliterate')) {
                    break;
                }

                $transliterator = self::$transliterators[$rule] ??= \Transliterator::create($rule);

                if (null === $transliterator) {
                    if ('any-latin/bgn' === $rule) {
                        $rule = 'any-latin';
                        $transliterator = self::$transliterators[$rule] ??= \Transliterator::create($rule);
                    }

                    if (null === $transliterator) {
                        throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
                    }

                    self::$transliterators['any-latin/bgn'] = $transliterator;
                }

                $pending = $transliterator->transliterate($pending);
        }
    }

    $str->string .= $pending;

    return $str;
}
154
/**
 * Returns a copy of the string in camelCase.
 *
 * Runs of characters that are neither letters nor ASCII digits become single spaces,
 * the first matched word-start character is lower-cased, the following ones are
 * title-cased, and finally all spaces are removed.
 */
public function camel(): static
{
    $spaced = preg_replace('/[^\pL0-9]++/u', ' ', $this->string);

    $cased = preg_replace_callback('/\b.(?!\p{Lu})/u', static function ($m) {
        static $matchCount = 0;

        if (1 < ++$matchCount) {
            return mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
        }

        // Very first match: lower-case it, with an explicit mapping for "İ"
        return 'İ' === $m[0] ? 'i̇' : mb_strtolower($m[0], 'UTF-8');
    }, $spaced);

    $str = clone $this;
    $str->string = str_replace(' ', '', $cased);

    return $str;
}
166
/**
 * Returns the code points of the one-character slice taken at the given offset.
 *
 * @return int[] An empty array when the slice is empty
 */
public function codePointsAt(int $offset): array
{
    $slice = $this->slice($offset, 1)->string;
    $codePoints = [];

    if ('' !== $slice) {
        foreach (preg_split('//u', $slice, -1, \PREG_SPLIT_NO_EMPTY) as $char) {
            $codePoints[] = mb_ord($char, 'UTF-8');
        }
    }

    return $codePoints;
}
186
/**
 * Returns a case-folded copy of the string.
 *
 * When $compat is true and the Normalizer::NFKC_CF constant is defined, folding is
 * delegated to normalizer_normalize(). Otherwise the string is normalized to
 * NFKC ($compat) or NFC, run through the FOLD_FROM/FOLD_TO table and lower-cased.
 */
public function folded(bool $compat = true): static
{
    $str = clone $this;

    if ($compat && \defined('Normalizer::NFKC_CF')) {
        $str->string = normalizer_normalize($str->string, \Normalizer::NFKC_CF);

        return $str;
    }

    $form = $compat ? \Normalizer::NFKC : \Normalizer::NFC;
    $str->string = normalizer_normalize($str->string, $form);

    $mapped = str_replace(self::FOLD_FROM, self::FOLD_TO, $str->string);
    $str->string = mb_strtolower($mapped, 'UTF-8');

    return $str;
}
200
/**
 * Joins the given strings, using the current string as glue.
 *
 * @param string|null $lastGlue When not null and there are at least two strings,
 *                              used instead of the regular glue before the last one
 *
 * @throws InvalidArgumentException When the result is not valid UTF-8
 */
public function join(array $strings, ?string $lastGlue = null): static
{
    $tail = '';

    if (null !== $lastGlue && \count($strings) > 1) {
        $tail = $lastGlue.array_pop($strings);
    }

    $joined = implode($this->string, $strings).$tail;

    if (!preg_match('//u', $joined)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    $str = clone $this;
    $str->string = $joined;

    return $str;
}
214
/**
 * Returns a lower-cased copy of the string.
 *
 * "İ" is replaced explicitly before calling mb_strtolower().
 */
public function lower(): static
{
    $prepared = str_replace('İ', 'i̇', $this->string);

    $str = clone $this;
    $str->string = mb_strtolower($prepared, 'UTF-8');

    return $str;
}
222
/**
 * Lower-cases the string with the rules of the given locale.
 *
 * Falls back to lower() when no transliterator is available for the locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeLower(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Lower');

    if (null === $transliterator) {
        return $this->lower();
    }

    $str = clone $this;
    $str->string = $transliterator->transliterate($str->string);

    return $str;
}
237
/**
 * Matches the string against a regular expression and returns the captured groups.
 *
 * The "u" modifier is always appended, as well as "i" when the string ignores case.
 * preg_match_all() is used when $flags contains PREG_PATTERN_ORDER or PREG_SET_ORDER,
 * preg_match() otherwise; PREG_UNMATCHED_AS_NULL is always added to the flags.
 *
 * @throws InvalidArgumentException When PHP reports an error while matching
 * @throws RuntimeException         When the matching function returns false
 */
public function match(string $regexp, int $flags = 0, int $offset = 0): array
{
    $pattern = ($this->ignoreCase ? $regexp.'i' : $regexp).'u';
    $matchAll = 0 !== ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags);
    $flags |= \PREG_UNMATCHED_AS_NULL;

    // Turn any PHP error raised while matching into an exception
    set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));

    try {
        $result = $matchAll
            ? \preg_match_all($pattern, $this->string, $matches, $flags, $offset)
            : \preg_match($pattern, $this->string, $matches, $flags, $offset);

        if (false === $result) {
            throw new RuntimeException('Matching failed with error: '.preg_last_error_msg());
        }
    } finally {
        restore_error_handler();
    }

    return $matches;
}
258
/**
 * Returns a copy of the string in the requested Unicode normalization form.
 *
 * @param int $form One of the NFC, NFD, NFKC or NFKD constants of this class
 *
 * @throws InvalidArgumentException When $form is none of the supported forms
 */
public function normalize(int $form = self::NFC): static
{
    $supportedForms = [self::NFC, self::NFD, self::NFKC, self::NFKD];

    if (!\in_array($form, $supportedForms, true)) {
        throw new InvalidArgumentException('Unsupported normalization form.');
    }

    $str = clone $this;

    // Only normalize when the string is not reported as already normalized
    if (!normalizer_is_normalized($str->string, $form)) {
        $str->string = normalizer_normalize($str->string, $form);
    }

    return $str;
}
270
/**
 * Pads the string on both sides up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padBoth(int $length, string $padStr = ' '): static
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class so pad() can measure and slice it
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_BOTH);
}
282
/**
 * Pads the end of the string up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padEnd(int $length, string $padStr = ' '): static
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class so pad() can measure and slice it
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_RIGHT);
}
294
/**
 * Pads the start of the string up to the given length.
 *
 * @throws InvalidArgumentException When $padStr is empty or not valid UTF-8
 */
public function padStart(int $length, string $padStr = ' '): static
{
    $isUsablePad = '' !== $padStr && preg_match('//u', $padStr);

    if (!$isUsablePad) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    }

    // Wrap the padding in an instance of the same class so pad() can measure and slice it
    $padding = clone $this;
    $padding->string = $padStr;

    return $this->pad($length, $padding, \STR_PAD_LEFT);
}
306
/**
 * Replaces the matches of a regular expression.
 *
 * The "u" modifier is always appended, as well as "i" when the string ignores case.
 *
 * @param string|callable $to A replacement string, or a callable (array, closure or
 *                            invokable object) that receives the match array and returns
 *                            the replacement; a plain string is always used as a
 *                            replacement, even when it names a function
 *
 * @throws InvalidArgumentException When $to, or the value returned by the callback, is not
 *                                  a valid UTF-8 string, or when PHP reports an error
 * @throws RuntimeException         When the replacement function returns null
 */
public function replaceMatches(string $fromRegexp, string|callable $to): static
{
    if ($this->ignoreCase) {
        $fromRegexp .= 'i';
    }

    if (!\is_string($to)) {
        // Given the parameter type, a non-string is a callable: array, closure or invokable object
        $callback = $to;
        $replace = 'preg_replace_callback';
        $to = static function (array $m) use ($callback): string {
            $replacement = $callback($m);

            if ('' !== $replacement && (!\is_string($replacement) || !preg_match('//u', $replacement))) {
                throw new InvalidArgumentException('Replace callback must return a valid UTF-8 string.');
            }

            return $replacement;
        };
    } elseif ('' !== $to && !preg_match('//u', $to)) {
        throw new InvalidArgumentException('Invalid UTF-8 string.');
    } else {
        $replace = 'preg_replace';
    }

    // Turn any PHP error raised while replacing into an exception
    set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));

    try {
        if (null === $string = $replace($fromRegexp.'u', $to, $this->string)) {
            $lastError = preg_last_error();

            // Report the name of the PCRE error constant matching the last error code
            foreach (get_defined_constants(true)['pcre'] as $k => $v) {
                if ($lastError === $v && str_ends_with($k, '_ERROR')) {
                    throw new RuntimeException('Matching failed with '.$k.'.');
                }
            }

            throw new RuntimeException('Matching failed with unknown error code.');
        }
    } finally {
        restore_error_handler();
    }

    $str = clone $this;
    $str->string = $string;

    return $str;
}
353
/**
 * Returns a copy of the string with its \X matches (extended grapheme clusters) in reverse order.
 */
public function reverse(): static
{
    $clusters = preg_split('/(\X)/u', $this->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);

    $str = clone $this;
    $str->string = implode('', array_reverse($clusters));

    return $str;
}
361
/**
 * Returns a copy of the string in snake_case.
 *
 * The string is first camel-cased, then an underscore is inserted at each
 * lower/upper case boundary and the result is lower-cased.
 */
public function snake(): static
{
    $str = $this->camel();

    $boundaries = [
        '/(\p{Lu}+)(\p{Lu}\p{Ll})/u',
        '/([\p{Ll}0-9])(\p{Lu})/u',
    ];
    $underscored = preg_replace($boundaries, '\1_\2', $str->string);

    $str->string = mb_strtolower($underscored, 'UTF-8');

    return $str;
}
369
/**
 * Returns a copy of the string with the first character title-cased.
 *
 * @param bool $allWords When true, the character at every word boundary is title-cased
 *                       instead of only the first one
 */
public function title(bool $allWords = false): static
{
    $toTitleCase = static function (array $m): string {
        return mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
    };

    $str = clone $this;
    $str->string = preg_replace_callback('/\b./u', $toTitleCase, $str->string, $allWords ? -1 : 1);

    return $str;
}
380
/**
 * Title-cases the string with the rules of the given locale.
 *
 * Falls back to title() when no transliterator is available for the locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeTitle(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Title');

    if (null === $transliterator) {
        return $this->title();
    }

    $str = clone $this;
    $str->string = $transliterator->transliterate($str->string);

    return $str;
}
395
/**
 * Strips the given characters from both ends of the string.
 *
 * @param string $chars The characters to strip; an empty list strips nothing
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    $str = clone $this;

    // With no characters the pattern would contain "[]++|[]", which PCRE reads as a class
    // made of the literal "]", "+", "|" and "[": such strings would be wrongly emptied
    if ('' === $chars) {
        return $str;
    }

    $chars = preg_quote($chars);
    $str->string = preg_replace("{^[$chars]++|[$chars]++$}uD", '', $str->string);

    return $str;
}
408
/**
 * Strips the given characters from the end of the string.
 *
 * @param string $chars The characters to strip; an empty list strips nothing
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    $str = clone $this;

    // With no characters the pattern would be "[]++$", an unterminated character class
    // that fails to compile, so there is nothing to run
    if ('' === $chars) {
        return $str;
    }

    $chars = preg_quote($chars);
    $str->string = preg_replace("{[$chars]++$}uD", '', $str->string);

    return $str;
}
421
/**
 * Removes the given prefix, or the first matching one of several, from the start of the string.
 *
 * The parent implementation is used unless the string ignores case; in that
 * case the prefixes are matched case-insensitively with a regular expression.
 *
 * @param mixed $prefix A string, an instance of the parent string class, an array or a \Traversable of prefixes
 */
public function trimPrefix($prefix): static
{
    if (!$this->ignoreCase) {
        return parent::trimPrefix($prefix);
    }

    if ($prefix instanceof \Traversable) {
        $prefix = iterator_to_array($prefix, false);
    } elseif ($prefix instanceof parent) {
        $prefix = $prefix->string;
    }

    $alternatives = implode('|', array_map('preg_quote', (array) $prefix));

    $str = clone $this;
    $str->string = preg_replace("{^(?:$alternatives)}iuD", '', $this->string);

    return $str;
}
441
/**
 * Strips the given characters from the start of the string.
 *
 * @param string $chars The characters to strip; an empty list strips nothing
 *
 * @throws InvalidArgumentException When $chars is not valid UTF-8
 */
public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
{
    if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
        throw new InvalidArgumentException('Invalid UTF-8 chars.');
    }

    $str = clone $this;

    // With no characters the pattern would be "^[]++", an unterminated character class
    // that fails to compile, so there is nothing to run
    if ('' === $chars) {
        return $str;
    }

    $chars = preg_quote($chars);
    $str->string = preg_replace("{^[$chars]++}uD", '', $str->string);

    return $str;
}
454
/**
 * Removes the given suffix, or the first matching one of several, from the end of the string.
 *
 * The parent implementation is used unless the string ignores case; in that
 * case the suffixes are matched case-insensitively with a regular expression.
 *
 * @param mixed $suffix A string, an instance of the parent string class, an array or a \Traversable of suffixes
 */
public function trimSuffix($suffix): static
{
    if (!$this->ignoreCase) {
        return parent::trimSuffix($suffix);
    }

    if ($suffix instanceof \Traversable) {
        $suffix = iterator_to_array($suffix, false);
    } elseif ($suffix instanceof parent) {
        $suffix = $suffix->string;
    }

    $alternatives = implode('|', array_map('preg_quote', (array) $suffix));

    $str = clone $this;
    $str->string = preg_replace("{(?:$alternatives)$}iuD", '', $this->string);

    return $str;
}
474
/**
 * Returns an upper-cased copy of the string.
 */
public function upper(): static
{
    $uppercased = mb_strtoupper($this->string, 'UTF-8');

    $str = clone $this;
    $str->string = $uppercased;

    return $str;
}
482
/**
 * Upper-cases the string with the rules of the given locale.
 *
 * Falls back to upper() when no transliterator is available for the locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeUpper(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Upper');

    if (null === $transliterator) {
        return $this->upper();
    }

    $str = clone $this;
    $str->string = $transliterator->transliterate($str->string);

    return $str;
}
497
/**
 * Returns the width of the widest line of the string, as computed by wcswidth().
 *
 * The bytes 0x00, 0x05 and 0x07 are dropped and "\r\n" / "\r" are turned into "\n"
 * before measuring.
 *
 * @param bool $ignoreAnsiDecoration When true, escape sequences starting with 0x1B and other
 *                                   control characters are removed from each line; when false,
 *                                   control characters are removed from the whole string first
 */
public function width(bool $ignoreAnsiDecoration = true): int
{
    $text = str_replace(["\x00", "\x05", "\x07"], '', $this->string);

    if (str_contains($text, "\r")) {
        $text = str_replace(["\r\n", "\r"], "\n", $text);
    }

    if (!$ignoreAnsiDecoration) {
        $text = preg_replace('/[\p{Cc}\x7F]++/u', '', $text);
    }

    $widest = 0;

    foreach (explode("\n", $text) as $line) {
        if ($ignoreAnsiDecoration) {
            $line = preg_replace('/(?:\x1B(?:
                \[ [\x30-\x3F]*+ [\x20-\x2F]*+ [\x40-\x7E]
                | [P\]X^_] .*? \x1B\\\\
                | [\x41-\x7E]
            )|[\p{Cc}\x7F]++)/xu', '', $line);
        }

        $widest = max($widest, $this->wcswidth($line));
    }

    return $widest;
}
529
/**
 * Pads the string with $pad until it is $len characters long.
 *
 * Lengths are measured with length(), so the padding is repeated and, when needed,
 * cut with slice() to fill the missing characters exactly. With STR_PAD_BOTH the
 * right side receives the extra character when the missing length is odd.
 *
 * @param int  $len  The target length; a clone is returned when the string is already that long
 * @param self $pad  The padding, expected to be non-empty
 * @param int  $type One of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH
 *
 * @throws InvalidArgumentException When $type is not a supported padding type
 */
private function pad(int $len, self $pad, int $type): static
{
    $sLen = $this->length();

    if ($len <= $sLen) {
        return clone $this;
    }

    $padLen = $pad->length();
    $freeLen = $len - $sLen;

    // Builds exactly $count characters of padding: whole repetitions plus a partial one
    $fill = static function (int $count) use ($pad, $padLen): string {
        $remainder = $count % $padLen;

        return str_repeat($pad->string, intdiv($count, $padLen)).($remainder ? $pad->slice(0, $remainder) : '');
    };

    // Integer halves: intdiv($freeLen + 1, 2) is the ceiling, intdiv($freeLen, 2) the floor
    return match ($type) {
        \STR_PAD_RIGHT => $this->append($fill($freeLen)),
        \STR_PAD_LEFT => $this->prepend($fill($freeLen)),
        \STR_PAD_BOTH => $this->append($fill(intdiv($freeLen + 1, 2)))->prepend($fill(intdiv($freeLen, 2))),
        default => throw new InvalidArgumentException('Invalid padding type.'),
    };
}
565
/**
 * Computes the display width of a string: 0, 1 or 2 columns per code point.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * @return int The total width, or -1 as soon as a C0/C1 control character or DEL is met
 */
private function wcswidth(string $string): int
{
    $width = 0;

    foreach (preg_split('//u', $string, -1, \PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        // Both tables are loaded lazily, the first time a code point needs them
        self::$tableZero ??= require __DIR__.'/Resources/data/wcswidth_table_zero.php';

        if (self::isInRangeTable($codePoint, self::$tableZero)) {
            continue;
        }

        self::$tableWide ??= require __DIR__.'/Resources/data/wcswidth_table_wide.php';

        $width += self::isInRangeTable($codePoint, self::$tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point falls inside one of the [first, last] ranges of a table.
 *
 * The table is searched by bisection, so its ranges are presumably sorted in
 * ascending order and non-overlapping (to be confirmed against the data files).
 *
 * @param array $table A non-empty list of [first, last] code point pairs
 */
private static function isInRangeTable(int $codePoint, array $table): bool
{
    $lbound = 0;
    $ubound = \count($table) - 1;

    // Cheap rejection of code points outside of the span covered by the table
    if ($codePoint < $table[0][0] || $codePoint > $table[$ubound][1]) {
        return false;
    }

    while ($ubound >= $lbound) {
        $mid = intdiv($lbound + $ubound, 2);

        if ($codePoint > $table[$mid][1]) {
            $lbound = $mid + 1;
        } elseif ($codePoint < $table[$mid][0]) {
            $ubound = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
635
/**
 * Returns the transliterator registered for "$locale-$id", creating and caching it on first use.
 *
 * When no transliterator can be created for the full locale, the part before the
 * first underscore is tried instead (nl_BE -> nl). Results, including failures, are
 * cached in the static transliterator list under both the full and the parent locale.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 * @param string $id     The suffix appended to the locale to build the transliterator ID
 *
 * @return \Transliterator|null Null when no transliterator is available
 */
private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
{
    // Same availability check as in ascii(): without the intl transliterator there is
    // nothing to create, and the locale*() methods use their fallback when given null
    if (!\function_exists('transliterator_transliterate')) {
        return null;
    }

    $rule = $locale.'-'.$id;
    if (\array_key_exists($rule, self::$transliterators)) {
        return self::$transliterators[$rule];
    }

    if (null !== $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule)) {
        return $transliterator;
    }

    // Try to find a parent locale (nl_BE -> nl)
    if (false === $i = strpos($locale, '_')) {
        return null;
    }

    $parentRule = substr_replace($locale, '-'.$id, $i);

    // Parent locale was already cached, return and store as current locale
    if (\array_key_exists($parentRule, self::$transliterators)) {
        return self::$transliterators[$rule] = self::$transliterators[$parentRule];
    }

    // Create transliterator based on parent locale and cache the result on both initial and parent locale values
    $transliterator = \Transliterator::create($parentRule);

    return self::$transliterators[$rule] = self::$transliterators[$parentRule] = $transliterator;
}
664}