summaryrefslogtreecommitdiff
path: root/vendor/symfony/string/UnicodeString.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/symfony/string/UnicodeString.php')
-rw-r--r--vendor/symfony/string/UnicodeString.php382
1 files changed, 382 insertions, 0 deletions
diff --git a/vendor/symfony/string/UnicodeString.php b/vendor/symfony/string/UnicodeString.php
new file mode 100644
index 0000000..4b16caf
--- /dev/null
+++ b/vendor/symfony/string/UnicodeString.php
@@ -0,0 +1,382 @@
1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\String;
13
14use Symfony\Component\String\Exception\ExceptionInterface;
15use Symfony\Component\String\Exception\InvalidArgumentException;
16
17/**
18 * Represents a string of Unicode grapheme clusters encoded as UTF-8.
19 *
20 * A letter followed by combining characters (accents typically) form what Unicode defines
21 * as a grapheme cluster: a character as humans mean it in written texts. This class knows
22 * about the concept and won't split a letter apart from its combining accents. It also
23 * ensures all string comparisons happen on their canonically-composed representation,
24 * ignoring e.g. the order in which accents are listed when a letter has many of them.
25 *
26 * @see https://unicode.org/reports/tr15/
27 *
28 * @author Nicolas Grekas <p@tchwork.com>
29 * @author Hugo Hamon <hugohamon@neuf.fr>
30 *
31 * @throws ExceptionInterface
32 */
class UnicodeString extends AbstractUnicodeString
{
    /**
     * Stores the given string, normalizing it first when it is not already normalized.
     *
     * @throws InvalidArgumentException When the string cannot be normalized
     */
    public function __construct(string $string = '')
    {
        // The assignment inside the condition stores the raw value, which is kept when already normalized.
        if ('' === $string || normalizer_is_normalized($this->string = $string)) {
            return;
        }

        if (false === $string = normalizer_normalize($string)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $this->string = $string;
    }

    /**
     * Returns a copy with the given suffixes concatenated at the end, re-normalized if needed.
     *
     * @throws InvalidArgumentException When the concatenated string cannot be normalized
     */
    public function append(string ...$suffix): static
    {
        $str = clone $this;
        // Skip implode() for the common zero/one-argument call.
        $str->string = $this->string.(1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix));

        // Concatenation can bring a combining mark next to a base letter, so normalization is re-checked.
        if (normalizer_is_normalized($str->string)) {
            return $str;
        }

        if (false === $string = normalizer_normalize($str->string)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $str->string = $string;

        return $str;
    }

    /**
     * Splits the string into pieces of at most $length grapheme clusters each.
     *
     * @return static[] Empty when the string is empty
     *
     * @throws InvalidArgumentException When $length is lower than 1
     */
    public function chunk(int $length = 1): array
    {
        if (1 > $length) {
            throw new InvalidArgumentException('The chunk length must be greater than zero.');
        }

        if ('' === $this->string) {
            return [];
        }

        // \X matches one extended grapheme cluster. The count is emitted in slices of 65535,
        // presumably because that is the largest repetition count PCRE accepts in a quantifier.
        $rx = '/(';
        while (65535 < $length) {
            $rx .= '\X{65535}';
            $length -= 65535;
        }
        $rx .= '\X{'.$length.'})/u';

        $str = clone $this;
        $chunks = [];

        // Full-size chunks come back as captured delimiters, the shorter remainder as a regular piece.
        foreach (preg_split($rx, $this->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY) as $chunk) {
            $str->string = $chunk;
            $chunks[] = clone $str;
        }

        return $chunks;
    }

    /**
     * Tells whether the string ends with the given suffix.
     *
     * Non-string iterables are delegated to the parent implementation. An empty or
     * non-normalizable suffix never matches.
     */
    public function endsWith(string|iterable|AbstractString $suffix): bool
    {
        if ($suffix instanceof AbstractString) {
            $suffix = $suffix->string;
        } elseif (!\is_string($suffix)) {
            return parent::endsWith($suffix);
        }

        // ignoreCase is set to null by normalize() when the string was decomposed: compare in that same form.
        $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
        normalizer_is_normalized($suffix, $form) ?: $suffix = normalizer_normalize($suffix, $form);

        if ('' === $suffix || false === $suffix) {
            return false;
        }

        // grapheme_extract() only returns whole clusters within the byte budget, so a suffix
        // that would start in the middle of a grapheme cluster cannot match.
        if ($this->ignoreCase) {
            return 0 === mb_stripos(grapheme_extract($this->string, \strlen($suffix), \GRAPHEME_EXTR_MAXBYTES, \strlen($this->string) - \strlen($suffix)), $suffix, 0, 'UTF-8');
        }

        return $suffix === grapheme_extract($this->string, \strlen($suffix), \GRAPHEME_EXTR_MAXBYTES, \strlen($this->string) - \strlen($suffix));
    }

    /**
     * Tells whether the string equals the given one once both share the same normalization form.
     *
     * Non-string iterables are delegated to the parent implementation.
     */
    public function equalsTo(string|iterable|AbstractString $string): bool
    {
        if ($string instanceof AbstractString) {
            $string = $string->string;
        } elseif (!\is_string($string)) {
            return parent::equalsTo($string);
        }

        $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
        normalizer_is_normalized($string, $form) ?: $string = normalizer_normalize($string, $form);

        if ('' !== $string && false !== $string && $this->ignoreCase) {
            // Same byte length plus a case-insensitive match at offset 0 means the whole string matched.
            return \strlen($string) === \strlen($this->string) && 0 === mb_stripos($this->string, $string, 0, 'UTF-8');
        }

        return $string === $this->string;
    }

    /**
     * Returns the grapheme-cluster position of the first occurrence of the needle, or null when absent.
     *
     * Non-string iterables are delegated to the parent implementation. An empty or
     * non-normalizable needle, or an offset rejected by the intl function, yields null.
     */
    public function indexOf(string|iterable|AbstractString $needle, int $offset = 0): ?int
    {
        if ($needle instanceof AbstractString) {
            $needle = $needle->string;
        } elseif (!\is_string($needle)) {
            return parent::indexOf($needle, $offset);
        }

        $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
        normalizer_is_normalized($needle, $form) ?: $needle = normalizer_normalize($needle, $form);

        if ('' === $needle || false === $needle) {
            return null;
        }

        try {
            $i = $this->ignoreCase ? grapheme_stripos($this->string, $needle, $offset) : grapheme_strpos($this->string, $needle, $offset);
        } catch (\ValueError) {
            // A ValueError raised by the search is reported as "not found".
            return null;
        }

        return false === $i ? null : $i;
    }

    /**
     * Returns the grapheme-cluster position of the last occurrence of the needle, or null when absent.
     *
     * Non-string iterables are delegated to the parent implementation. An empty or
     * non-normalizable needle yields null.
     */
    public function indexOfLast(string|iterable|AbstractString $needle, int $offset = 0): ?int
    {
        if ($needle instanceof AbstractString) {
            $needle = $needle->string;
        } elseif (!\is_string($needle)) {
            return parent::indexOfLast($needle, $offset);
        }

        $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
        normalizer_is_normalized($needle, $form) ?: $needle = normalizer_normalize($needle, $form);

        if ('' === $needle || false === $needle) {
            return null;
        }

        $string = $this->string;

        if (0 > $offset) {
            // workaround https://bugs.php.net/74264
            // A negative offset is emulated by truncating the haystack and searching it from the start.
            if (0 > $offset += grapheme_strlen($needle)) {
                $string = grapheme_substr($string, 0, $offset);
            }
            $offset = 0;
        }

        $i = $this->ignoreCase ? grapheme_strripos($string, $needle, $offset) : grapheme_strrpos($string, $needle, $offset);

        return false === $i ? null : $i;
    }

    /**
     * Joins the given strings through the parent implementation, then re-normalizes the result if needed.
     */
    public function join(array $strings, ?string $lastGlue = null): static
    {
        $str = parent::join($strings, $lastGlue);
        normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);

        return $str;
    }

    /**
     * Returns the number of grapheme clusters in the string.
     */
    public function length(): int
    {
        return grapheme_strlen($this->string);
    }

    /**
     * Returns a copy converted to the requested normalization form.
     *
     * @param int $form One of self::NFC, self::NFKC, self::NFD or self::NFKD
     *
     * @throws InvalidArgumentException When $form is none of the supported forms
     */
    public function normalize(int $form = self::NFC): static
    {
        $str = clone $this;

        if (\in_array($form, [self::NFC, self::NFKC], true)) {
            normalizer_is_normalized($str->string, $form) ?: $str->string = normalizer_normalize($str->string, $form);
        } elseif (!\in_array($form, [self::NFD, self::NFKD], true)) {
            throw new InvalidArgumentException('Unsupported normalization form.');
        } elseif (!normalizer_is_normalized($str->string, $form)) {
            $str->string = normalizer_normalize($str->string, $form);
            // null flags the copy as holding decomposed text; the comparison methods and __clone() check for it.
            $str->ignoreCase = null;
        }

        return $str;
    }

    /**
     * Returns a copy with the given prefixes concatenated at the start, re-normalized if needed.
     *
     * @throws InvalidArgumentException When the concatenated string cannot be normalized
     */
    public function prepend(string ...$prefix): static
    {
        $str = clone $this;
        // Skip implode() for the common zero/one-argument call.
        $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$this->string;

        if (normalizer_is_normalized($str->string)) {
            return $str;
        }

        if (false === $string = normalizer_normalize($str->string)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $str->string = $string;

        return $str;
    }

    /**
     * Returns a copy where every occurrence of $from is replaced by $to.
     *
     * Matching honours the ignoreCase flag. An empty or non-normalizable $from leaves
     * the copy untouched.
     *
     * @throws InvalidArgumentException When the resulting string cannot be normalized
     */
    public function replace(string $from, string $to): static
    {
        $str = clone $this;
        normalizer_is_normalized($from) ?: $from = normalizer_normalize($from);

        if ('' !== $from && false !== $from) {
            $tail = $str->string;
            $result = '';
            $indexOf = $this->ignoreCase ? 'grapheme_stripos' : 'grapheme_strpos';

            while ('' !== $tail && false !== $i = $indexOf($tail, $from)) {
                // $i counts grapheme clusters; the slice's byte length turns it into a byte offset for substr().
                $slice = grapheme_substr($tail, 0, $i);
                $result .= $slice.$to;
                $tail = substr($tail, \strlen($slice) + \strlen($from));
            }

            $str->string = $result.$tail;

            if (normalizer_is_normalized($str->string)) {
                return $str;
            }

            if (false === $string = normalizer_normalize($str->string)) {
                throw new InvalidArgumentException('Invalid UTF-8 string.');
            }

            $str->string = $string;
        }

        return $str;
    }

    /**
     * Applies the parent's regular-expression replacement, then re-normalizes the result if needed.
     */
    public function replaceMatches(string $fromRegexp, string|callable $to): static
    {
        $str = parent::replaceMatches($fromRegexp, $to);
        normalizer_is_normalized($str->string) ?: $str->string = normalizer_normalize($str->string);

        return $str;
    }

    /**
     * Returns a copy holding the part of the string selected by $start and $length, both in grapheme clusters.
     *
     * A null $length extends the slice to the end of the string.
     */
    public function slice(int $start = 0, ?int $length = null): static
    {
        $str = clone $this;

        $str->string = (string) grapheme_substr($this->string, $start, $length ?? 2147483647);

        return $str;
    }

    /**
     * Returns a copy where the range selected by $start and $length is replaced by $replacement.
     *
     * $start and $length are expressed in grapheme clusters. A null $length replaces
     * everything up to the end of the string and a zero $length inserts without removing.
     *
     * @throws InvalidArgumentException When the resulting string cannot be normalized
     */
    public function splice(string $replacement, int $start = 0, ?int $length = null): static
    {
        $str = clone $this;

        // substr_replace() works on bytes, so the grapheme-based range is converted to a byte range.
        // The grapheme offset is kept apart from the byte offset: measuring the replaced range with
        // the byte offset would select the wrong clusters whenever multibyte text precedes $start.
        $byteStart = $start ? \strlen((string) grapheme_substr($this->string, 0, $start)) : 0;
        $byteLength = $length ? \strlen((string) grapheme_substr($this->string, $start, $length)) : $length;
        $str->string = substr_replace($this->string, $replacement, $byteStart, $byteLength ?? 2147483647);

        if (normalizer_is_normalized($str->string)) {
            return $str;
        }

        if (false === $string = normalizer_normalize($str->string)) {
            throw new InvalidArgumentException('Invalid UTF-8 string.');
        }

        $str->string = $string;

        return $str;
    }

    /**
     * Splits the string around $delimiter into at most $limit pieces.
     *
     * When $flags is given, the call is delegated to the parent implementation with a "u"
     * appended to $delimiter. Otherwise the delimiter is searched literally, honouring
     * the ignoreCase flag.
     *
     * @return static[]
     *
     * @throws InvalidArgumentException When $limit is lower than 1, or when $delimiter is empty or cannot be normalized
     */
    public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
    {
        if (1 > $limit ??= 2147483647) {
            throw new InvalidArgumentException('Split limit must be a positive integer.');
        }

        if ('' === $delimiter) {
            throw new InvalidArgumentException('Split delimiter is empty.');
        }

        if (null !== $flags) {
            return parent::split($delimiter.'u', $limit, $flags);
        }

        normalizer_is_normalized($delimiter) ?: $delimiter = normalizer_normalize($delimiter);

        if (false === $delimiter) {
            throw new InvalidArgumentException('Split delimiter is not a valid UTF-8 string.');
        }

        $str = clone $this;
        $tail = $this->string;
        $chunks = [];
        $indexOf = $this->ignoreCase ? 'grapheme_stripos' : 'grapheme_strpos';

        // The last allowed piece receives whatever remains, hence the "1 <" bound.
        while (1 < $limit && false !== $i = $indexOf($tail, $delimiter)) {
            $str->string = grapheme_substr($tail, 0, $i);
            $chunks[] = clone $str;
            $tail = substr($tail, \strlen($str->string) + \strlen($delimiter));
            --$limit;
        }

        $str->string = $tail;
        $chunks[] = clone $str;

        return $chunks;
    }

    /**
     * Tells whether the string starts with the given prefix.
     *
     * Non-string iterables are delegated to the parent implementation. An empty or
     * non-normalizable prefix never matches.
     */
    public function startsWith(string|iterable|AbstractString $prefix): bool
    {
        if ($prefix instanceof AbstractString) {
            $prefix = $prefix->string;
        } elseif (!\is_string($prefix)) {
            return parent::startsWith($prefix);
        }

        $form = null === $this->ignoreCase ? \Normalizer::NFD : \Normalizer::NFC;
        normalizer_is_normalized($prefix, $form) ?: $prefix = normalizer_normalize($prefix, $form);

        if ('' === $prefix || false === $prefix) {
            return false;
        }

        // grapheme_extract() only returns whole clusters within the byte budget, so a prefix
        // that would end in the middle of a grapheme cluster cannot match.
        if ($this->ignoreCase) {
            return 0 === mb_stripos(grapheme_extract($this->string, \strlen($prefix), \GRAPHEME_EXTR_MAXBYTES), $prefix, 0, 'UTF-8');
        }

        return $prefix === grapheme_extract($this->string, \strlen($prefix), \GRAPHEME_EXTR_MAXBYTES);
    }

    /**
     * Validates and re-normalizes the string after unserialization.
     *
     * @throws \BadMethodCallException When the unserialized value is not a string
     */
    public function __wakeup(): void
    {
        if (!\is_string($this->string)) {
            throw new \BadMethodCallException('Cannot unserialize '.__CLASS__);
        }

        normalizer_is_normalized($this->string) ?: $this->string = normalizer_normalize($this->string);
    }

    /**
     * Resets the copy: decomposed text (ignoreCase === null) is normalized again and ignoreCase becomes false.
     */
    public function __clone()
    {
        if (null === $this->ignoreCase) {
            normalizer_is_normalized($this->string) ?: $this->string = normalizer_normalize($this->string);
        }

        $this->ignoreCase = false;
    }
}