AbstractString.php 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. use Symfony\Component\String\Exception\RuntimeException;
  /**
   * Represents a string of abstract characters.
   *
   * Unicode defines 3 types of "characters" (bytes, code points and grapheme clusters).
   * This class is the abstract type to use as a type-hint when the logic you want to
   * implement doesn't care about the exact variant it deals with.
   *
   * Several methods accepting "string|iterable" (endsWith(), equalsTo(), indexOf(),
   * indexOfLast(), startsWith()) only implement the iterable case here and throw a
   * \TypeError for plain strings: child classes must override them to handle strings.
   *
   * @author Nicolas Grekas <p@tchwork.com>
   * @author Hugo Hamon <hugohamon@neuf.fr>
   *
   * @throws ExceptionInterface
   */
  abstract class AbstractString implements \Stringable, \JsonSerializable
  {
      public const PREG_PATTERN_ORDER = \PREG_PATTERN_ORDER;
      public const PREG_SET_ORDER = \PREG_SET_ORDER;
      public const PREG_OFFSET_CAPTURE = \PREG_OFFSET_CAPTURE;
      public const PREG_UNMATCHED_AS_NULL = \PREG_UNMATCHED_AS_NULL;

      public const PREG_SPLIT = 0;
      public const PREG_SPLIT_NO_EMPTY = \PREG_SPLIT_NO_EMPTY;
      public const PREG_SPLIT_DELIM_CAPTURE = \PREG_SPLIT_DELIM_CAPTURE;
      public const PREG_SPLIT_OFFSET_CAPTURE = \PREG_SPLIT_OFFSET_CAPTURE;

      /** The raw value wrapped by this object. */
      protected string $string = '';

      /** Case-insensitivity flag; set by ignoreCase() and reset to false on every clone. */
      protected ?bool $ignoreCase = false;

      abstract public function __construct(string $string = '');

      /**
       * Unwraps instances of AbstractString back to strings.
       *
       * Nested arrays are processed recursively and keys are preserved. Values that are
       * neither arrays nor AbstractString instances are returned untouched.
       *
       * @return string[]|array
       */
      public static function unwrap(array $values): array
      {
          foreach ($values as $k => $v) {
              if ($v instanceof self) {
                  $values[$k] = $v->__toString();
              } elseif (\is_array($v) && $values[$k] !== $v = static::unwrap($v)) {
                  $values[$k] = $v;
              }
          }

          return $values;
      }

      /**
       * Wraps (and normalizes) strings in instances of AbstractString.
       *
       * String values become "new static($value)"; nested arrays are processed
       * recursively. Non-empty string keys are also passed through "new static($key)"
       * and replaced by the resulting string when it differs from the original key.
       *
       * @return static[]|array
       */
      public static function wrap(array $values): array
      {
          $i = 0;
          $keys = null;

          foreach ($values as $k => $v) {
              if (\is_string($k) && '' !== $k && $k !== $j = (string) new static($k)) {
                  // Lazily snapshot the keys the first time one of them needs rewriting
                  $keys ??= array_keys($values);
                  $keys[$i] = $j;
              }

              if (\is_string($v)) {
                  $values[$k] = new static($v);
              } elseif (\is_array($v) && $values[$k] !== $v = static::wrap($v)) {
                  $values[$k] = $v;
              }

              ++$i;
          }

          return null !== $keys ? array_combine($keys, $values) : $values;
      }

      /**
       * Returns the part of the string located after the earliest occurrence of any needle.
       *
       * When none of the needles is found, a clone of the whole string is returned.
       *
       * @param string|string[] $needle
       * @param bool            $includeNeedle Whether the matched needle is kept at the start of the result
       * @param int             $offset        Forwarded to indexOf() as the search offset
       */
      public function after(string|iterable $needle, bool $includeNeedle = false, int $offset = 0): static
      {
          $str = clone $this;
          $i = \PHP_INT_MAX;

          if (\is_string($needle)) {
              $needle = [$needle];
          }

          foreach ($needle as $n) {
              $n = (string) $n;
              $j = $this->indexOf($n, $offset);

              if (null !== $j && $j < $i) {
                  $i = $j;
                  // Remember the winning needle so that its length() can be measured below
                  $str->string = $n;
              }
          }

          if (\PHP_INT_MAX === $i) {
              return $str;
          }

          if (!$includeNeedle) {
              $i += $str->length();
          }

          return $this->slice($i);
      }

      /**
       * Returns the part of the string located after the last occurrence of any needle.
       *
       * When none of the needles is found, a clone of the whole string is returned.
       *
       * @param string|string[] $needle
       * @param bool            $includeNeedle Whether the matched needle is kept at the start of the result
       * @param int             $offset        Forwarded to indexOfLast() as the search offset
       */
      public function afterLast(string|iterable $needle, bool $includeNeedle = false, int $offset = 0): static
      {
          $str = clone $this;
          $i = null;

          if (\is_string($needle)) {
              $needle = [$needle];
          }

          foreach ($needle as $n) {
              $n = (string) $n;
              $j = $this->indexOfLast($n, $offset);

              if (null !== $j && $j >= $i) {
                  // The best index found so far also becomes the offset used for the next needles
                  $i = $offset = $j;
                  $str->string = $n;
              }
          }

          if (null === $i) {
              return $str;
          }

          if (!$includeNeedle) {
              $i += $str->length();
          }

          return $this->slice($i);
      }

      abstract public function append(string ...$suffix): static;

      /**
       * Returns the part of the string located before the earliest occurrence of any needle.
       *
       * When none of the needles is found, a clone of the whole string is returned.
       *
       * @param string|string[] $needle
       * @param bool            $includeNeedle Whether the matched needle is kept at the end of the result
       * @param int             $offset        Forwarded to indexOf() as the search offset
       */
      public function before(string|iterable $needle, bool $includeNeedle = false, int $offset = 0): static
      {
          $str = clone $this;
          $i = \PHP_INT_MAX;

          if (\is_string($needle)) {
              $needle = [$needle];
          }

          foreach ($needle as $n) {
              $n = (string) $n;
              $j = $this->indexOf($n, $offset);

              if (null !== $j && $j < $i) {
                  $i = $j;
                  $str->string = $n;
              }
          }

          if (\PHP_INT_MAX === $i) {
              return $str;
          }

          if ($includeNeedle) {
              $i += $str->length();
          }

          return $this->slice(0, $i);
      }

      /**
       * Returns the part of the string located before the last occurrence of any needle.
       *
       * When none of the needles is found, a clone of the whole string is returned.
       *
       * @param string|string[] $needle
       * @param bool            $includeNeedle Whether the matched needle is kept at the end of the result
       * @param int             $offset        Forwarded to indexOfLast() as the search offset
       */
      public function beforeLast(string|iterable $needle, bool $includeNeedle = false, int $offset = 0): static
      {
          $str = clone $this;
          $i = null;

          if (\is_string($needle)) {
              $needle = [$needle];
          }

          foreach ($needle as $n) {
              $n = (string) $n;
              $j = $this->indexOfLast($n, $offset);

              if (null !== $j && $j >= $i) {
                  $i = $offset = $j;
                  $str->string = $n;
              }
          }

          if (null === $i) {
              return $str;
          }

          if ($includeNeedle) {
              $i += $str->length();
          }

          return $this->slice(0, $i);
      }

      /**
       * Returns the byte values of the single character found at the given offset.
       *
       * The character is obtained with slice($offset, 1), so its size in bytes depends
       * on the concrete class. An empty array is returned when the slice is empty.
       *
       * @return int[]
       */
      public function bytesAt(int $offset): array
      {
          $str = $this->slice($offset, 1);

          return '' === $str->string ? [] : array_values(unpack('C*', $str->string));
      }

      abstract public function camel(): static;

      /**
       * @return static[]
       */
      abstract public function chunk(int $length = 1): array;

      /**
       * Replaces runs of spaces/newlines/tabs/form-feeds (and any single newline, tab or
       * form-feed) by one space, then strips those characters from both ends.
       */
      public function collapseWhitespace(): static
      {
          $str = clone $this;
          $str->string = trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $str->string), " \n\r\t\x0C");

          return $str;
      }

      /**
       * Tells whether indexOf() finds at least one of the given needles.
       *
       * @param string|string[] $needle
       */
      public function containsAny(string|iterable $needle): bool
      {
          return null !== $this->indexOf($needle);
      }

      /**
       * Tells whether the string ends with any of the given suffixes.
       *
       * Only the iterable case is implemented here; each item is cast to string and
       * checked through the (overridden) string variant.
       *
       * @param string|string[] $suffix
       *
       * @throws \TypeError When a plain string reaches this base implementation
       */
      public function endsWith(string|iterable $suffix): bool
      {
          if (\is_string($suffix)) {
              throw new \TypeError(\sprintf('Method "%s()" must be overridden by class "%s" to deal with non-iterable values.', __FUNCTION__, static::class));
          }

          foreach ($suffix as $s) {
              if ($this->endsWith((string) $s)) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Makes the string end with exactly one occurrence of the suffix.
       *
       * The suffix is appended when missing; otherwise repeated trailing occurrences
       * are collapsed into a single one with a regular expression.
       */
      public function ensureEnd(string $suffix): static
      {
          if (!$this->endsWith($suffix)) {
              return $this->append($suffix);
          }

          $suffix = preg_quote($suffix);
          $regex = '{('.$suffix.')(?:'.$suffix.')++$}D';

          return $this->replaceMatches($regex.($this->ignoreCase ? 'i' : ''), '$1');
      }

      /**
       * Makes the string start with exactly one occurrence of the prefix.
       *
       * The prefix is prepended when missing; otherwise repeated leading occurrences
       * are removed one by one until a single one remains.
       */
      public function ensureStart(string $prefix): static
      {
          $prefix = new static($prefix);

          if (!$this->startsWith($prefix)) {
              return $this->prepend($prefix);
          }

          $str = clone $this;
          $i = $prefixLen = $prefix->length();

          // Each iteration drops one duplicated prefix located right after the first one
          while ($this->indexOf($prefix, $i) === $i) {
              $str = $str->slice($prefixLen);
              $i += $prefixLen;
          }

          return $str;
      }

      /**
       * Tells whether the string equals any of the given strings.
       *
       * Only the iterable case is implemented here; each item is cast to string and
       * checked through the (overridden) string variant.
       *
       * @param string|string[] $string
       *
       * @throws \TypeError When a plain string reaches this base implementation
       */
      public function equalsTo(string|iterable $string): bool
      {
          if (\is_string($string)) {
              throw new \TypeError(\sprintf('Method "%s()" must be overridden by class "%s" to deal with non-iterable values.', __FUNCTION__, static::class));
          }

          foreach ($string as $s) {
              if ($this->equalsTo((string) $s)) {
                  return true;
              }
          }

          return false;
      }

      abstract public function folded(): static;

      /**
       * Returns a clone flagged as case-insensitive.
       *
       * Because __clone() resets the flag, it is not carried over to the clones
       * created by subsequent operations.
       */
      public function ignoreCase(): static
      {
          $str = clone $this;
          $str->ignoreCase = true;

          return $str;
      }

      /**
       * Returns the lowest index at which any of the given needles is found, or null.
       *
       * Only the iterable case is implemented here.
       *
       * @param string|string[] $needle
       *
       * @throws \TypeError When a plain string reaches this base implementation
       */
      public function indexOf(string|iterable $needle, int $offset = 0): ?int
      {
          if (\is_string($needle)) {
              throw new \TypeError(\sprintf('Method "%s()" must be overridden by class "%s" to deal with non-iterable values.', __FUNCTION__, static::class));
          }

          $i = \PHP_INT_MAX;

          foreach ($needle as $n) {
              $j = $this->indexOf((string) $n, $offset);

              if (null !== $j && $j < $i) {
                  $i = $j;
              }
          }

          return \PHP_INT_MAX === $i ? null : $i;
      }

      /**
       * Returns the highest "last index" at which any of the given needles is found, or null.
       *
       * Only the iterable case is implemented here.
       *
       * @param string|string[] $needle
       *
       * @throws \TypeError When a plain string reaches this base implementation
       */
      public function indexOfLast(string|iterable $needle, int $offset = 0): ?int
      {
          if (\is_string($needle)) {
              throw new \TypeError(\sprintf('Method "%s()" must be overridden by class "%s" to deal with non-iterable values.', __FUNCTION__, static::class));
          }

          $i = null;

          foreach ($needle as $n) {
              $j = $this->indexOfLast((string) $n, $offset);

              if (null !== $j && $j >= $i) {
                  // The best index found so far also becomes the offset used for the next needles
                  $i = $offset = $j;
              }
          }

          return $i;
      }

      /**
       * Tells whether the wrapped value is the empty string.
       */
      public function isEmpty(): bool
      {
          return '' === $this->string;
      }

      abstract public function join(array $strings, ?string $lastGlue = null): static;

      /**
       * Serializes the object to JSON as its raw string value.
       */
      public function jsonSerialize(): string
      {
          return $this->string;
      }

      abstract public function length(): int;

      abstract public function lower(): static;

      /**
       * Matches the string using a regular expression.
       *
       * Pass PREG_PATTERN_ORDER or PREG_SET_ORDER as $flags to get all occurrences matching the regular expression.
       *
       * @return array All matches in a multi-dimensional array ordered according to flags
       */
      abstract public function match(string $regexp, int $flags = 0, int $offset = 0): array;

      abstract public function padBoth(int $length, string $padStr = ' '): static;

      abstract public function padEnd(int $length, string $padStr = ' '): static;

      abstract public function padStart(int $length, string $padStr = ' '): static;

      abstract public function prepend(string ...$prefix): static;

      /**
       * Returns the string repeated $multiplier times (0 yields an empty string).
       *
       * @throws InvalidArgumentException When $multiplier is negative
       */
      public function repeat(int $multiplier): static
      {
          if (0 > $multiplier) {
              throw new InvalidArgumentException(\sprintf('Multiplier must be positive, %d given.', $multiplier));
          }

          $str = clone $this;
          $str->string = str_repeat($str->string, $multiplier);

          return $str;
      }

      abstract public function replace(string $from, string $to): static;

      abstract public function replaceMatches(string $fromRegexp, string|callable $to): static;

      abstract public function reverse(): static;

      abstract public function slice(int $start = 0, ?int $length = null): static;

      abstract public function snake(): static;

      /**
       * Returns the snake() form of the string with underscores replaced by dashes.
       */
      public function kebab(): static
      {
          return $this->snake()->replace('_', '-');
      }

      abstract public function splice(string $replacement, int $start = 0, ?int $length = null): static;

      /**
       * Splits the string with preg_split(), using $delimiter as the regular expression.
       *
       * This base implementation only handles the regexp case (non-null $flags); the
       * behavior for null $flags has to be provided by child classes. The "i" modifier
       * is appended to the pattern when the instance is flagged as case-insensitive.
       *
       * @param int|null $limit Maximum number of chunks; null means "no limit"
       * @param int|null $flags A combination of the PREG_SPLIT_* constants
       *
       * @return static[] Each chunk wrapped in a clone; with PREG_SPLIT_OFFSET_CAPTURE,
       *                  each item is a pair whose first element is the wrapped chunk
       *
       * @throws \TypeError               When $flags is null
       * @throws InvalidArgumentException When preg_split() triggers a PHP error (e.g. invalid pattern)
       * @throws RuntimeException         When preg_split() fails at runtime
       */
      public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
      {
          if (null === $flags) {
              throw new \TypeError('Split behavior when $flags is null must be implemented by child classes.');
          }

          if ($this->ignoreCase) {
              $delimiter .= 'i';
          }

          set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));

          try {
              // preg_split() declares a non-nullable int $limit: forwarding null would raise a
              // deprecation that the handler above turns into an exception; -1 means "no limit".
              if (false === $chunks = preg_split($delimiter, $this->string, $limit ?? -1, $flags)) {
                  throw new RuntimeException('Splitting failed with error: '.preg_last_error_msg());
              }
          } finally {
              restore_error_handler();
          }

          $str = clone $this;

          if (self::PREG_SPLIT_OFFSET_CAPTURE & $flags) {
              foreach ($chunks as &$chunk) {
                  $str->string = $chunk[0];
                  $chunk[0] = clone $str;
              }
          } else {
              foreach ($chunks as &$chunk) {
                  $str->string = $chunk;
                  $chunk = clone $str;
              }
          }
          // Break the by-reference binding so the last item is not left as a dangling reference
          unset($chunk);

          return $chunks;
      }

      /**
       * Tells whether the string starts with any of the given prefixes.
       *
       * Only the iterable case is implemented here; each item is cast to string and
       * checked through the (overridden) string variant.
       *
       * @param string|string[] $prefix
       *
       * @throws \TypeError When a plain string reaches this base implementation
       */
      public function startsWith(string|iterable $prefix): bool
      {
          if (\is_string($prefix)) {
              throw new \TypeError(\sprintf('Method "%s()" must be overridden by class "%s" to deal with non-iterable values.', __FUNCTION__, static::class));
          }

          foreach ($prefix as $p) {
              if ($this->startsWith((string) $p)) {
                  return true;
              }
          }

          return false;
      }

      abstract public function title(bool $allWords = false): static;

      /**
       * Converts the string to a ByteString, optionally transcoding it.
       *
       * The source encoding is UTF-8 for AbstractUnicodeString instances and for any
       * value that is valid UTF-8; other byte sequences are treated as Windows-1252.
       * The bytes are copied as is when $toEncoding is null or equals the source encoding.
       *
       * @param string|null $toEncoding Target encoding; "utf8", "utf-8" and "UTF8" are normalized to "UTF-8"
       *
       * @throws InvalidArgumentException When the conversion is not possible
       */
      public function toByteString(?string $toEncoding = null): ByteString
      {
          $b = new ByteString();

          $toEncoding = \in_array($toEncoding, ['utf8', 'utf-8', 'UTF8'], true) ? 'UTF-8' : $toEncoding;

          if (null === $toEncoding) {
              $b->string = $this->string;

              return $b;
          }

          // Probe the actual value: probing the freshly created (empty) ByteString would always report UTF-8
          $fromEncoding = $this instanceof AbstractUnicodeString || preg_match('//u', $this->string) ? 'UTF-8' : 'Windows-1252';

          if ($toEncoding === $fromEncoding) {
              $b->string = $this->string;

              return $b;
          }

          try {
              $b->string = mb_convert_encoding($this->string, $toEncoding, $fromEncoding);
          } catch (\ValueError $e) {
              // mbstring rejected one of the encodings: fall back to iconv when it is available
              if (!\function_exists('iconv')) {
                  throw new InvalidArgumentException($e->getMessage(), $e->getCode(), $e);
              }

              $converted = iconv($fromEncoding, $toEncoding, $this->string);

              if (false === $converted) {
                  throw new InvalidArgumentException($e->getMessage(), $e->getCode(), $e);
              }

              $b->string = $converted;
          }

          return $b;
      }

      /**
       * Returns a new CodePointString built from the raw string value.
       */
      public function toCodePointString(): CodePointString
      {
          return new CodePointString($this->string);
      }

      /**
       * Returns the raw string value.
       */
      public function toString(): string
      {
          return $this->string;
      }

      /**
       * Returns a new UnicodeString built from the raw string value.
       */
      public function toUnicodeString(): UnicodeString
      {
          return new UnicodeString($this->string);
      }

      abstract public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static;

      abstract public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static;

      /**
       * Removes the given prefix from the start of the string when present.
       *
       * With an array or \Traversable, the result of the first prefix that actually
       * changes the string is returned. The comparison is done on bytes and honors
       * the case-insensitivity flag. A clone is returned when nothing matches.
       *
       * @param string|string[] $prefix
       */
      public function trimPrefix($prefix): static
      {
          if (\is_array($prefix) || $prefix instanceof \Traversable) { // don't use is_iterable(), it's slow
              foreach ($prefix as $s) {
                  $t = $this->trimPrefix($s);

                  if ($t->string !== $this->string) {
                      return $t;
                  }
              }

              return clone $this;
          }

          $str = clone $this;

          if ($prefix instanceof self) {
              $prefix = $prefix->string;
          } else {
              $prefix = (string) $prefix;
          }

          if ('' !== $prefix && \strlen($this->string) >= \strlen($prefix) && 0 === substr_compare($this->string, $prefix, 0, \strlen($prefix), $this->ignoreCase)) {
              $str->string = substr($this->string, \strlen($prefix));
          }

          return $str;
      }

      abstract public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static;

      /**
       * Removes the given suffix from the end of the string when present.
       *
       * With an array or \Traversable, the result of the first suffix that actually
       * changes the string is returned. The comparison is done on bytes and honors
       * the case-insensitivity flag. A clone is returned when nothing matches.
       *
       * @param string|string[] $suffix
       */
      public function trimSuffix($suffix): static
      {
          if (\is_array($suffix) || $suffix instanceof \Traversable) { // don't use is_iterable(), it's slow
              foreach ($suffix as $s) {
                  $t = $this->trimSuffix($s);

                  if ($t->string !== $this->string) {
                      return $t;
                  }
              }

              return clone $this;
          }

          $str = clone $this;

          if ($suffix instanceof self) {
              $suffix = $suffix->string;
          } else {
              $suffix = (string) $suffix;
          }

          if ('' !== $suffix && \strlen($this->string) >= \strlen($suffix) && 0 === substr_compare($this->string, $suffix, -\strlen($suffix), null, $this->ignoreCase)) {
              $str->string = substr($this->string, 0, -\strlen($suffix));
          }

          return $str;
      }

      /**
       * Shortens the string to at most $length characters, optionally appending an ellipsis.
       *
       * A clone is returned untouched when the string already fits. The ellipsis is
       * counted in $length, unless $length is smaller than the ellipsis itself, in
       * which case no ellipsis is appended.
       *
       * @param bool|TruncateMode $cut TruncateMode::WordAfter (or false) moves the cut to the next
       *                               whitespace found from position $length - 1 and returns the whole
       *                               string when there is none; TruncateMode::WordBefore cuts back to
       *                               the last whitespace of the kept part; any other value cuts at $length
       */
      public function truncate(int $length, string $ellipsis = '', bool|TruncateMode $cut = TruncateMode::Char): static
      {
          $stringLength = $this->length();

          if ($stringLength <= $length) {
              return clone $this;
          }

          $ellipsisLength = '' !== $ellipsis ? (new static($ellipsis))->length() : 0;

          if ($length < $ellipsisLength) {
              // Not enough room for the ellipsis: it is dropped altogether
              $ellipsisLength = 0;
          }

          $desiredLength = $length;
          if (TruncateMode::WordAfter === $cut || !$cut) {
              if (null === $length = $this->indexOf([' ', "\r", "\n", "\t"], ($length ?: 1) - 1)) {
                  return clone $this;
              }

              // Compensates the subtraction done by the slice() call below
              $length += $ellipsisLength;
          } elseif (TruncateMode::WordBefore === $cut && null !== $this->indexOf([' ', "\r", "\n", "\t"], ($length ?: 1) - 1)) {
              $length += $ellipsisLength;
          }

          $str = $this->slice(0, $length - $ellipsisLength);

          if (TruncateMode::WordBefore === $cut) {
              // The cut already falls right before a whitespace: nothing more to remove
              if (0 === $ellipsisLength && $desiredLength === $this->indexOf([' ', "\r", "\n", "\t"], $length)) {
                  return $str;
              }

              $str = $str->beforeLast([' ', "\r", "\n", "\t"]);
          }

          return $ellipsisLength ? $str->trimEnd()->append($ellipsis) : $str;
      }

      abstract public function upper(): static;

      /**
       * Returns the printable length on a terminal.
       */
      abstract public function width(bool $ignoreAnsiDecoration = true): int;

      /**
       * Wraps the string to the given width, counting chunk() units instead of bytes.
       *
       * The string is first mapped to a mask holding one byte per chunk (" " for a
       * space, "?" for anything else, "#" for an existing break); PHP's native
       * wordwrap() then computes the break positions on that mask, and the result is
       * rebuilt from the original chunks.
       *
       * @param string $break The line separator, also used to detect existing lines
       * @param bool   $cut   Forwarded to wordwrap(): whether words longer than $width are cut
       */
      public function wordwrap(int $width = 75, string $break = "\n", bool $cut = false): static
      {
          $lines = '' !== $break ? $this->split($break) : [clone $this];
          $chars = [];
          $mask = '';

          if (1 === \count($lines) && '' === $lines[0]->string) {
              return $lines[0];
          }

          foreach ($lines as $i => $line) {
              if ($i) {
                  $chars[] = $break;
                  $mask .= '#';
              }

              foreach ($line->chunk() as $char) {
                  $chars[] = $char->string;
                  $mask .= ' ' === $char->string ? ' ' : '?';
              }
          }

          $string = '';
          $j = 0;
          $b = $i = -1;
          $mask = wordwrap($mask, $width, '#', $cut);

          while (false !== $b = strpos($mask, '#', $b + 1)) {
              // Copy the chunks located before the next break position
              for (++$i; $i < $b; ++$i) {
                  $string .= $chars[$j];
                  unset($chars[$j++]);
              }

              // The chunk sitting at the break position is dropped when it is a break or a space
              if ($break === $chars[$j] || ' ' === $chars[$j]) {
                  unset($chars[$j++]);
              }

              $string .= $break;
          }

          $str = clone $this;
          $str->string = $string.implode('', $chars);

          return $str;
      }

      /**
       * Restricts serialization to the "string" property.
       */
      public function __sleep(): array
      {
          return ['string'];
      }

      /**
       * Resets the case-insensitivity flag on the cloned instance.
       */
      public function __clone()
      {
          $this->ignoreCase = false;
      }

      /**
       * Returns the raw string value.
       */
      public function __toString(): string
      {
          return $this->string;
      }
  }