| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- <?php
- /**
- * League.Uri (https://uri.thephpleague.com)
- *
- * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
- declare(strict_types=1);
- namespace League\Uri\Idna;
- use League\Uri\Exceptions\ConversionFailed;
- use League\Uri\Exceptions\SyntaxError;
- use League\Uri\FeatureDetection;
- use Stringable;
- use function idn_to_ascii;
- use function idn_to_utf8;
- use function rawurldecode;
- use const INTL_IDNA_VARIANT_UTS46;
/**
 * Converts host names between their IDNA ASCII (punycode) and UNICODE forms.
 *
 * Conversions are delegated to the intl functions idn_to_ascii() and
 * idn_to_utf8() using the UTS46 variant; hosts that do not require a
 * conversion are validated locally instead.
 *
 * @see https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uidna_8h.html
 */
final class Converter
{
    /** Matches any byte outside the 0x20-0x7F range, i.e. a host that needs an IDNA conversion. */
    private const REGEXP_IDNA_PATTERN = '/[^\x20-\x7f]/';
    /** Maximum domain length in bytes, excluding the optional trailing root dot. */
    private const MAX_DOMAIN_LENGTH = 253;
    /** Maximum length in bytes of a single label. */
    private const MAX_LABEL_LENGTH = 63;

    /**
     * General registered name regular expression.
     *
     * The named groups are only definitions; the last pattern line references
     * them through the (?&name) subroutine syntax.
     *
     * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @see https://regex101.com/r/fptU8V/1
     */
    private const REGEXP_REGISTERED_NAME = '/
    (?(DEFINE)
        (?<unreserved>[a-z0-9_~\-])   # . is missing as it is used to separate labels
        (?<sub_delims>[!$&\'()*+,;=])
        (?<encoded>%[A-F0-9]{2})
        (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
    )
        ^(?:(?&reg_name)\.)*(?&reg_name)\.?$
    /ix';

    /**
     * Converts the input to its IDNA ASCII form or throw on failure.
     *
     * @see Converter::toAscii()
     *
     * @throws SyntaxError if the string cannot be converted to ASCII using IDN UTS46 algorithm
     * @throws ConversionFailed if the conversion returns error
     */
    public static function toAsciiOrFail(Stringable|string $domain, Option|int|null $options = null): string
    {
        $result = self::toAscii($domain, $options);
        if ($result->hasErrors()) {
            // Cast so the exception factory always receives a plain string,
            // even when a Stringable object was submitted.
            throw ConversionFailed::dueToIdnError((string) $domain, $result);
        }

        return $result->domain();
    }

    /**
     * Converts the input to its IDNA ASCII form.
     *
     * The input is percent-decoded first. When it contains bytes outside the
     * 0x20-0x7F range it is converted with idn_to_ascii(); otherwise it is
     * lowercased and validated against the registered name grammar and the
     * domain/label length limits.
     *
     * @throws SyntaxError if the string cannot be converted to ASCII using IDN UTS46 algorithm
     */
    public static function toAscii(Stringable|string $domain, Option|int|null $options = null): Result
    {
        $domain = rawurldecode((string) $domain);

        if (1 === preg_match(self::REGEXP_IDNA_PATTERN, $domain)) {
            // Return value unused: presumably throws when IDN support is missing (TODO confirm).
            FeatureDetection::supportsIdn();

            $flags = match (true) {
                null === $options => Option::forIDNA2008Ascii(),
                $options instanceof Option => $options,
                default => Option::new($options),
            };

            // Only the by-reference $idnaInfo report is used; the return value is ignored.
            idn_to_ascii($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);

            // An empty report means no conversion details were provided:
            // fall back to the lowercased input with the local length checks.
            if ([] === $idnaInfo) {
                return Result::fromIntl([
                    'result' => strtolower($domain),
                    'isTransitionalDifferent' => false,
                    'errors' => self::validateDomainAndLabelLength($domain),
                ]);
            }

            return Result::fromIntl($idnaInfo);
        }

        // No conversion needed: only the syntax and the lengths are checked.
        $error = Error::NONE->value;
        if (1 !== preg_match(self::REGEXP_REGISTERED_NAME, $domain)) {
            $error |= Error::DISALLOWED->value;
        }

        return Result::fromIntl([
            'result' => strtolower($domain),
            'isTransitionalDifferent' => false,
            'errors' => self::validateDomainAndLabelLength($domain) | $error,
        ]);
    }

    /**
     * Converts the input to its IDNA UNICODE form or throw on failure.
     *
     * @see Converter::toUnicode()
     *
     * @throws ConversionFailed if the conversion returns error
     */
    public static function toUnicodeOrFail(Stringable|string $domain, Option|int|null $options = null): string
    {
        $result = self::toUnicode($domain, $options);
        if ($result->hasErrors()) {
            // Cast so the exception factory always receives a plain string,
            // even when a Stringable object was submitted.
            throw ConversionFailed::dueToIdnError((string) $domain, $result);
        }

        return $result->domain();
    }

    /**
     * Converts the input to its IDNA UNICODE form.
     *
     * The input is percent-decoded first. When it does not contain the
     * "xn--" prefix (case-insensitive) it is returned as is without error;
     * otherwise it is converted with idn_to_utf8().
     *
     * @throws SyntaxError if the string cannot be converted to UNICODE using IDN UTS46 algorithm
     */
    public static function toUnicode(Stringable|string $domain, Option|int|null $options = null): Result
    {
        $domain = rawurldecode((string) $domain);

        // Without a punycode prefix there is nothing to decode.
        if (false === stripos($domain, 'xn--')) {
            return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
        }

        // Return value unused: presumably throws when IDN support is missing (TODO confirm).
        FeatureDetection::supportsIdn();

        $flags = match (true) {
            null === $options => Option::forIDNA2008Unicode(),
            $options instanceof Option => $options,
            default => Option::new($options),
        };

        // Only the by-reference $idnaInfo report is used; the return value is ignored.
        idn_to_utf8($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);

        // An empty report means no conversion details were provided:
        // the decoded input is returned unchanged and without error.
        if ([] === $idnaInfo) {
            return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
        }

        return Result::fromIntl($idnaInfo);
    }

    /**
     * Tells whether the submitted host is a valid IDN regardless of its format.
     *
     * The host is percent-decoded and lowercased, then converted to ASCII when
     * it contains bytes outside the 0x20-0x7F range and to UNICODE otherwise.
     *
     * Returns false if the host is invalid or if its conversion yields the same result.
     */
    public static function isIdn(Stringable|string|null $domain): bool
    {
        $domain = strtolower(rawurldecode((string) $domain));

        $result = match (1) {
            preg_match(self::REGEXP_IDNA_PATTERN, $domain) => self::toAscii($domain),
            default => self::toUnicode($domain),
        };

        if ($result->hasErrors()) {
            return false;
        }

        // A host is an IDN only if the conversion actually changed it.
        return $result->domain() !== $domain;
    }

    /**
     * Checks the domain and its labels against the maximum allowed lengths.
     *
     * Adapted from https://github.com/TRowbotham/idna.
     *
     * @see https://github.com/TRowbotham/idna/blob/master/src/Idna.php#L236
     *
     * @return int a bitmask combining Error::DOMAIN_NAME_TOO_LONG and/or
     *             Error::LABEL_TOO_LONG values, or Error::NONE when both limits are respected
     */
    private static function validateDomainAndLabelLength(string $domain): int
    {
        $error = Error::NONE->value;
        $labels = explode('.', $domain);
        $maxDomainSize = self::MAX_DOMAIN_LENGTH;
        $length = count($labels);

        // If the last label is empty, and it is not the first label, then it is the root label.
        // Increase the max size by 1, making it 254, to account for the root label's "."
        // delimiter. This also means we don't need to check the last label's length for being too
        // long.
        if ($length > 1 && '' === $labels[$length - 1]) {
            ++$maxDomainSize;
            array_pop($labels);
        }

        if (strlen($domain) > $maxDomainSize) {
            $error |= Error::DOMAIN_NAME_TOO_LONG->value;
        }

        foreach ($labels as $label) {
            if (strlen($label) > self::MAX_LABEL_LENGTH) {
                $error |= Error::LABEL_TOO_LONG->value;

                // One oversized label is enough to set the flag.
                break;
            }
        }

        return $error;
    }
}
|