Converter.php 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri\Idna;
  12. use League\Uri\Exceptions\ConversionFailed;
  13. use League\Uri\Exceptions\SyntaxError;
  14. use League\Uri\FeatureDetection;
  15. use Stringable;
  16. use function idn_to_ascii;
  17. use function idn_to_utf8;
  18. use function rawurldecode;
  19. use const INTL_IDNA_VARIANT_UTS46;
  20. /**
  21. * @see https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uidna_8h.html
  22. */
  23. final class Converter
  24. {
  25. private const REGEXP_IDNA_PATTERN = '/[^\x20-\x7f]/';
  26. private const MAX_DOMAIN_LENGTH = 253;
  27. private const MAX_LABEL_LENGTH = 63;
  28. /**
  29. * General registered name regular expression.
  30. *
  31. * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
  32. * @see https://regex101.com/r/fptU8V/1
  33. */
  34. private const REGEXP_REGISTERED_NAME = '/
  35. (?(DEFINE)
  36. (?<unreserved>[a-z0-9_~\-]) # . is missing as it is used to separate labels
  37. (?<sub_delims>[!$&\'()*+,;=])
  38. (?<encoded>%[A-F0-9]{2})
  39. (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
  40. )
  41. ^(?:(?&reg_name)\.)*(?&reg_name)\.?$
  42. /ix';
  43. /**
  44. * Converts the input to its IDNA ASCII form or throw on failure.
  45. *
  46. * @see Converter::toAscii()
  47. *
  48. * @throws SyntaxError if the string cannot be converted to UNICODE using IDN UTS46 algorithm
  49. * @throws ConversionFailed if the conversion returns error
  50. */
  51. public static function toAsciiOrFail(Stringable|string $domain, Option|int|null $options = null): string
  52. {
  53. $result = self::toAscii($domain, $options);
  54. return match (true) {
  55. $result->hasErrors() => throw ConversionFailed::dueToIdnError($domain, $result),
  56. default => $result->domain(),
  57. };
  58. }
  59. /**
  60. * Converts the input to its IDNA ASCII form.
  61. *
  62. * This method returns the string converted to IDN ASCII form
  63. *
  64. * @throws SyntaxError if the string cannot be converted to ASCII using IDN UTS46 algorithm
  65. */
  66. public static function toAscii(Stringable|string $domain, Option|int|null $options = null): Result
  67. {
  68. $domain = rawurldecode((string) $domain);
  69. if (1 === preg_match(self::REGEXP_IDNA_PATTERN, $domain)) {
  70. FeatureDetection::supportsIdn();
  71. $flags = match (true) {
  72. null === $options => Option::forIDNA2008Ascii(),
  73. $options instanceof Option => $options,
  74. default => Option::new($options),
  75. };
  76. idn_to_ascii($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);
  77. if ([] === $idnaInfo) {
  78. return Result::fromIntl([
  79. 'result' => strtolower($domain),
  80. 'isTransitionalDifferent' => false,
  81. 'errors' => self::validateDomainAndLabelLength($domain),
  82. ]);
  83. }
  84. return Result::fromIntl($idnaInfo);
  85. }
  86. $error = Error::NONE->value;
  87. if (1 !== preg_match(self::REGEXP_REGISTERED_NAME, $domain)) {
  88. $error |= Error::DISALLOWED->value;
  89. }
  90. return Result::fromIntl([
  91. 'result' => strtolower($domain),
  92. 'isTransitionalDifferent' => false,
  93. 'errors' => self::validateDomainAndLabelLength($domain) | $error,
  94. ]);
  95. }
  96. /**
  97. * Converts the input to its IDNA UNICODE form or throw on failure.
  98. *
  99. * @see Converter::toUnicode()
  100. *
  101. * @throws ConversionFailed if the conversion returns error
  102. */
  103. public static function toUnicodeOrFail(Stringable|string $domain, Option|int|null $options = null): string
  104. {
  105. $result = self::toUnicode($domain, $options);
  106. return match (true) {
  107. $result->hasErrors() => throw ConversionFailed::dueToIdnError($domain, $result),
  108. default => $result->domain(),
  109. };
  110. }
  111. /**
  112. * Converts the input to its IDNA UNICODE form.
  113. *
  114. * This method returns the string converted to IDN UNICODE form
  115. *
  116. * @throws SyntaxError if the string cannot be converted to UNICODE using IDN UTS46 algorithm
  117. */
  118. public static function toUnicode(Stringable|string $domain, Option|int|null $options = null): Result
  119. {
  120. $domain = rawurldecode((string) $domain);
  121. if (false === stripos($domain, 'xn--')) {
  122. return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
  123. }
  124. FeatureDetection::supportsIdn();
  125. $flags = match (true) {
  126. null === $options => Option::forIDNA2008Unicode(),
  127. $options instanceof Option => $options,
  128. default => Option::new($options),
  129. };
  130. idn_to_utf8($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);
  131. if ([] === $idnaInfo) {
  132. return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
  133. }
  134. return Result::fromIntl($idnaInfo);
  135. }
  136. /**
  137. * Tells whether the submitted host is a valid IDN regardless of its format.
  138. *
  139. * Returns false if the host is invalid or if its conversion yield the same result
  140. */
  141. public static function isIdn(Stringable|string|null $domain): bool
  142. {
  143. $domain = strtolower(rawurldecode((string) $domain));
  144. $result = match (1) {
  145. preg_match(self::REGEXP_IDNA_PATTERN, $domain) => self::toAscii($domain),
  146. default => self::toUnicode($domain),
  147. };
  148. return match (true) {
  149. $result->hasErrors() => false,
  150. default => $result->domain() !== $domain,
  151. };
  152. }
  153. /**
  154. * Adapted from https://github.com/TRowbotham/idna.
  155. *
  156. * @see https://github.com/TRowbotham/idna/blob/master/src/Idna.php#L236
  157. */
  158. private static function validateDomainAndLabelLength(string $domain): int
  159. {
  160. $error = Error::NONE->value;
  161. $labels = explode('.', $domain);
  162. $maxDomainSize = self::MAX_DOMAIN_LENGTH;
  163. $length = count($labels);
  164. // If the last label is empty, and it is not the first label, then it is the root label.
  165. // Increase the max size by 1, making it 254, to account for the root label's "."
  166. // delimiter. This also means we don't need to check the last label's length for being too
  167. // long.
  168. if ($length > 1 && '' === $labels[$length - 1]) {
  169. ++$maxDomainSize;
  170. array_pop($labels);
  171. }
  172. if (strlen($domain) > $maxDomainSize) {
  173. $error |= Error::DOMAIN_NAME_TOO_LONG->value;
  174. }
  175. foreach ($labels as $label) {
  176. if (strlen($label) > self::MAX_LABEL_LENGTH) {
  177. $error |= Error::LABEL_TOO_LONG->value;
  178. break;
  179. }
  180. }
  181. return $error;
  182. }
  183. }