| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513 |
- <?php
- /**
- * League.Uri (https://uri.thephpleague.com)
- *
- * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
- declare(strict_types=1);
- namespace League\Uri;
- use League\Uri\Exceptions\ConversionFailed;
- use League\Uri\Exceptions\MissingFeature;
- use League\Uri\Exceptions\SyntaxError;
- use League\Uri\Idna\Converter;
- use Stringable;
- use function array_merge;
- use function explode;
- use function filter_var;
- use function inet_pton;
- use function preg_match;
- use function rawurldecode;
- use function sprintf;
- use function strpos;
- use function substr;
- use const FILTER_FLAG_IPV6;
- use const FILTER_VALIDATE_IP;
/**
 * A class to parse a URI string according to RFC3986.
 *
 * All methods are static: the class splits a URI string into its components
 * (`parse`, `parseAuthority`) and assembles a URI string back from components
 * (`build`, `buildUri`, `buildAuthority`).
 *
 * @link    https://tools.ietf.org/html/rfc3986
 * @package League\Uri
 * @author  Ignace Nyamagana Butera <nyamsprod@gmail.com>
 * @since   6.0.0
 *
 * @phpstan-type AuthorityMap array{user:?string, pass:?string, host:?string, port:?int}
 * @phpstan-type ComponentMap array{scheme:?string, user:?string, pass:?string, host:?string, port:?int, path:string, query:?string, fragment:?string}
 * @phpstan-type InputComponentMap array{scheme? : ?string, user? : ?string, pass? : ?string, host? : ?string, port? : ?int, path? : ?string, query? : ?string, fragment? : ?string}
 */
final class UriString
{
    /**
     * Default URI component values.
     *
     * Every component is undefined (`null`) except the path, which is
     * always defined and defaults to the empty string.
     *
     * @var ComponentMap
     */
    private const URI_COMPONENTS = [
        'scheme' => null, 'user' => null, 'pass' => null, 'host' => null,
        'port' => null, 'path' => '', 'query' => null, 'fragment' => null,
    ];

    /**
     * Simple URIs which do not need any parsing.
     *
     * Each entry maps a literal URI string to the components that differ
     * from the defaults in URI_COMPONENTS.
     *
     * @var array<string, array<string>>
     */
    private const URI_SHORTCUTS = [
        '' => [],
        '#' => ['fragment' => ''],
        '?' => ['query' => ''],
        '?#' => ['query' => '', 'fragment' => ''],
        '/' => ['path' => '/'],
        '//' => ['host' => ''],
    ];

    /**
     * Range of invalid characters in URI string (ASCII control characters).
     *
     * @var string
     */
    private const REGEXP_INVALID_URI_CHARS = '/[\x00-\x1f\x7f]/';

    /**
     * RFC3986 regular expression URI splitter.
     *
     * @link https://tools.ietf.org/html/rfc3986#appendix-B
     * @var string
     */
    private const REGEXP_URI_PARTS = ',^
        (?<scheme>(?<scontent>[^:/?\#]+):)?    # URI scheme component
        (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part
        (?<path>[^?\#]*)                       # URI path component
        (?<query>\?(?<qcontent>[^\#]*))?       # URI query component
        (?<fragment>\#(?<fcontent>.*))?        # URI fragment component
    ,x';

    /**
     * URI scheme regular expression.
     *
     * The whole pattern is optional so that the empty string (no scheme) matches.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.1
     * @var string
     */
    private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d+.-]*)?$/i';

    /**
     * IPvFuture regular expression.
     *
     * NOTE(review): the `version` group sits inside the `+` quantifier, so only
     * the LAST hexadecimal digit of the version is captured (e.g. `4` for `v14.`).
     * Confirm whether capturing the full version was intended.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @var string
     */
    private const REGEXP_IP_FUTURE = '/^
        v(?<version>[A-F0-9])+\.
        (?:
            (?<unreserved>[a-z0-9_~\-\.])|
            (?<sub_delims>[!$&\'()*+,;=:])  # also include the : character
        )+
    $/ix';

    /**
     * General registered name regular expression.
     *
     * The DEFINE group only declares named sub-patterns; the actual match is
     * the last line, which calls the `reg_name` sub-pattern for each
     * dot-separated label and tolerates one trailing dot.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @var string
     */
    private const REGEXP_REGISTERED_NAME = '/(?(DEFINE)
        (?<unreserved>[a-z0-9_~\-])   # . is missing as it is used to separate labels
        (?<sub_delims>[!$&\'()*+,;=])
        (?<encoded>%[A-F0-9]{2})
        (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
    )
        ^(?:(?&reg_name)\.)*(?&reg_name)\.?$/ix';

    /**
     * Invalid characters in host regular expression.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @var string
     */
    private const REGEXP_INVALID_HOST_CHARS = '/
        [:\/?#\[\]@ ]  # gen-delims characters as well as the space character
    /ix';

    /**
     * Invalid path for URI without scheme and authority regular expression.
     *
     * Matches a path whose first segment contains a colon.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.3
     * @var string
     */
    private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,';

    /**
     * Host and Port splitter regular expression.
     *
     * The host is either a bracketed literal or everything up to the first colon.
     *
     * @var string
     */
    private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,';

    /**
     * IDN Host detector regular expression (any byte outside \x20-\x7f).
     *
     * @var string
     */
    private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/';

    /**
     * Only the address block fe80::/10 can have a Zone ID attached to it;
     * this constant holds the packed link-local prefix used to detect it.
     *
     * NOTE(review): the value is compared against the first two packed bytes
     * (16 bits), not the 10 significant bits mentioned above - confirm.
     *
     * @var string
     */
    private const ZONE_ID_ADDRESS_BLOCK = "\xfe\x80";

    /**
     * Maximum number of host cached.
     *
     * @var int
     */
    private const MAXIMUM_HOST_CACHED = 100;

    /**
     * Generate a URI string representation from its parsed representation
     * returned by League\Uri\UriString::parse() or PHP's parse_url.
     *
     * If you supply your own array, you are responsible for providing
     * valid components without their URI delimiters.
     *
     * Missing or null components are treated as undefined, except the path
     * which falls back to the empty string.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-5.3
     * @link https://tools.ietf.org/html/rfc3986#section-7.5
     *
     * @param InputComponentMap $components
     */
    public static function build(array $components): string
    {
        return self::buildUri(
            $components['scheme'] ?? null,
            self::buildAuthority($components),
            $components['path'] ?? '',
            $components['query'] ?? null,
            $components['fragment'] ?? null,
        );
    }

    /**
     * Generate a URI string representation based on RFC3986 algorithm.
     *
     * Valid URI components MUST be provided without their URI delimiters
     * but properly encoded. A null component is skipped together with its
     * delimiter; an empty string component still emits its delimiter.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-5.3
     * @link https://tools.ietf.org/html/rfc3986#section-7.5
     */
    public static function buildUri(
        ?string $scheme,
        ?string $authority,
        string $path,
        ?string $query,
        ?string $fragment,
    ): string {
        $uri = '';
        if (null !== $scheme) {
            $uri .= $scheme.':';
        }

        if (null !== $authority) {
            $uri .= '//'.$authority;
        }

        $uri .= $path;
        if (null !== $query) {
            $uri .= '?'.$query;
        }

        if (null !== $fragment) {
            $uri .= '#'.$fragment;
        }

        return $uri;
    }

    /**
     * Generate a URI authority representation from its parsed representation.
     *
     * Returns null when no host is set. The port is appended when set; the
     * user (and the password, only when a user is set) is prepended.
     *
     * @param InputComponentMap $components
     */
    public static function buildAuthority(array $components): ?string
    {
        if (!isset($components['host'])) {
            return null;
        }

        $authority = $components['host'];
        if (isset($components['port'])) {
            $authority .= ':'.$components['port'];
        }

        if (!isset($components['user'])) {
            return $authority;
        }

        $authority = '@'.$authority;
        if (!isset($components['pass'])) {
            return $components['user'].$authority;
        }

        return $components['user'].':'.$components['pass'].$authority;
    }

    /**
     * Parse a URI string into its components.
     *
     * This method parses a URI and returns an associative array containing any
     * of the various components of the URI that are present.
     *
     * <code>
     * $components = UriString::parse('http://foo@test.example.com:42?query#');
     * var_export($components);
     * //will display
     * array(
     *   'scheme' => 'http',           // the URI scheme component
     *   'user' => 'foo',              // the URI user component
     *   'pass' => null,               // the URI pass component
     *   'host' => 'test.example.com', // the URI host component
     *   'port' => 42,                 // the URI port component
     *   'path' => '',                 // the URI path component
     *   'query' => 'query',           // the URI query component
     *   'fragment' => '',             // the URI fragment component
     * );
     * </code>
     *
     * The returned array is similar to PHP's parse_url return value with the following
     * differences:
     *
     * <ul>
     * <li>All components are always present in the returned array</li>
     * <li>Empty and undefined components are treated differently. An empty component is
     * set to the empty string while an undefined component is set to the `null` value.</li>
     * <li>The path component is never undefined</li>
     * <li>The method parses the URI following the RFC3986 rules, but you are still
     * required to validate the returned components against its related scheme specific rules.</li>
     * </ul>
     *
     * @link https://tools.ietf.org/html/rfc3986
     *
     * @throws SyntaxError if the URI contains invalid characters
     * @throws SyntaxError if the URI contains an invalid scheme
     * @throws SyntaxError if the URI contains an invalid path
     *
     * @return ComponentMap
     */
    public static function parse(Stringable|string|int $uri): array
    {
        $uri = (string) $uri;
        if (isset(self::URI_SHORTCUTS[$uri])) {
            /** @var ComponentMap $components */
            $components = array_merge(self::URI_COMPONENTS, self::URI_SHORTCUTS[$uri]);

            return $components;
        }

        if (1 === preg_match(self::REGEXP_INVALID_URI_CHARS, $uri)) {
            throw new SyntaxError(sprintf('The uri `%s` contains invalid characters', $uri));
        }

        //if the first character is a known URI delimiter parsing can be simplified
        //(the empty string is handled by the shortcuts above, so index 0 exists)
        $first_char = $uri[0];

        //The URI is made of the fragment only
        if ('#' === $first_char) {
            [, $fragment] = explode('#', $uri, 2);
            $components = self::URI_COMPONENTS;
            $components['fragment'] = $fragment;

            return $components;
        }

        //The URI is made of the query and fragment
        if ('?' === $first_char) {
            [, $partial] = explode('?', $uri, 2);
            //a missing fragment is padded with null to keep it undefined
            [$query, $fragment] = explode('#', $partial, 2) + [1 => null];
            $components = self::URI_COMPONENTS;
            $components['query'] = $query;
            $components['fragment'] = $fragment;

            return $components;
        }

        //use RFC3986 URI regexp to split the URI
        preg_match(self::REGEXP_URI_PARTS, $uri, $parts);
        //trailing groups that did not participate are absent from $parts
        $parts += ['query' => '', 'fragment' => ''];

        if (':' === ($parts['scheme'] ?? null) || 1 !== preg_match(self::REGEXP_URI_SCHEME, $parts['scontent'] ?? '')) {
            throw new SyntaxError(sprintf('The uri `%s` contains an invalid scheme', $uri));
        }

        if ('' === ($parts['scheme'] ?? '').($parts['authority'] ?? '') && 1 === preg_match(self::REGEXP_INVALID_PATH, $parts['path'] ?? '')) {
            throw new SyntaxError(sprintf('The uri `%s` contains an invalid path.', $uri));
        }

        //a delimiter-including group equal to '' means the component is undefined,
        //otherwise the delimiter-free "content" group holds its value
        /** @var ComponentMap $components */
        $components = array_merge(
            self::URI_COMPONENTS,
            '' === ($parts['authority'] ?? null) ? [] : self::parseAuthority($parts['acontent'] ?? null),
            [
                'path' => $parts['path'] ?? '',
                'scheme' => '' === ($parts['scheme'] ?? null) ? null : ($parts['scontent'] ?? null),
                'query' => '' === $parts['query'] ? null : ($parts['qcontent'] ?? null),
                'fragment' => '' === $parts['fragment'] ? null : ($parts['fcontent'] ?? null),
            ]
        );

        return $components;
    }

    /**
     * Parses the URI authority part.
     *
     * A null authority yields all-null components; an empty authority yields
     * an empty host. Otherwise the user information (before the first `@`) is
     * split on its first `:` and the remainder is split into host and port.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2
     *
     * @throws SyntaxError If the port component is invalid
     * @throws SyntaxError If the host component is invalid
     *
     * @return AuthorityMap
     */
    public static function parseAuthority(Stringable|string|null $authority): array
    {
        $components = ['user' => null, 'pass' => null, 'host' => null, 'port' => null];
        if (null === $authority) {
            return $components;
        }

        $authority = (string) $authority;
        $components['host'] = '';
        if ('' === $authority) {
            return $components;
        }

        $parts = explode('@', $authority, 2);
        if (isset($parts[1])) {
            //a user information without `:` leaves the password undefined
            [$components['user'], $components['pass']] = explode(':', $parts[0], 2) + [1 => null];
        }

        //without `@` the whole authority is the host[:port] part
        preg_match(self::REGEXP_HOST_PORT, $parts[1] ?? $parts[0], $matches);
        $matches += ['port' => ''];

        $components['port'] = self::filterPort($matches['port']);
        $components['host'] = self::filterHost($matches['host'] ?? '');

        return $components;
    }

    /**
     * Filter and format the port component.
     *
     * An empty port is undefined (null); a digits-only port is cast to int.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws SyntaxError if the port is not made of digits only
     */
    private static function filterPort(string $port): ?int
    {
        return match (true) {
            '' === $port => null,
            1 === preg_match('/^\d*$/', $port) => (int) $port,
            default => throw new SyntaxError(sprintf('The port `%s` is invalid', $port)),
        };
    }

    /**
     * Validates a host and returns it unchanged.
     *
     * The empty host is always accepted. Hosts that passed validation are
     * remembered in a function-static cache; when the cache holds more than
     * MAXIMUM_HOST_CACHED entries its first entry is shifted off before the
     * current host is validated.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws SyntaxError if the registered name is invalid
     * @throws SyntaxError if the bracketed IP host is malformed
     */
    private static function filterHost(string $host): string
    {
        if ('' === $host) {
            return $host;
        }

        /** @var array<string, 1> $hostCache */
        static $hostCache = [];
        if (isset($hostCache[$host])) {
            return $host;
        }

        if (self::MAXIMUM_HOST_CACHED < count($hostCache)) {
            array_shift($hostCache);
        }

        //anything not fully wrapped in brackets is treated as a registered name
        if ('[' !== $host[0] || !str_ends_with($host, ']')) {
            self::filterRegisteredName($host);
            $hostCache[$host] = 1;

            return $host;
        }

        //strip the surrounding brackets before validating the IP literal
        if (self::isIpHost(substr($host, 1, -1))) {
            $hostCache[$host] = 1;

            return $host;
        }

        throw new SyntaxError(sprintf('Host `%s` is invalid : the IP host is malformed', $host));
    }

    /**
     * Throws if the host is not a registered name and not a valid IDN host.
     *
     * The percent-decoded host is first matched against the registered name
     * pattern. If that fails and the decoded host contains no byte outside
     * \x20-\x7f it is rejected; otherwise the original host is handed to the
     * IDNA converter.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     *
     * @throws SyntaxError if the registered name is invalid
     * @throws MissingFeature if IDN support or ICU requirement are not available or met.
     * @throws ConversionFailed if the submitted IDN host cannot be converted to a valid ascii form
     */
    private static function filterRegisteredName(string $host): void
    {
        $formattedHost = rawurldecode($host);
        if (1 === preg_match(self::REGEXP_REGISTERED_NAME, $formattedHost)) {
            return;
        }

        //to test IDN host non-ascii characters must be present in the host
        if (1 !== preg_match(self::REGEXP_IDN_PATTERN, $formattedHost)) {
            throw new SyntaxError(sprintf('Host `%s` is invalid: the host is not a valid registered name', $host));
        }

        Converter::toAsciiOrFail($host);
    }

    /**
     * Validates a IPv6/IPvFuture host (given without its enclosing brackets).
     *
     * Checks, in order: a plain IPv6 address; an IPvFuture literal whose
     * captured version is neither `4` nor `6`; an IPv6 address followed by a
     * `%`-introduced zone identifier.
     *
     * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
     * @link https://tools.ietf.org/html/rfc6874#section-2
     * @link https://tools.ietf.org/html/rfc6874#section-4
     */
    private static function isIpHost(string $ipHost): bool
    {
        if (false !== filter_var($ipHost, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
            return true;
        }

        if (1 === preg_match(self::REGEXP_IP_FUTURE, $ipHost, $matches)) {
            return !in_array($matches['version'], ['4', '6'], true);
        }

        //everything from the first `%` on is the zone identifier; once decoded
        //it must not contain any character forbidden in a host
        $pos = strpos($ipHost, '%');
        if (false === $pos || 1 === preg_match(self::REGEXP_INVALID_HOST_CHARS, rawurldecode(substr($ipHost, $pos)))) {
            return false;
        }

        //the address part must be valid IPv6 and its packed form must start
        //with the link-local prefix bytes
        $ipHost = substr($ipHost, 0, $pos);

        return false !== filter_var($ipHost, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
            && str_starts_with((string)inet_pton($ipHost), self::ZONE_ID_ADDRESS_BLOCK);
    }
}
|