From 9c40e3808833eb5d142b89bffc0cb467124a21f2 Mon Sep 17 00:00:00 2001
From: alandefreitas
Date: Wed, 17 Aug 2022 04:22:26 -0300
Subject: [PATCH] relative

fix #407
---
 doc/qbk/quickref.xml                      |   1 +
 include/boost/url/authority_view.hpp      | 175 +++++++++++++++
 include/boost/url/impl/authority_view.ipp |  59 ++++++
 include/boost/url/impl/url.ipp            | 247 ++++++++++++++++++++++
 include/boost/url/impl/url_view_base.ipp  |  64 +++---
 include/boost/url/url.hpp                 | 166 ++++++++++++++-
 test/unit/url.cpp                         | 106 ++++++++++
 7 files changed, 788 insertions(+), 30 deletions(-)

diff --git a/doc/qbk/quickref.xml b/doc/qbk/quickref.xml
index 592d4fdf7..49c1abc30 100644
--- a/doc/qbk/quickref.xml
+++ b/doc/qbk/quickref.xml
@@ -42,6 +42,7 @@
 parse_relative_ref
 parse_uri
 parse_uri_reference
+relative
 resolve
diff --git a/include/boost/url/authority_view.hpp b/include/boost/url/authority_view.hpp
index 66033fd17..a044ffdf7 100644
--- a/include/boost/url/authority_view.hpp
+++ b/include/boost/url/authority_view.hpp
@@ -1187,6 +1187,181 @@ class BOOST_SYMBOL_VISIBLE
     encoded_host_and_port() const noexcept;
 
     //--------------------------------------------
+    //
+    // Comparison
+    //
+    //--------------------------------------------
+
+    /** Return the result of comparing this with another authority
+
+        This function compares two authorities
+        according to Syntax-Based comparison
+        algorithm.
+
+        @par Exception Safety
+        Throws nothing.
+
+        @return -1 if `*this < other`, 0 if
+            `*this == other`, and 1 if `*this > other`.
+
+        @par Specification
+        @li 6.2.2 Syntax-Based Normalization (rfc3986)
+    */
+    BOOST_URL_DECL
+    int
+    compare(authority_view const& other) const noexcept;
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator==(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return a0.compare(a1) == 0;
+    }
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Effects
+        @code
+        return url( a0 ).normalize() != url( a1 ).normalize();
+        @endcode
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator!=(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return ! (a0 == a1);
+    }
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Effects
+        @code
+        return url( a0 ).normalize() < url( a1 ).normalize();
+        @endcode
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator<(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return a0.compare(a1) < 0;
+    }
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Effects
+        @code
+        return url( a0 ).normalize() <= url( a1 ).normalize();
+        @endcode
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator<=(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return a0.compare(a1) <= 0;
+    }
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Effects
+        @code
+        return url( a0 ).normalize() > url( a1 ).normalize();
+        @endcode
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator>(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return a0.compare(a1) > 0;
+    }
+
+    /** Return the result of comparing two authorities
+
+        The authorities are compared component
+        by component as if they were first
+        normalized.
+
+        @par Effects
+        @code
+        return url( a0 ).normalize() >= url( a1 ).normalize();
+        @endcode
+
+        @par Complexity
+        Linear in `min( a0.size(), a1.size() )`
+
+        @par Exception Safety
+        Throws nothing
+    */
+    friend
+    bool
+    operator>=(
+        authority_view const& a0,
+        authority_view const& a1) noexcept
+    {
+        return a0.compare(a1) >= 0;
+    }
 
     // hidden friend
     friend
diff --git a/include/boost/url/impl/authority_view.ipp b/include/boost/url/impl/authority_view.ipp
index 48becf456..c429b2af1 100644
--- a/include/boost/url/impl/authority_view.ipp
+++ b/include/boost/url/impl/authority_view.ipp
@@ -387,6 +387,65 @@ parse_authority(
     return grammar::parse(s, authority_rule);
 }
 
+//------------------------------------------------
+//
+// Comparisons
+//
+//------------------------------------------------
+
+int
+authority_view::
+compare(const authority_view& other) const noexcept
+{
+    int comp = has_userinfo() - other.has_userinfo();
+    if ( comp != 0 )
+        return comp;
+
+    if (has_userinfo())
+    {
+        comp = detail::compare_encoded(
+            encoded_user(),
+            other.encoded_user());
+        if ( comp != 0 )
+            return comp;
+
+        comp = has_password() - other.has_password();
+        if ( comp != 0 )
+            return comp;
+
+        if (has_password())
+        {
+            comp = detail::compare_encoded(
+                encoded_password(),
+                other.encoded_password());
+            if ( comp != 0 )
+                return comp;
+        }
+    }
+
+    comp = detail::ci_compare_encoded(
+        encoded_host(),
+        other.encoded_host());
+    if ( comp != 0 )
+        return comp;
+
+    comp = has_port() - other.has_port();
+    if ( comp != 0 )
+        return comp;
+
+    if (has_port())
+    {
+        comp = detail::compare(
+            port(),
+            other.port());
+        if ( comp != 0 )
+            return comp;
+    }
+
+    return 0;
+}
+
+
 } // urls
 } // boost
 
diff --git a/include/boost/url/impl/url.ipp b/include/boost/url/impl/url.ipp
index 131eeb1a1..395af4e5d 100644
--- a/include/boost/url/impl/url.ipp
+++ b/include/boost/url/impl/url.ipp
@@ -140,6 +140,253 @@ reserve_impl(
     u_.cs_ = s_;
 }
 
+result
+relative(
+    url_view_base const& base,
+    url_view_base const& href,
+    url_base& dest)
+{
+    // AFREITAS:
+    // - filesystem functions use
+    //   (const path&, const path& base)
+    // - this function name is probably still bad
+    // - the function in URI.js behaves slightly
+    //   differently from their own examples
+    //   see https://medialize.github.io/URI.js/docs.html#relativeto
+    // - we have a bug where u.set_path("") or
+    //   u.segments() = {} crash, so this function
+    //   uses u.segments() = {"."}, which gives
+    //   us slightly different results.
+    // - The basic exception guarantee is not
+    //   satisfied in case of allocation errors.
+
+    BOOST_ASSERT(&dest != &base);
+    BOOST_ASSERT(&dest != &href);
+
+    // Validate input
+    if (!href.is_path_absolute())
+    {
+        // href is already relative
+        return error::not_a_base;
+    }
+    if (!base.is_path_absolute())
+    {
+        // cannot calculate a URI relative to another relative URI
+        return error::not_a_base;
+    }
+
+    // Resolve scheme
+    if (href.scheme() == base.scheme() ||
+        !href.has_scheme())
+        dest.remove_scheme();
+    else
+        dest.set_scheme(href.scheme());
+
+    // Resolve authority
+    if (dest.has_scheme() ||
+        href.has_authority() != base.has_authority() ||
+        href.authority() != base.authority() ||
+        href.has_userinfo() ||
+        href.has_password())
+    {
+        // Otherwise, copy all but scheme from href
+        if (href.has_authority())
+            dest.set_encoded_authority(href.encoded_authority());
+        else
+            dest.remove_authority();
+        dest.set_encoded_path(href.encoded_path());
+        dest.normalize_path();
+        if (href.has_query())
+            dest.set_encoded_query(href.encoded_query());
+        else
+            dest.remove_query();
+        if (href.has_fragment())
+            dest.set_encoded_fragment(href.encoded_fragment());
+        else
+            dest.remove_fragment();
+        return {};
+    }
+    dest.remove_authority();
+
+    // Resolve path
+    // 0. Get segments
+    auto segs0 = base.segments();
+    auto segs1 = href.segments();
+    auto begin0 = segs0.begin();
+    auto it0 = begin0;
+    auto end0 = segs0.end();
+    auto last0 = begin0 != end0 ? std::prev(end0) : end0;
+    auto begin1 = segs1.begin();
+    auto it1 = begin1;
+    auto end1 = segs1.end();
+    auto last1 = begin1 != end1 ? std::prev(end1) : end1;
+    pct_encoded_view const dotdot("..");
+    pct_encoded_view const dot(".");
+
+    // 1. Find the longest common path
+    while (
+        it0 != last0 &&
+        it1 != last1)
+    {
+        if (*it0 == *it1)
+        {
+            ++it0;
+            ++it1;
+        }
+        else if (*it0 == dot)
+        {
+            ++it0;
+        }
+        else if (*it1 == dot)
+        {
+            ++it1;
+        }
+        else if (*it0 == dotdot)
+        {
+            ++it0;
+            if (it1 != begin1)
+                --it1;
+        }
+        else if (*it1 == dotdot)
+        {
+            if (it0 != begin0)
+                --it0;
+            ++it1;
+        }
+        else
+        {
+            // Check if *it0 will be consumed by a dotdot
+            auto it2 = std::next(it0);
+            std::size_t l = 1;
+            while (it2 != last0)
+            {
+                if (*it2 == dotdot)
+                {
+                    if (--l == 0)
+                    {
+                        ++it2;
+                        it0 = it2;
+                        break;
+                    }
+                }
+                else if (*it2 != dot)
+                {
+                    ++l;
+                }
+                ++it2;
+            }
+            if (it0 == it2)
+                continue;
+
+            // Check if *it1 will be consumed by a dotdot
+            auto it3 = std::next(it1);
+            l = 1;
+            while (it3 != last1)
+            {
+                if (*it3 == dotdot)
+                {
+                    if (--l == 0)
+                    {
+                        ++it3;
+                        it1 = it3;
+                        break;
+                    }
+                }
+                else if (*it3 != dot)
+                {
+                    ++l;
+                }
+                ++it3;
+            }
+            if (it1 == it3)
+                continue;
+
+            break;
+        }
+    }
+
+    // 1.b Check if paths are the same
+    if (it0 == last0 &&
+        it1 == last1 &&
+        it0 != end0 &&
+        it1 != end1 &&
+        *it0 == *it1)
+    {
+        // Return empty path
+        dest.segments() = {dot.encoded()};
+        if (href.has_query())
+            dest.set_encoded_query(href.encoded_query());
+        else
+            dest.remove_query();
+        if (href.has_fragment())
+            dest.set_encoded_fragment(href.encoded_fragment());
+        else
+            dest.remove_fragment();
+        return {};
+    }
+
+    // 2. replace each path component in the
+    //    base path with ../
+    segments_encoded segs = dest.encoded_segments();
+    segs = {dot.encoded()};
+    if (it0 != end0)
+    {
+        dest.set_path_absolute(false);
+        auto last0 = std::prev(end0);
+        while (it0 != last0)
+        {
+            if (*it0 == dotdot)
+            {
+                if (segs.size() > 1)
+                    segs.pop_back();
+                else
+                    segs = {dot.encoded()};
+            }
+            else if (*it0 != dot)
+            {
+                if (dest.path() == dot)
+                    segs = {dotdot.encoded()};
+                else
+                    segs.push_back(dotdot.encoded());
+            }
+            ++it0;
+        }
+    }
+
+    // 3. Append the reference path
+    while (it1 != end1)
+    {
+        if (*it1 == dotdot)
+        {
+            if (segs.size() > 1)
+                segs.pop_back();
+            else
+                segs = {dot.encoded()};
+        }
+        else if (*it1 != dot)
+        {
+            string_view v = (*it1).encoded();
+            if (dest.path() == dot)
+                segs = {v};
+            else
+                segs.push_back(v);
+        }
+        ++it1;
+    }
+    if (href.has_query())
+        dest.set_encoded_query(href.encoded_query());
+    else
+        dest.remove_query();
+
+    if (href.has_fragment())
+        dest.set_encoded_fragment(href.encoded_fragment());
+    else
+        dest.remove_fragment();
+
+    return {};
+}
+
+
 void
 url::
 cleanup(
diff --git a/include/boost/url/impl/url_view_base.ipp b/include/boost/url/impl/url_view_base.ipp
index fcc66b3d5..2ff7de2b1 100644
--- a/include/boost/url/impl/url_view_base.ipp
+++ b/include/boost/url/impl/url_view_base.ipp
@@ -673,35 +673,29 @@ int
 url_view_base::
 compare(const url_view_base& other) const noexcept
 {
-    int comp = detail::ci_compare(
-        scheme(),
-        other.scheme());
+    int comp = has_scheme() - other.has_scheme();
     if ( comp != 0 )
         return comp;
 
-    comp = detail::compare_encoded(
-        encoded_user(),
-        other.encoded_user());
-    if ( comp != 0 )
-        return comp;
-
-    comp = detail::compare_encoded(
-        encoded_password(),
-        other.encoded_password());
-    if ( comp != 0 )
-        return comp;
+    if (has_scheme())
+    {
+        comp = detail::ci_compare(
+            scheme(),
+            other.scheme());
+        if ( comp != 0 )
+            return comp;
+    }
 
-    comp = detail::ci_compare_encoded(
-        encoded_host(),
-        other.encoded_host());
+    comp = has_authority() - other.has_authority();
     if ( comp != 0 )
         return comp;
 
-    comp = detail::compare(
-        port(),
-        other.port());
-    if ( comp != 0 )
-        return comp;
+    if (has_authority())
+    {
+        comp = authority().compare(other.authority());
+        if ( comp != 0 )
+            return comp;
+    }
 
     comp = detail::normalized_path_compare(
         encoded_path(),
@@ -711,18 +705,32 @@ compare(const url_view_base& other) const noexcept
     if ( comp != 0 )
         return comp;
 
-    comp = detail::compare_encoded(
-        encoded_query(),
-        other.encoded_query());
+    comp = has_query() - other.has_query();
     if ( comp != 0 )
         return comp;
 
-    comp = detail::compare_encoded(
-        encoded_fragment(),
-        other.encoded_fragment());
+    if (has_query())
+    {
+        comp = detail::compare_encoded(
+            encoded_query(),
+            other.encoded_query());
+        if ( comp != 0 )
+            return comp;
+    }
+
+    comp = has_fragment() - other.has_fragment();
     if ( comp != 0 )
         return comp;
 
+    if (has_fragment())
+    {
+        comp = detail::compare_encoded(
+            encoded_fragment(),
+            other.encoded_fragment());
+        if ( comp != 0 )
+            return comp;
+    }
+
     return 0;
 }
 
diff --git a/include/boost/url/url.hpp b/include/boost/url/url.hpp
index 6b491ba29..4f87b9ae1 100644
--- a/include/boost/url/url.hpp
+++ b/include/boost/url/url.hpp
@@ -69,7 +69,7 @@ class BOOST_SYMBOL_VISIBLE url
     /** Return the maximum number of characters possible
 
         This represents the largest number of
-        characters that are possible in a
+        characters that are possible in a
         Currently the limit is either 2^32-2
         characters or 2^64-2 characters, depending
         on the system architecture.
@@ -311,7 +311,6 @@ class BOOST_SYMBOL_VISIBLE url
         v0.swap(v1);
     }
 
-
 private:
     char* allocate(std::size_t);
     void deallocate(char* s);
@@ -321,6 +320,169 @@ class BOOST_SYMBOL_VISIBLE url
     BOOST_URL_DECL void cleanup(op_t&) override;
 };
 
+//----------------------------------------------------------
+
+/** Resolve a URL reference against a base URL
+
+    This function attempts to resolve a URL
+    reference `ref` against the base URL `base`
+    in a manner similar to that of a web browser
+    resolving an anchor tag. The base URL
+    must satisfy the absolute-URI
+    grammar.
+
+    Relative references are only usable when
+    in the context of a base absolute URI.
+    This process of resolving a relative
+    reference within the context of
+    a base URI is defined in detail
+    in rfc3986 (see below).
+
+    The resolution process works as if the
+    relative reference is appended to the base
+    URI and the result is normalized.
+
+    Given the input base URL, this function
+    resolves the relative reference
+    as if performing the following steps:
+
+    @li Ensure the base URI has at least a scheme
+    @li Normalizing the reference path
+    @li Merge base and reference paths
+    @li Normalize the merged path
+
+    This function places the result of the
+    resolution into `dest`, which can be
+    any of the url containers that inherit
+    from @ref url_base.
+
+    If an error occurs, the contents of
+    `dest` is unspecified and `ec` is set.
+
+    @par Example
+    @code
+    url dest;
+    error_code ec;
+
+    resolve("/one/two/three", "four", dest, ec);
+    assert( dest.str() == "/one/two/four" );
+
+    resolve("http://example.com/", "/one", dest, ec);
+    assert( dest.str() == "http://example.com/one" );
+
+    resolve("http://example.com/one", "/two", dest, ec);
+    assert( dest.str() == "http://example.com/two" );
+
+    resolve("http://a/b/c/d;p?q", "g#s", dest, ec);
+    assert( dest.str() == "http://a/b/c/g#s" );
+    @endcode
+
+    @par BNF
+    @code
+    absolute-URI = scheme ":" hier-part [ "?" query ]
+    @endcode
+
+    @par Exception Safety
+    Basic guarantee.
+    Calls to allocate may throw.
+
+    @return Error if any occurred
+
+    @param base The base URL to resolve against.
+
+    @param ref The URL reference to resolve.
+
+    @param dest The container where the result
+    is written, upon success.
+
+    @par Specification
+    5. Reference Resolution (rfc3986)
+
+    @see
+        @ref url,
+        @ref url_view.
+*/
+inline
+result
+resolve(
+    url_view_base const& base,
+    url_view_base const& ref,
+    url_base& dest)
+{
+    BOOST_ASSERT(&dest != &base);
+    BOOST_ASSERT(&dest != &ref);
+    return dest.resolve_impl(base, ref);
+}
+
+/** Compare two absolute paths and make one relative to the other
+
+    This function compares the absolute paths in
+    two urls. It returns a new url with a relative
+    path that references the target path relative
+    to the base path.
+
+    Unlike @ref resolve, this function takes
+    two absolute paths to create a relative
+    path.
+
+    If the input URLs contain schemes and
+    authorities, these are resolved. If the
+    schemes and authorities are the same, they
+    are removed before the relative path is
+    calculated. If they are different, only
+    the relative path of the reference URL
+    is returned.
+
+    Given the input base URL, this function
+    resolves the reference URL as if performing
+    the following steps:
+
+    @li Normalize both URLs
+    @li Remove the longest common path from both paths
+    @li Replace each segment in the base path with ".."
+    @li Append the reference path
+
+    This function places the result of the
+    resolution into `dest`, which can be
+    any of the url containers that inherit
+    from @ref url_base.
+
+    If an error occurs, the contents of `dest`
+    are unspecified and the error is returned.
+
+    @par Example
+    @code
+    url dest;
+    auto const base = parse_uri_reference( "/relative/sub/foo/sub/file" ).value();
+    auto const rv = relative( base, parse_uri_reference( "/relative/path" ).value(), dest );
+    assert( ! rv.has_error() && dest.string() == "../../../path" );
+    @endcode
+
+    @par Exception Safety
+    Basic guarantee.
+    Calls to allocate may throw.
+
+    @param base The base URL to resolve against.
+
+    @param href The target URL the relative URL should point to
+
+    @param dest The container where the relative result
+    is written, upon success.
+
+    @return Error if any occurred
+
+    @see
+        @ref url,
+        @ref url_view.
+*/
+BOOST_URL_DECL
+result
+relative(
+    url_view_base const& base,
+    url_view_base const& href,
+    url_base& dest);
+
 } // urls
 } // boost
 
diff --git a/test/unit/url.cpp b/test/unit/url.cpp
index cb140991c..cf9ab2519 100644
--- a/test/unit/url.cpp
+++ b/test/unit/url.cpp
@@ -810,6 +810,111 @@ struct url_test
         check("g#s/../x" , "http://a/b/c/g#s/../x");
     }
 
+    void
+    testRelative()
+    {
+        auto const check = [](
+            string_view b,
+            string_view r,
+            string_view e)
+        {
+            auto ub =
+                parse_uri_reference(b).value();
+            auto ur =
+                parse_uri_reference(r).value();
+            url dest = parse_uri_reference("x/y" ).value();
+            auto rv = relative(ub, ur, dest);
+            BOOST_TEST(!rv.has_error());
+            BOOST_TEST_EQ(dest.string(), e);
+        };
+
+        // relative URL / absolute paths
+        check("/a/path/to/somewhere/else", "/a/path/to/a", "../a");
+        check("/relative/sub/foo/sub/file", "/relative/path", "../../../path");
+        check("http://google.com/baz", "http://example.org/world.html", "//example.org/world.html");
+        check("http://google.com/baz", "http:/world.html", "/world.html");
+        // AFREITAS: The paths below should be "" but set_path("") and segments() = {} crash
+        // "." is still equivalent
+        check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file", ".");
+        check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?foo=bar#abcd", ".?foo=bar#abcd");
+        // "." should be ignored
+        check("/a/path/././to/./somewhere/else", "/a/./path/./to/a", "../a");
+        // ".." should be normalized
+        check("/a/path/x/../to/y/../somewhere/else", "/b/../a/path/to/a", "../a");
+        // same parent
+        check("/relative/file?some=query#hash", "/relative/path?blubber=1#hash1", "path?blubber=1#hash1");
+        // direct parent
+        check("/relative/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../path?blubber=1#hash1");
+        // second parent
+        check("/relative/sub/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../path?blubber=1#hash1");
+        // third parent
+        check("/relative/sub/foo/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../../path?blubber=1#hash1");
+        // parent top level
+        check("/path/to/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../relative/path?blubber=1#hash1");
+        // descendant
+        check("/base/path/top.html", "/base/path/with/subdir/inner.html", "with/subdir/inner.html");
+        // same directory
+        check("/path/top.html", "/path/", "./");
+        // absolute /
+        check("http://example.org/", "http://example.org/foo/bar/bat", "foo/bar/bat");
+        // absolute /foo
+        check("http://example.org/foo", "http://example.org/foo/bar/bat", "foo/bar/bat");
+        // absolute /foo/
+        check("http://example.org/foo/", "http://example.org/foo/bar/bat", "bar/bat");
+        // same scheme
+        check("http://example.com/foo/", "http://example.org/foo/bar/bat", "//example.org/foo/bar/bat");
+        // different scheme
+        check("https://example.org/foo/", "http://example.org/foo/bar", "http://example.org/foo/bar");
+        // base with no scheme or host
+        check("/foo/", "http://example.org/foo/bar", "http://example.org/foo/bar");
+        // base with no scheme
+        check("//example.org/foo/bar", "http://example.org/foo/bar", "http://example.org/foo/bar");
+        // denormalized base
+        check("/foo/./bar/", "/foo/bar/bat", "bat");
+        // denormalized url
+        check("/foo/bar/", "/foo//bar/bat", "..//bar/bat");
+        // credentials
+        check("http://example.org/foo/", "http://user:pass@example.org/foo/bar", "//user:pass@example.org/foo/bar");
+        // base credentials
+        check("http://user:pass@example.org/foo/bar", "http://example.org/foo/bar", "//example.org/foo/bar");
+        // same credentials different host
+        check("http://user:pass@example.com/foo/bar", "http://user:pass@example.org/foo/bar", "//user:pass@example.org/foo/bar");
+        // different port 1
+        check("http://example.org:8080/foo/bar", "http://example.org/foo/bar", "//example.org/foo/bar");
+        // different port 2
+        check("http://example.org:8080/foo/bar", "http://example.org:8081/foo/bar", "//example.org:8081/foo/bar");
+        // different port 3
+        check("http://example.org/foo/bar", "http://example.org:8081/foo/bar", "//example.org:8081/foo/bar");
+        // same path - fragment
+        check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file#abcd", ".#abcd");
+        // same path - query
+        check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?abcd=123", ".?abcd=123");
+        // same path - query and fragment
+        check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?abcd=123#alpha", ".?abcd=123#alpha");
+
+        auto const fail = [](
+            string_view b,
+            string_view r,
+            error_code e)
+        {
+            auto ub =
+                parse_uri_reference(b).value();
+            auto ur =
+                parse_uri_reference(r).value();
+            url dest = parse_uri_reference("x/y" ).value();
+            auto rv = relative(ub, ur, dest);
+            BOOST_TEST(rv.has_error());
+            BOOST_TEST_EQ(rv.error(), e);
+        };
+
+        // already relative
+        fail("/foo/", "foo/bar", error::not_a_base);
+
+        // relative base
+        fail("foo/", "/foo/bar", error::not_a_base);
+
+    }
+
     //--------------------------------------------
 
     void
@@ -1003,6 +1108,7 @@ struct url_test
         testFragment();
         testSegments();
         testResolution();
+        testRelative();
         testOstream();
         testNormalize();
         testSwap();