From 75582753609dedbc90f28e63304e5dbb58438e2f Mon Sep 17 00:00:00 2001 From: alandefreitas Date: Wed, 17 Aug 2022 04:22:26 -0300 Subject: [PATCH] relative fix #407 --- doc/qbk/quickref.xml | 1 + include/boost/url/authority_view.hpp | 177 ++++++++++++++++ include/boost/url/impl/authority_view.ipp | 59 ++++++ include/boost/url/impl/url.ipp | 247 ++++++++++++++++++++++ include/boost/url/impl/url_view_base.ipp | 64 +++--- include/boost/url/url.hpp | 68 ++++++ include/boost/url/url_view_base.hpp | 29 ++- test/unit/url.cpp | 106 ++++++++++ 8 files changed, 707 insertions(+), 44 deletions(-) diff --git a/doc/qbk/quickref.xml b/doc/qbk/quickref.xml index fdacd3d65..b4d1550dc 100644 --- a/doc/qbk/quickref.xml +++ b/doc/qbk/quickref.xml @@ -42,6 +42,7 @@ parse_relative_ref parse_uri parse_uri_reference + relative resolve diff --git a/include/boost/url/authority_view.hpp b/include/boost/url/authority_view.hpp index f0837a533..c5aa5db42 100644 --- a/include/boost/url/authority_view.hpp +++ b/include/boost/url/authority_view.hpp @@ -1013,6 +1013,183 @@ class BOOST_SYMBOL_VISIBLE string_view encoded_host_and_port() const noexcept; + //-------------------------------------------- + // + // Comparison + // + //-------------------------------------------- + + /** Return the result of comparing this with another authority + + This function compares two authorities + according to the Syntax-Based comparison + algorithm. + + @par Exception Safety + Throws nothing. + + @par Specification + @li 6.2.2 Syntax-Based Normalization (rfc3986) + + @return -1 if `*this < other`, 0 if + `*this == other`, and 1 if `*this > other`. + */ + BOOST_URL_DECL + int + compare(authority_view const& other) const noexcept; + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized.
+ + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator==( + authority_view const& a0, + authority_view const& a1) noexcept + { + return a0.compare(a1) == 0; + } + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized. + + @par Effects + @code + return url( a0 ).normalize() != url( a1 ).normalize(); + @endcode + + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator!=( + authority_view const& a0, + authority_view const& a1) noexcept + { + return ! (a0 == a1); + } + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized. + + @par Effects + @code + return url( a0 ).normalize() < url( a1 ).normalize(); + @endcode + + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator<( + authority_view const& a0, + authority_view const& a1) noexcept + { + return a0.compare(a1) < 0; + } + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized. + + @par Effects + @code + return url( a0 ).normalize() <= url( a1 ).normalize(); + @endcode + + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator<=( + authority_view const& a0, + authority_view const& a1) noexcept + { + return a0.compare(a1) <= 0; + } + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized. 
+ + @par Effects + @code + return url( a0 ).normalize() > url( a1 ).normalize(); + @endcode + + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator>( + authority_view const& a0, + authority_view const& a1) noexcept + { + return a0.compare(a1) > 0; + } + + /** Return the result of comparing two authorities + + The authorities are compared component + by component as if they were first + normalized. + + @par Effects + @code + return url( a0 ).normalize() >= url( a1 ).normalize(); + @endcode + + @par Complexity + Linear in `min( a0.size(), a1.size() )` + + @par Exception Safety + Throws nothing + */ + friend + bool + operator>=( + authority_view const& a0, + authority_view const& a1) noexcept + { + return a0.compare(a1) >= 0; + } + // hidden friend friend std::ostream& diff --git a/include/boost/url/impl/authority_view.ipp b/include/boost/url/impl/authority_view.ipp index f698ef49f..a4c893a3a 100644 --- a/include/boost/url/impl/authority_view.ipp +++ b/include/boost/url/impl/authority_view.ipp @@ -259,6 +259,65 @@ parse_authority( return grammar::parse(s, authority_rule); } +//------------------------------------------------ +// +// Comparisons +// +//------------------------------------------------ + +int +authority_view:: +compare(const authority_view& other) const noexcept +{ + int comp = has_userinfo() - other.has_userinfo(); + if ( comp != 0 ) + return comp; + + if (has_userinfo()) + { + comp = detail::compare_encoded( + encoded_user(), + other.encoded_user()); + if ( comp != 0 ) + return comp; + + comp = has_password() - other.has_password(); + if ( comp != 0 ) + return comp; + + if (has_password()) + { + comp = detail::compare_encoded( + encoded_password(), + other.encoded_password()); + if ( comp != 0 ) + return comp; + } + } + + comp = detail::ci_compare_encoded( + encoded_host(), + other.encoded_host()); + if ( comp != 0 ) + return comp; + + comp = has_port() - other.has_port(); 
+ if ( comp != 0 ) + return comp; + + if (has_port()) + { + comp = detail::compare( + port(), + other.port()); + if ( comp != 0 ) + return comp; + } + + return 0; +} + + } // urls } // boost diff --git a/include/boost/url/impl/url.ipp b/include/boost/url/impl/url.ipp index 76c2a7dbd..c58da7cd1 100644 --- a/include/boost/url/impl/url.ipp +++ b/include/boost/url/impl/url.ipp @@ -137,6 +137,253 @@ reserve_impl( u_.cs_ = s_; } +result +relative( + url_view_base const& base, + url_view_base const& href, + url_base& dest) +{ + // AFREITAS: + // - filesystem functions use + // (const path&, const path& base) + // - this function name is probably still bad + // - the function in URI.js behaves slightly + // differently from their own examples + // see https://medialize.github.io/URI.js/docs.html#relativeto + // - we have a bug where u.set_path("") or + // u.segments() = {} crash, so this function + // uses u.segments() = {"."}, which gives + // us slightly different results. + // - The basic exception guarantee is not + // satisfied in case of allocation errors. 
+ + BOOST_ASSERT(&dest != &base); + BOOST_ASSERT(&dest != &href); + + // Validate input + if (!href.is_path_absolute()) + { + // href is already relative + return error::not_a_base; + } + if (!base.is_path_absolute()) + { + // cannot calculate a URI relative to another relative URI + return error::not_a_base; + } + + // Resolve scheme + if (href.scheme() == base.scheme() || + !href.has_scheme()) + dest.remove_scheme(); + else + dest.set_scheme(href.scheme()); + + // Resolve authority + if (dest.has_scheme() || + href.has_authority() != base.has_authority() || + href.authority() != base.authority() || + href.has_userinfo() || + href.has_password()) + { + // Authority is kept: copy authority, path, query and fragment from href + if (href.has_authority()) + dest.set_encoded_authority(href.encoded_authority()); + else + dest.remove_authority(); + dest.set_encoded_path(href.encoded_path()); + dest.normalize_path(); + if (href.has_query()) + dest.set_encoded_query(href.encoded_query()); + else + dest.remove_query(); + if (href.has_fragment()) + dest.set_encoded_fragment(href.encoded_fragment()); + else + dest.remove_fragment(); + return {}; + } + dest.remove_authority(); + + // Resolve path + // 0. Get segments; lastN is the final segment of its own range, or endN when that range is empty + auto segs0 = base.segments(); + auto segs1 = href.segments(); + auto begin0 = segs0.begin(); + auto it0 = begin0; + auto end0 = segs0.end(); + auto last0 = begin0 != end0 ? std::prev(end0) : end0; + auto begin1 = segs1.begin(); + auto it1 = begin1; + auto end1 = segs1.end(); + auto last1 = begin1 != end1 ? std::prev(end1) : end1; + pct_encoded_view const dotdot(".."); + pct_encoded_view const dot("."); + + // 1.
Find the longest common path + while ( + it0 != last0 && + it1 != last1) + { + if (*it0 == *it1) + { + ++it0; + ++it1; + } + else if (*it0 == dot) + { + ++it0; + } + else if (*it1 == dot) + { + ++it1; + } + else if (*it0 == dotdot) + { + ++it0; + if (it1 != begin1) + --it1; + } + else if (*it1 == dotdot) + { + if (it0 != begin0) + --it0; + ++it1; + } + else + { + // Check if *it0 will be consumed by a dotdot + auto it2 = std::next(it0); + std::size_t l = 1; + while (it2 != last0) + { + if (*it2 == dotdot) + { + if (--l == 0) + { + ++it2; + it0 = it2; + break; + } + } + else if (*it2 != dot) + { + ++l; + } + ++it2; + } + if (it0 == it2) + continue; + + // Check if *it1 will be consumed by a dotdot + auto it3 = std::next(it1); + l = 1; + while (it3 != last1) + { + if (*it3 == dotdot) + { + if (--l == 0) + { + ++it3; + it1 = it3; + break; + } + } + else if (*it3 != dot) + { + ++l; + } + ++it3; + } + if (it1 == it3) + continue; + + break; + } + } + + // 1.b Check if paths are the same + if (it0 == last0 && + it1 == last1 && + it0 != end0 && + it1 != end1 && + *it0 == *it1) + { + // Return empty path + dest.segments() = {dot.encoded()}; + if (href.has_query()) + dest.set_encoded_query(href.encoded_query()); + else + dest.remove_query(); + if (href.has_fragment()) + dest.set_encoded_fragment(href.encoded_fragment()); + else + dest.remove_fragment(); + return {}; + } + + // 2. replace each path component in the + // base path with ../ + segments_encoded segs = dest.encoded_segments(); + segs = {dot.encoded()}; + if (it0 != end0) + { + dest.set_path_absolute(false); + auto last0 = std::prev(end0); + while (it0 != last0) + { + if (*it0 == dotdot) + { + if (segs.size() > 1) + segs.pop_back(); + else + segs = {dot.encoded()}; + } + else if (*it0 != dot) + { + if (dest.path() == dot) + segs = {dotdot.encoded()}; + else + segs.push_back(dotdot.encoded()); + } + ++it0; + } + } + + // 3. 
Append the reference path + while (it1 != end1) + { + if (*it1 == dotdot) + { + if (segs.size() > 1) + segs.pop_back(); + else + segs = {dot.encoded()}; + } + else if (*it1 != dot) + { + string_view v = (*it1).encoded(); + if (dest.path() == dot) + segs = {v}; + else + segs.push_back(v); + } + ++it1; + } + if (href.has_query()) + dest.set_encoded_query(href.encoded_query()); + else + dest.remove_query(); + + if (href.has_fragment()) + dest.set_encoded_fragment(href.encoded_fragment()); + else + dest.remove_fragment(); + + return {}; +} + + } // urls } // boost diff --git a/include/boost/url/impl/url_view_base.ipp b/include/boost/url/impl/url_view_base.ipp index 4ef4519ff..a76883dac 100644 --- a/include/boost/url/impl/url_view_base.ipp +++ b/include/boost/url/impl/url_view_base.ipp @@ -548,35 +548,29 @@ int url_view_base:: compare(const url_view_base& other) const noexcept { - int comp = detail::ci_compare( - scheme(), - other.scheme()); + int comp = has_scheme() - other.has_scheme(); if ( comp != 0 ) return comp; - comp = detail::compare_encoded( - encoded_user(), - other.encoded_user()); - if ( comp != 0 ) - return comp; - - comp = detail::compare_encoded( - encoded_password(), - other.encoded_password()); - if ( comp != 0 ) - return comp; + if (has_scheme()) + { + comp = detail::ci_compare( + scheme(), + other.scheme()); + if ( comp != 0 ) + return comp; + } - comp = detail::ci_compare_encoded( - encoded_host(), - other.encoded_host()); + comp = has_authority() - other.has_authority(); if ( comp != 0 ) return comp; - comp = detail::compare( - port(), - other.port()); - if ( comp != 0 ) - return comp; + if (has_authority()) + { + comp = authority().compare(other.authority()); + if ( comp != 0 ) + return comp; + } comp = detail::normalized_path_compare( encoded_path(), @@ -586,18 +580,32 @@ compare(const url_view_base& other) const noexcept if ( comp != 0 ) return comp; - comp = detail::compare_encoded( - encoded_query(), - other.encoded_query()); + comp = 
has_query() - other.has_query(); if ( comp != 0 ) return comp; - comp = detail::compare_encoded( - encoded_fragment(), - other.encoded_fragment()); + if (has_query()) + { + comp = detail::compare_encoded( + encoded_query(), + other.encoded_query()); + if ( comp != 0 ) + return comp; + } + + comp = has_fragment() - other.has_fragment(); if ( comp != 0 ) return comp; + if (has_fragment()) + { + comp = detail::compare_encoded( + encoded_fragment(), + other.encoded_fragment()); + if ( comp != 0 ) + return comp; + } + return 0; } diff --git a/include/boost/url/url.hpp b/include/boost/url/url.hpp index 53145248e..64cb49bcd 100644 --- a/include/boost/url/url.hpp +++ b/include/boost/url/url.hpp @@ -322,6 +322,74 @@ resolve( return dest.resolve_impl(base, ref); } +/** Compare two absolute paths and make one relative to the other + + This function compares the absolute paths in + two urls. It writes to `dest` a url with a relative + path that references the target path relative + to the base path. + + Unlike @ref resolve, this function takes + two absolute paths to create a relative + path. + + If the input URLs contain schemes and + authorities, these are resolved. If the + schemes and authorities are the same, they + are removed before the relative path is + calculated. If they are different, the + reference URL is copied to the result, + omitting the scheme when it matches. + + Given the input base URL, this function + resolves the reference URL as if performing + the following steps: + + @li Normalize both URLs + @li Remove the longest common path from both paths + @li Replace each segment in the base path with ".." + @li Append the reference path + + This function places the result of the + resolution into `dest`, which can be + any of the url containers that inherit + from @ref url_base. + + If an error occurs, the contents of + `dest` are unspecified and the error is returned.
+ + @par Example + @code + url dest; + auto rv = relative( parse_uri_reference( "/relative/sub/foo/sub/file" ).value(), parse_uri_reference( "/relative/path" ).value(), dest ); + assert( ! rv.has_error() ); + assert( dest.string() == "../../../path" ); + @endcode + + @par Exception Safety + Basic guarantee. + Calls to allocate may throw. + + @param base The base URL to resolve against. + + @param href The target URL the relative URL should point to + + @param dest The container where the relative result + is written, upon success. + + @return `error::not_a_base` if the path of `base` or `href` is not absolute. + + @see + @ref url, + @ref url_view. +*/ +BOOST_URL_DECL +result +relative( + url_view_base const& base, + url_view_base const& href, + url_base& dest); + } // urls } // boost diff --git a/include/boost/url/url_view_base.hpp b/include/boost/url/url_view_base.hpp index aeedb3fbc..225920c6c 100644 --- a/include/boost/url/url_view_base.hpp +++ b/include/boost/url/url_view_base.hpp @@ -1436,10 +1436,7 @@ class BOOST_SYMBOL_VISIBLE { pct_decode_opts opt; opt.plus_to_space = false; - string_view s = encoded_path(); - return - detail::access::construct( - s, u_.decoded_[id_path], opt); + return pct_encoded_view(encoded_path()); } /** Return the path segments @@ -1876,8 +1873,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized. @par Effects @@ -1902,8 +1899,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized. @par Effects @@ -1928,8 +1925,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized.
@par Effects @@ -1954,8 +1951,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized. @par Effects @@ -1980,8 +1977,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized. @par Effects @@ -2006,8 +2003,8 @@ class BOOST_SYMBOL_VISIBLE /** Return the result of comparing two URLs - The URLs are compared character - by character as if they were first + The URLs are compared component by + component as if they were first normalized. @par Effects diff --git a/test/unit/url.cpp b/test/unit/url.cpp index 612813a29..e659dd77f 100644 --- a/test/unit/url.cpp +++ b/test/unit/url.cpp @@ -2062,6 +2062,111 @@ class url_test check("g#s/../x" , "http://a/b/c/g#s/../x"); } + void + testRelative() + { + auto const check = []( + string_view b, + string_view r, + string_view e) + { + auto ub = + parse_uri_reference(b).value(); + auto ur = + parse_uri_reference(r).value(); + url dest = parse_uri_reference("x/y" ).value(); + auto rv = relative(ub, ur, dest); + BOOST_TEST(!rv.has_error()); + BOOST_TEST_EQ(dest.string(), e); + }; + + // relative URL / absolute paths + check("/a/path/to/somewhere/else", "/a/path/to/a", "../a"); + check("/relative/sub/foo/sub/file", "/relative/path", "../../../path"); + check("http://google.com/baz", "http://example.org/world.html", "//example.org/world.html"); + check("http://google.com/baz", "http:/world.html", "/world.html"); + // AFREITAS: The paths below should be "" but set_path("") and segments() = {} crash + // "." 
is still equivalent + check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file", "."); + check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?foo=bar#abcd", ".?foo=bar#abcd"); + // "." should be ignored + check("/a/path/././to/./somewhere/else", "/a/./path/./to/a", "../a"); + // ".." should be normalized + check("/a/path/x/../to/y/../somewhere/else", "/b/../a/path/to/a", "../a"); + // same parent + check("/relative/file?some=query#hash", "/relative/path?blubber=1#hash1", "path?blubber=1#hash1"); + // direct parent + check("/relative/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../path?blubber=1#hash1"); + // second parent + check("/relative/sub/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../path?blubber=1#hash1"); + // third parent + check("/relative/sub/foo/sub/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../../path?blubber=1#hash1"); + // parent top level + check("/path/to/file?some=query#hash", "/relative/path?blubber=1#hash1", "../../relative/path?blubber=1#hash1"); + // descendant + check("/base/path/top.html", "/base/path/with/subdir/inner.html", "with/subdir/inner.html"); + // same directory + check("/path/top.html", "/path/", "./"); + // absolute / + check("http://example.org/", "http://example.org/foo/bar/bat", "foo/bar/bat"); + // absolute /foo + check("http://example.org/foo", "http://example.org/foo/bar/bat", "foo/bar/bat"); + // absolute /foo/ + check("http://example.org/foo/", "http://example.org/foo/bar/bat", "bar/bat"); + // same scheme + check("http://example.com/foo/", "http://example.org/foo/bar/bat", "//example.org/foo/bar/bat"); + // different scheme + check("https://example.org/foo/", "http://example.org/foo/bar", "http://example.org/foo/bar"); + // base with no scheme or host + check("/foo/", "http://example.org/foo/bar", "http://example.org/foo/bar"); + // base with no scheme + check("//example.org/foo/bar", 
"http://example.org/foo/bar", "http://example.org/foo/bar"); + // denormalized base + check("/foo/./bar/", "/foo/bar/bat", "bat"); + // denormalized url + check("/foo/bar/", "/foo//bar/bat", "..//bar/bat"); + // credentials + check("http://example.org/foo/", "http://user:pass@example.org/foo/bar", "//user:pass@example.org/foo/bar"); + // base credentials + check("http://user:pass@example.org/foo/bar", "http://example.org/foo/bar", "//example.org/foo/bar"); + // same credentials different host + check("http://user:pass@example.com/foo/bar", "http://user:pass@example.org/foo/bar", "//user:pass@example.org/foo/bar"); + // different port 1 + check("http://example.org:8080/foo/bar", "http://example.org/foo/bar", "//example.org/foo/bar"); + // different port 2 + check("http://example.org:8080/foo/bar", "http://example.org:8081/foo/bar", "//example.org:8081/foo/bar"); + // different port 3 + check("http://example.org/foo/bar", "http://example.org:8081/foo/bar", "//example.org:8081/foo/bar"); + // same path - fragment + check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file#abcd", ".#abcd"); + // same path - query + check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?abcd=123", ".?abcd=123"); + // same path - query and fragment + check("http://www.example.com:8080/dir/file", "http://www.example.com:8080/dir/file?abcd=123#alpha", ".?abcd=123#alpha"); + + auto const fail = []( + string_view b, + string_view r, + error_code e) + { + auto ub = + parse_uri_reference(b).value(); + auto ur = + parse_uri_reference(r).value(); + url dest = parse_uri_reference("x/y" ).value(); + auto rv = relative(ub, ur, dest); + BOOST_TEST(rv.has_error()); + BOOST_TEST_EQ(rv.error(), e); + }; + + // already relative + fail("/foo/", "foo/bar", error::not_a_base); + + // relative base + fail("foo/", "/foo/bar", error::not_a_base); + + } + //-------------------------------------------- void @@ -2251,6 +2356,7 @@ class url_test 
testFragment(); testSegments(); testResolution(); + testRelative(); testOstream(); testNormalize(); }