From 9bb54ec5081a73022ba14b9559ccfaa65b984a13 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 18 Sep 2024 20:05:05 -0400 Subject: [PATCH] more constexpr (#738) * convert more functions to constexpr * moving functions around * tweak * lint * simplify --------- Co-authored-by: Yagiz Nizipli --- .github/workflows/aarch64.yml | 7 +- include/ada.h | 1 + include/ada/helpers.h | 6 +- include/ada/url-inl.h | 18 ++- include/ada/url.h | 26 ++-- include/ada/url_aggregator-inl.h | 224 ++++++++++++++++++++++++++++--- include/ada/url_aggregator.h | 41 +++--- include/ada/url_base-inl.h | 3 +- include/ada/url_base.h | 2 +- include/ada/url_components-inl.h | 89 ++++++++++++ include/ada/url_components.h | 3 +- src/url-getters.cpp | 4 - src/url_aggregator.cpp | 186 +------------------------ src/url_components.cpp | 75 ----------- 14 files changed, 349 insertions(+), 336 deletions(-) create mode 100644 include/ada/url_components-inl.h diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml index 1d544bf76..a2751aed6 100644 --- a/.github/workflows/aarch64.yml +++ b/.github/workflows/aarch64.yml @@ -23,16 +23,11 @@ concurrency: jobs: build: runs-on: ubuntu-latest - strategy: - matrix: - shared: [OFF] steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - uses: uraimo/run-on-arch-action@b0ffb25eb00af00468375982384441f063da1741 # v2.7.2 name: Build and Test id: runcmd - env: - CXX: g++-12 with: arch: aarch64 githubToken: ${{ github.token }} @@ -41,6 +36,6 @@ jobs: apt-get update -q -y apt-get install -y cmake make g++ ninja-build git run: | - cmake -DADA_SANITIZE_BOUNDS_STRICT=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build + cmake -B build cmake --build build ctest --test-dir build diff --git a/include/ada.h b/include/ada.h index 667c7fcaf..c5d0946ec 100644 --- a/include/ada.h +++ b/include/ada.h @@ -21,6 +21,7 @@ #include "ada/url_base-inl.h" #include "ada/url-inl.h" #include "ada/url_components.h" +#include "ada/url_components-inl.h" #include "ada/url_aggregator.h" #include "ada/url_aggregator-inl.h" #include "ada/url_search_params.h" diff --git a/include/ada/helpers.h b/include/ada/helpers.h index a47890b2c..91077e940 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -102,9 +102,9 @@ bool overlaps(std::string_view input1, const std::string& input2) noexcept; * Return the substring from input going from index pos1 to the pos2 (non * included). The length of the substring is pos2 - pos1. */ -ada_really_inline std::string_view substring(const std::string& input, - size_t pos1, - size_t pos2) noexcept { +ada_really_inline constexpr std::string_view substring(const std::string& input, + size_t pos1, + size_t pos2) noexcept { #if ADA_DEVELOPMENT_CHECKS if (pos2 < pos1) { std::cerr << "Negative-length substring: [" << pos1 << " to " << pos2 << ")" diff --git a/include/ada/url-inl.h b/include/ada/url-inl.h index 6b7a4cd10..863e6c5e7 100644 --- a/include/ada/url-inl.h +++ b/include/ada/url-inl.h @@ -43,6 +43,10 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) { return path.size(); } +[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept { + return path; +} + [[nodiscard]] ada_really_inline ada::url_components url::get_components() const noexcept { url_components out{}; @@ -148,19 +152,19 @@ inline void url::update_base_port(std::optional input) { port = input; } -inline void url::clear_pathname() { path.clear(); } +constexpr void url::clear_pathname() { path.clear(); } -inline void url::clear_search() { query = std::nullopt; } +constexpr void url::clear_search() { query = std::nullopt; } -[[nodiscard]] inline bool url::has_hash() const noexcept { +[[nodiscard]] constexpr bool url::has_hash() const noexcept { return hash.has_value(); } -[[nodiscard]] inline bool url::has_search() const noexcept { +[[nodiscard]] constexpr bool url::has_search() const noexcept { return query.has_value(); } -inline void url::set_protocol_as_file() { type = ada::scheme::type::FILE; } +constexpr void url::set_protocol_as_file() { type = ada::scheme::type::FILE; } inline void url::set_scheme(std::string &&new_scheme) noexcept { type = ada::scheme::get_scheme_type(new_scheme); @@ -170,12 +174,12 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept { } } -inline void url::copy_scheme(ada::url &&u) noexcept { +constexpr void url::copy_scheme(ada::url &&u) noexcept { non_special_scheme = u.non_special_scheme; type = u.type; } -inline void url::copy_scheme(const ada::url &u) { +constexpr void url::copy_scheme(const ada::url &u) { non_special_scheme = u.non_special_scheme; type = u.type; } diff --git a/include/ada/url.h b/include/ada/url.h index 1418d9bb7..63740e6e3 100644 --- a/include/ada/url.h +++ b/include/ada/url.h @@ -149,7 +149,7 @@ struct url : url_base { * @return a newly allocated string. * @see https://url.spec.whatwg.org/#dom-url-pathname */ - [[nodiscard]] std::string_view get_pathname() const noexcept; + [[nodiscard]] constexpr std::string_view get_pathname() const noexcept; /** * Compute the pathname length in bytes without instantiating a view or a @@ -283,9 +283,9 @@ struct url : url_base { [[nodiscard]] ada_really_inline ada::url_components get_components() const noexcept; /** @return true if the URL has a hash component */ - [[nodiscard]] inline bool has_hash() const noexcept override; + [[nodiscard]] constexpr bool has_hash() const noexcept override; /** @return true if the URL has a search component */ - [[nodiscard]] inline bool has_search() const noexcept override; + [[nodiscard]] constexpr bool has_search() const noexcept override; private: friend ada::url ada::parser::parse_url(std::string_view, @@ -361,12 +361,6 @@ struct url : url_base { return this->parse_port(view, false); } - /** - * Take the scheme from another URL. The scheme string is copied from the - * provided url. - */ - inline void copy_scheme(const ada::url &u); - /** * Parse the host from the provided input. We assume that * the input does not contain spaces or tabs. Control @@ -380,9 +374,9 @@ struct url : url_base { template [[nodiscard]] ada_really_inline bool parse_scheme(std::string_view input); - inline void clear_pathname() override; - inline void clear_search() override; - inline void set_protocol_as_file(); + constexpr void clear_pathname() override; + constexpr void clear_search() override; + constexpr void set_protocol_as_file(); /** * Parse the path from the provided input. @@ -407,7 +401,13 @@ struct url : url_base { * Take the scheme from another URL. The scheme string is moved from the * provided url. */ - inline void copy_scheme(ada::url &&u) noexcept; + constexpr void copy_scheme(ada::url &&u) noexcept; + + /** + * Take the scheme from another URL. The scheme string is copied from the + * provided url. + */ + constexpr void copy_scheme(const ada::url &u); }; // struct url diff --git a/include/ada/url_aggregator-inl.h b/include/ada/url_aggregator-inl.h index fb7c46380..483214a75 100644 --- a/include/ada/url_aggregator-inl.h +++ b/include/ada/url_aggregator-inl.h @@ -412,7 +412,7 @@ inline void url_aggregator::append_base_username(const std::string_view input) { ADA_ASSERT_TRUE(validate()); } -inline void url_aggregator::clear_password() { +constexpr void url_aggregator::clear_password() { ada_log("url_aggregator::clear_password ", to_string(), "\n", to_diagram()); ADA_ASSERT_TRUE(validate()); if (!has_password()) { @@ -634,7 +634,7 @@ inline void url_aggregator::clear_hash() { ADA_ASSERT_TRUE(validate()); } -inline void url_aggregator::clear_pathname() { +constexpr void url_aggregator::clear_pathname() { ada_log("url_aggregator::clear_pathname"); ADA_ASSERT_TRUE(validate()); uint32_t ending_index = uint32_t(buffer.size()); @@ -669,7 +669,7 @@ inline void url_aggregator::clear_pathname() { ada_log("url_aggregator::clear_pathname completed, running checks... ok"); } -inline void url_aggregator::clear_hostname() { +constexpr void url_aggregator::clear_hostname() { ada_log("url_aggregator::clear_hostname"); ADA_ASSERT_TRUE(validate()); if (!has_authority()) { @@ -706,22 +706,22 @@ inline void url_aggregator::clear_hostname() { ADA_ASSERT_TRUE(validate()); } -[[nodiscard]] inline bool url_aggregator::has_hash() const noexcept { +[[nodiscard]] constexpr bool url_aggregator::has_hash() const noexcept { ada_log("url_aggregator::has_hash"); return components.hash_start != url_components::omitted; } -[[nodiscard]] inline bool url_aggregator::has_search() const noexcept { +[[nodiscard]] constexpr bool url_aggregator::has_search() const noexcept { ada_log("url_aggregator::has_search"); return components.search_start != url_components::omitted; } -ada_really_inline bool url_aggregator::has_credentials() const noexcept { +constexpr bool url_aggregator::has_credentials() const noexcept { ada_log("url_aggregator::has_credentials"); return has_non_empty_username() || has_non_empty_password(); } -inline bool url_aggregator::cannot_have_credentials_or_port() const { +constexpr bool url_aggregator::cannot_have_credentials_or_port() const { ada_log("url_aggregator::cannot_have_credentials_or_port"); return type == ada::scheme::type::FILE || components.host_start == components.host_end; @@ -732,7 +732,8 @@ url_aggregator::get_components() const noexcept { return components; } -[[nodiscard]] inline bool ada::url_aggregator::has_authority() const noexcept { +[[nodiscard]] constexpr bool ada::url_aggregator::has_authority() + const noexcept { ada_log("url_aggregator::has_authority"); // Performance: instead of doing this potentially expensive check, we could // have a boolean in the struct. @@ -767,28 +768,28 @@ inline void ada::url_aggregator::add_authority_slashes_if_needed() noexcept { ADA_ASSERT_TRUE(validate()); } -inline void ada::url_aggregator::reserve(uint32_t capacity) { +constexpr void ada::url_aggregator::reserve(uint32_t capacity) { buffer.reserve(capacity); } -inline bool url_aggregator::has_non_empty_username() const noexcept { +constexpr bool url_aggregator::has_non_empty_username() const noexcept { ada_log("url_aggregator::has_non_empty_username"); return components.protocol_end + 2 < components.username_end; } -inline bool url_aggregator::has_non_empty_password() const noexcept { +constexpr bool url_aggregator::has_non_empty_password() const noexcept { ada_log("url_aggregator::has_non_empty_password"); return components.host_start - components.username_end > 0; } -inline bool url_aggregator::has_password() const noexcept { +constexpr bool url_aggregator::has_password() const noexcept { ada_log("url_aggregator::has_password"); // This function does not care about the length of the password return components.host_start > components.username_end && buffer[components.username_end] == ':'; } -inline bool url_aggregator::has_empty_hostname() const noexcept { +constexpr bool url_aggregator::has_empty_hostname() const noexcept { if (!has_hostname()) { return false; } @@ -801,18 +802,18 @@ inline bool url_aggregator::has_empty_hostname() const noexcept { return components.username_end != components.host_start; } -inline bool url_aggregator::has_hostname() const noexcept { +constexpr bool url_aggregator::has_hostname() const noexcept { return has_authority(); } -inline bool url_aggregator::has_port() const noexcept { +constexpr bool url_aggregator::has_port() const noexcept { ada_log("url_aggregator::has_port"); // A URL cannot have a username/password/port if its host is null or the empty // string, or its scheme is "file". return has_hostname() && components.pathname_start != components.host_end; } -[[nodiscard]] inline bool url_aggregator::has_dash_dot() const noexcept { +[[nodiscard]] constexpr bool url_aggregator::has_dash_dot() const noexcept { // If url's host is null, url does not have an opaque path, url's path's size // is greater than 1, and url's path[0] is the empty string, then append // U+002F (/) followed by U+002E (.) to output. @@ -844,8 +845,8 @@ inline bool url_aggregator::has_port() const noexcept { buffer[components.host_end + 1] == '.'; } -[[nodiscard]] inline std::string_view url_aggregator::get_href() const noexcept - ada_lifetime_bound { +[[nodiscard]] constexpr std::string_view url_aggregator::get_href() + const noexcept ada_lifetime_bound { ada_log("url_aggregator::get_href"); return buffer; } @@ -889,7 +890,7 @@ ada_really_inline size_t url_aggregator::parse_port( return consumed; } -inline void url_aggregator::set_protocol_as_file() { +constexpr void url_aggregator::set_protocol_as_file() { ada_log("url_aggregator::set_protocol_as_file "); ADA_ASSERT_TRUE(validate()); type = ada::scheme::type::FILE; @@ -919,6 +920,191 @@ inline void url_aggregator::set_protocol_as_file() { ADA_ASSERT_TRUE(validate()); } +[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept { + if (!is_valid) { + return true; + } + if (!components.check_offset_consistency()) { + ada_log("url_aggregator::validate inconsistent components \n", + to_diagram()); + return false; + } + // We have a credible components struct, but let us investivate more + // carefully: + /** + * https://user:pass@example.com:1234/foo/bar?baz#quux + * | | | | ^^^^| | | + * | | | | | | | `----- hash_start + * | | | | | | `--------- search_start + * | | | | | `----------------- pathname_start + * | | | | `--------------------- port + * | | | `----------------------- host_end + * | | `---------------------------------- host_start + * | `--------------------------------------- username_end + * `--------------------------------------------- protocol_end + */ + if (components.protocol_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram()); + return false; + } + if (components.username_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted username_end \n", to_diagram()); + return false; + } + if (components.host_start == url_components::omitted) { + ada_log("url_aggregator::validate omitted host_start \n", to_diagram()); + return false; + } + if (components.host_end == url_components::omitted) { + ada_log("url_aggregator::validate omitted host_end \n", to_diagram()); + return false; + } + if (components.pathname_start == url_components::omitted) { + ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram()); + return false; + } + + if (components.protocol_end > buffer.size()) { + ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram()); + return false; + } + if (components.username_end > buffer.size()) { + ada_log("url_aggregator::validate username_end overflow \n", to_diagram()); + return false; + } + if (components.host_start > buffer.size()) { + ada_log("url_aggregator::validate host_start overflow \n", to_diagram()); + return false; + } + if (components.host_end > buffer.size()) { + ada_log("url_aggregator::validate host_end overflow \n", to_diagram()); + return false; + } + if (components.pathname_start > buffer.size()) { + ada_log("url_aggregator::validate pathname_start overflow \n", + to_diagram()); + return false; + } + + if (components.protocol_end > 0) { + if (buffer[components.protocol_end - 1] != ':') { + ada_log( + "url_aggregator::validate missing : at the end of the protocol \n", + to_diagram()); + return false; + } + } + + if (components.username_end != buffer.size() && + components.username_end > components.protocol_end + 2) { + if (buffer[components.username_end] != ':' && + buffer[components.username_end] != '@') { + ada_log( + "url_aggregator::validate missing : or @ at the end of the username " + "\n", + to_diagram()); + return false; + } + } + + if (components.host_start != buffer.size()) { + if (components.host_start > components.username_end) { + if (buffer[components.host_start] != '@') { + ada_log( + "url_aggregator::validate missing @ at the end of the password \n", + to_diagram()); + return false; + } + } else if (components.host_start == components.username_end && + components.host_end > components.host_start) { + if (components.host_start == components.protocol_end + 2) { + if (buffer[components.protocol_end] != '/' || + buffer[components.protocol_end + 1] != '/') { + ada_log( + "url_aggregator::validate missing // between protocol and host " + "\n", + to_diagram()); + return false; + } + } else { + if (components.host_start > components.protocol_end && + buffer[components.host_start] != '@') { + ada_log( + "url_aggregator::validate missing @ at the end of the username " + "\n", + to_diagram()); + return false; + } + } + } else { + if (components.host_end != components.host_start) { + ada_log("url_aggregator::validate expected omitted host \n", + to_diagram()); + return false; + } + } + } + if (components.host_end != buffer.size() && + components.pathname_start > components.host_end) { + if (components.pathname_start == components.host_end + 2 && + buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') { + if (components.pathname_start + 1 >= buffer.size() || + buffer[components.pathname_start] != '/' || + buffer[components.pathname_start + 1] != '/') { + ada_log( + "url_aggregator::validate expected the path to begin with // \n", + to_diagram()); + return false; + } + } else if (buffer[components.host_end] != ':') { + ada_log("url_aggregator::validate missing : at the port \n", + to_diagram()); + return false; + } + } + if (components.pathname_start != buffer.size() && + components.pathname_start < components.search_start && + components.pathname_start < components.hash_start && !has_opaque_path) { + if (buffer[components.pathname_start] != '/') { + ada_log("url_aggregator::validate missing / at the path \n", + to_diagram()); + return false; + } + } + if (components.search_start != url_components::omitted) { + if (buffer[components.search_start] != '?') { + ada_log("url_aggregator::validate missing ? at the search \n", + to_diagram()); + return false; + } + } + if (components.hash_start != url_components::omitted) { + if (buffer[components.hash_start] != '#') { + ada_log("url_aggregator::validate missing # at the hash \n", + to_diagram()); + return false; + } + } + + return true; +} + +[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() + const noexcept ada_lifetime_bound { + ada_log("url_aggregator::get_pathname pathname_start = ", + components.pathname_start, " buffer.size() = ", buffer.size(), + " components.search_start = ", components.search_start, + " components.hash_start = ", components.hash_start); + auto ending_index = uint32_t(buffer.size()); + if (components.search_start != url_components::omitted) { + ending_index = components.search_start; + } else if (components.hash_start != url_components::omitted) { + ending_index = components.hash_start; + } + return helpers::substring(buffer, components.pathname_start, ending_index); +} + inline std::ostream &operator<<(std::ostream &out, const ada::url_aggregator &u) { return out << u.to_string(); diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index 0a81a1094..8c71a7395 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -57,7 +57,7 @@ struct url_aggregator : url_base { * @see https://url.spec.whatwg.org/#dom-url-href * @see https://url.spec.whatwg.org/#concept-url-serializer */ - [[nodiscard]] inline std::string_view get_href() const noexcept + [[nodiscard]] constexpr std::string_view get_href() const noexcept ada_lifetime_bound; /** * The username getter steps are to return this's URL's username. @@ -114,7 +114,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-pathname */ - [[nodiscard]] std::string_view get_pathname() const noexcept + [[nodiscard]] constexpr std::string_view get_pathname() const noexcept ada_lifetime_bound; /** * Compute the pathname length in bytes without instantiating a view or a @@ -144,7 +144,8 @@ struct url_aggregator : url_base { * A URL includes credentials if its username or password is not the empty * string. */ - [[nodiscard]] ada_really_inline bool has_credentials() const noexcept; + [[nodiscard]] ada_really_inline constexpr bool has_credentials() + const noexcept; /** * Useful for implementing efficient serialization for the URL. @@ -183,24 +184,24 @@ struct url_aggregator : url_base { * @return true if the URL is valid, otherwise return true of the offsets are * possible. */ - [[nodiscard]] bool validate() const noexcept; + [[nodiscard]] constexpr bool validate() const noexcept; /** @return true if it has an host but it is the empty string */ - [[nodiscard]] inline bool has_empty_hostname() const noexcept; + [[nodiscard]] constexpr bool has_empty_hostname() const noexcept; /** @return true if it has a host (included an empty host) */ - [[nodiscard]] inline bool has_hostname() const noexcept; + [[nodiscard]] constexpr bool has_hostname() const noexcept; /** @return true if the URL has a non-empty username */ - [[nodiscard]] inline bool has_non_empty_username() const noexcept; + [[nodiscard]] constexpr bool has_non_empty_username() const noexcept; /** @return true if the URL has a non-empty password */ - [[nodiscard]] inline bool has_non_empty_password() const noexcept; + [[nodiscard]] constexpr bool has_non_empty_password() const noexcept; /** @return true if the URL has a (non default) port */ - [[nodiscard]] inline bool has_port() const noexcept; + [[nodiscard]] constexpr bool has_port() const noexcept; /** @return true if the URL has a password */ - [[nodiscard]] inline bool has_password() const noexcept; + [[nodiscard]] constexpr bool has_password() const noexcept; /** @return true if the URL has a hash component */ - [[nodiscard]] inline bool has_hash() const noexcept override; + [[nodiscard]] constexpr bool has_hash() const noexcept override; /** @return true if the URL has a search component */ - [[nodiscard]] inline bool has_search() const noexcept override; + [[nodiscard]] constexpr bool has_search() const noexcept override; inline void clear_port(); inline void clear_hash(); @@ -233,7 +234,7 @@ struct url_aggregator : url_base { * To optimize performance, you may indicate how much memory to allocate * within this instance. */ - inline void reserve(uint32_t capacity); + constexpr void reserve(uint32_t capacity); ada_really_inline size_t parse_port( std::string_view view, bool check_trailing_content) noexcept override; @@ -268,7 +269,7 @@ struct url_aggregator : url_base { * A URL cannot have a username/password/port if its host is null or the empty * string, or its scheme is "file". */ - [[nodiscard]] inline bool cannot_have_credentials_or_port() const; + [[nodiscard]] constexpr bool cannot_have_credentials_or_port() const; template bool set_host_or_hostname(std::string_view input); @@ -290,10 +291,10 @@ struct url_aggregator : url_base { inline void update_base_port(uint32_t input); inline void append_base_pathname(std::string_view input); [[nodiscard]] inline uint32_t retrieve_base_port() const; - inline void clear_hostname(); - inline void clear_password(); - inline void clear_pathname() override; - [[nodiscard]] inline bool has_dash_dot() const noexcept; + constexpr void clear_hostname(); + constexpr void clear_password(); + constexpr void clear_pathname() override; + [[nodiscard]] constexpr bool has_dash_dot() const noexcept; void delete_dash_dot(); inline void consume_prepared_path(std::string_view input); template @@ -301,8 +302,8 @@ struct url_aggregator : url_base { std::string_view input); ada_really_inline uint32_t replace_and_resize(uint32_t start, uint32_t end, std::string_view input); - [[nodiscard]] inline bool has_authority() const noexcept; - inline void set_protocol_as_file(); + [[nodiscard]] constexpr bool has_authority() const noexcept; + constexpr void set_protocol_as_file(); inline void set_scheme(std::string_view new_scheme) noexcept; /** * Fast function to set the scheme from a view with a colon in the diff --git a/include/ada/url_base-inl.h b/include/ada/url_base-inl.h index 26ba1087c..948205eb2 100644 --- a/include/ada/url_base-inl.h +++ b/include/ada/url_base-inl.h @@ -21,7 +21,8 @@ namespace ada { -[[nodiscard]] ada_really_inline bool url_base::is_special() const noexcept { +[[nodiscard]] ada_really_inline constexpr bool url_base::is_special() + const noexcept { return type != ada::scheme::NOT_SPECIAL; } diff --git a/include/ada/url_base.h b/include/ada/url_base.h index de4068d3d..9d2461d7a 100644 --- a/include/ada/url_base.h +++ b/include/ada/url_base.h @@ -68,7 +68,7 @@ struct url_base { * A URL is special if its scheme is a special scheme. A URL is not special if * its scheme is not a special scheme. */ - [[nodiscard]] ada_really_inline bool is_special() const noexcept; + [[nodiscard]] ada_really_inline constexpr bool is_special() const noexcept; /** * The origin getter steps are to return the serialization of this's URL's diff --git a/include/ada/url_components-inl.h b/include/ada/url_components-inl.h new file mode 100644 index 000000000..77ab8874a --- /dev/null +++ b/include/ada/url_components-inl.h @@ -0,0 +1,89 @@ +/** + * @file url_components.h + * @brief Declaration for the URL Components + */ +#ifndef ADA_URL_COMPONENTS_INL_H +#define ADA_URL_COMPONENTS_INL_H + +#include "ada/url_components.h" + +namespace ada { + +[[nodiscard]] constexpr bool url_components::check_offset_consistency() + const noexcept { + /** + * https://user:pass@example.com:1234/foo/bar?baz#quux + * | | | | ^^^^| | | + * | | | | | | | `----- hash_start + * | | | | | | `--------- search_start + * | | | | | `----------------- pathname_start + * | | | | `--------------------- port + * | | | `----------------------- host_end + * | | `---------------------------------- host_start + * | `--------------------------------------- username_end + * `--------------------------------------------- protocol_end + */ + // These conditions can be made more strict. + uint32_t index = 0; + + if (protocol_end == url_components::omitted) { + return false; + } + if (protocol_end < index) { + return false; + } + index = protocol_end; + + if (username_end == url_components::omitted) { + return false; + } + if (username_end < index) { + return false; + } + index = username_end; + + if (host_start == url_components::omitted) { + return false; + } + if (host_start < index) { + return false; + } + index = host_start; + + if (port != url_components::omitted) { + if (port > 0xffff) { + return false; + } + uint32_t port_length = helpers::fast_digit_count(port) + 1; + if (index + port_length < index) { + return false; + } + index += port_length; + } + + if (pathname_start == url_components::omitted) { + return false; + } + if (pathname_start < index) { + return false; + } + index = pathname_start; + + if (search_start != url_components::omitted) { + if (search_start < index) { + return false; + } + index = search_start; + } + + if (hash_start != url_components::omitted) { + if (hash_start < index) { + return false; + } + } + + return true; +} + +} // namespace ada +#endif diff --git a/include/ada/url_components.h b/include/ada/url_components.h index 1596c5c13..a72767bb0 100644 --- a/include/ada/url_components.h +++ b/include/ada/url_components.h @@ -67,7 +67,7 @@ struct url_components { * @return true if the offset values are * consistent with a possible URL string */ - [[nodiscard]] bool check_offset_consistency() const noexcept; + [[nodiscard]] constexpr bool check_offset_consistency() const noexcept; /** * Converts a url_components to JSON stringified version. @@ -75,6 +75,5 @@ struct url_components { [[nodiscard]] std::string to_string() const; }; // struct url_components - } // namespace ada #endif diff --git a/src/url-getters.cpp b/src/url-getters.cpp index 227863a71..54a7b2a34 100644 --- a/src/url-getters.cpp +++ b/src/url-getters.cpp @@ -62,10 +62,6 @@ namespace ada { return host.value_or(""); } -[[nodiscard]] std::string_view url::get_pathname() const noexcept { - return path; -} - [[nodiscard]] std::string url::get_search() const noexcept { // If this's URL's query is either null or the empty string, then return the // empty string. Return U+003F (?), followed by this's URL's query. diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 8e4d07aa0..48ed79b8e 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -5,6 +5,7 @@ #include "ada/scheme.h" #include "ada/unicode-inl.h" #include "ada/url_components.h" +#include "ada/url_components-inl.h" #include "ada/url_aggregator.h" #include "ada/url_aggregator-inl.h" @@ -723,21 +724,6 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, start, components.host_end); } -[[nodiscard]] std::string_view url_aggregator::get_pathname() const noexcept - ada_lifetime_bound { - ada_log("url_aggregator::get_pathname pathname_start = ", - components.pathname_start, " buffer.size() = ", buffer.size(), - " components.search_start = ", components.search_start, - " components.hash_start = ", components.hash_start); - auto ending_index = uint32_t(buffer.size()); - if (components.search_start != url_components::omitted) { - ending_index = components.search_start; - } else if (components.hash_start != url_components::omitted) { - ending_index = components.hash_start; - } - return helpers::substring(buffer, components.pathname_start, ending_index); -} - [[nodiscard]] std::string_view url_aggregator::get_search() const noexcept ada_lifetime_bound { ada_log("url_aggregator::get_search"); @@ -1377,176 +1363,6 @@ bool url_aggregator::parse_opaque_host(std::string_view input) { return answer; } -[[nodiscard]] bool url_aggregator::validate() const noexcept { - if (!is_valid) { - return true; - } - if (!components.check_offset_consistency()) { - ada_log("url_aggregator::validate inconsistent components \n", - to_diagram()); - return false; - } - // We have a credible components struct, but let us investivate more - // carefully: - /** - * https://user:pass@example.com:1234/foo/bar?baz#quux - * | | | | ^^^^| | | - * | | | | | | | `----- hash_start - * | | | | | | `--------- search_start - * | | | | | `----------------- pathname_start - * | | | | `--------------------- port - * | | | `----------------------- host_end - * | | `---------------------------------- host_start - * | `--------------------------------------- username_end - * `--------------------------------------------- protocol_end - */ - if (components.protocol_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram()); - return false; - } - if (components.username_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted username_end \n", to_diagram()); - return false; - } - if (components.host_start == url_components::omitted) { - ada_log("url_aggregator::validate omitted host_start \n", to_diagram()); - return false; - } - if (components.host_end == url_components::omitted) { - ada_log("url_aggregator::validate omitted host_end \n", to_diagram()); - return false; - } - if (components.pathname_start == url_components::omitted) { - ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram()); - return false; - } - - if (components.protocol_end > buffer.size()) { - ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram()); - return false; - } - if (components.username_end > buffer.size()) { - ada_log("url_aggregator::validate username_end overflow \n", to_diagram()); - return false; - } - if (components.host_start > buffer.size()) { - ada_log("url_aggregator::validate host_start overflow \n", to_diagram()); - return false; - } - if (components.host_end > buffer.size()) { - ada_log("url_aggregator::validate host_end overflow \n", to_diagram()); - return false; - } - if (components.pathname_start > buffer.size()) { - ada_log("url_aggregator::validate pathname_start overflow \n", - to_diagram()); - return false; - } - - if (components.protocol_end > 0) { - if (buffer[components.protocol_end - 1] != ':') { - ada_log( - "url_aggregator::validate missing : at the end of the protocol \n", - to_diagram()); - return false; - } - } - - if (components.username_end != buffer.size() && - components.username_end > components.protocol_end + 2) { - if (buffer[components.username_end] != ':' && - buffer[components.username_end] != '@') { - ada_log( - "url_aggregator::validate missing : or @ at the end of the username " - "\n", - to_diagram()); - return false; - } - } - - if (components.host_start != buffer.size()) { - if (components.host_start > components.username_end) { - if (buffer[components.host_start] != '@') { - ada_log( - "url_aggregator::validate missing @ at the end of the password \n", - to_diagram()); - return false; - } - } else if (components.host_start == components.username_end && - components.host_end > components.host_start) { - if (components.host_start == components.protocol_end + 2) { - if (buffer[components.protocol_end] != '/' || - buffer[components.protocol_end + 1] != '/') { - ada_log( - "url_aggregator::validate missing // between protocol and host " - "\n", - to_diagram()); - return false; - } - } else { - if (components.host_start > components.protocol_end && - buffer[components.host_start] != '@') { - ada_log( - "url_aggregator::validate missing @ at the end of the username " - "\n", - to_diagram()); - return false; - } - } - } else { - if (components.host_end != components.host_start) { - ada_log("url_aggregator::validate expected omitted host \n", - to_diagram()); - return false; - } - } - } - if (components.host_end != buffer.size() && - components.pathname_start > components.host_end) { - if (components.pathname_start == components.host_end + 2 && - buffer[components.host_end] == '/' && - buffer[components.host_end + 1] == '.') { - if (components.pathname_start + 1 >= buffer.size() || - buffer[components.pathname_start] != '/' || - buffer[components.pathname_start + 1] != '/') { - ada_log( - "url_aggregator::validate expected the path to begin with // \n", - to_diagram()); - return false; - } - } else if (buffer[components.host_end] != ':') { - ada_log("url_aggregator::validate missing : at the port \n", - to_diagram()); - return false; - } - } - if (components.pathname_start != buffer.size() && - components.pathname_start < components.search_start && - components.pathname_start < components.hash_start && !has_opaque_path) { - if (buffer[components.pathname_start] != '/') { - ada_log("url_aggregator::validate missing / at the path \n", - to_diagram()); - return false; - } - } - if (components.search_start != url_components::omitted) { - if (buffer[components.search_start] != '?') { - ada_log("url_aggregator::validate missing ? at the search \n", - to_diagram()); - return false; - } - } - if (components.hash_start != url_components::omitted) { - if (buffer[components.hash_start] != '#') { - ada_log("url_aggregator::validate missing # at the hash \n", - to_diagram()); - return false; - } - } - - return true; -} - void url_aggregator::delete_dash_dot() { ada_log("url_aggregator::delete_dash_dot"); ADA_ASSERT_TRUE(validate()); diff --git a/src/url_components.cpp b/src/url_components.cpp index 7324caeb9..40508f4e0 100644 --- a/src/url_components.cpp +++ b/src/url_components.cpp @@ -7,81 +7,6 @@ namespace ada { -[[nodiscard]] bool url_components::check_offset_consistency() const noexcept { - /** - * https://user:pass@example.com:1234/foo/bar?baz#quux - * | | | | ^^^^| | | - * | | | | | | | `----- hash_start - * | | | | | | `--------- search_start - * | | | | | `----------------- pathname_start - * | | | | `--------------------- port - * | | | `----------------------- host_end - * | | `---------------------------------- host_start - * | `--------------------------------------- username_end - * `--------------------------------------------- protocol_end - */ - // These conditions can be made more strict. - uint32_t index = 0; - - if (protocol_end == url_components::omitted) { - return false; - } - if (protocol_end < index) { - return false; - } - index = protocol_end; - - if (username_end == url_components::omitted) { - return false; - } - if (username_end < index) { - return false; - } - index = username_end; - - if (host_start == url_components::omitted) { - return false; - } - if (host_start < index) { - return false; - } - index = host_start; - - if (port != url_components::omitted) { - if (port > 0xffff) { - return false; - } - uint32_t port_length = helpers::fast_digit_count(port) + 1; - if (index + port_length < index) { - return false; - } - index += port_length; - } - - if (pathname_start == url_components::omitted) { - return false; - } - if (pathname_start < index) { - return false; - } - index = pathname_start; - - if (search_start != url_components::omitted) { - if (search_start < index) { - return false; - } - index = search_start; - } - - if (hash_start != url_components::omitted) { - if (hash_start < index) { - return false; - } - } - - return true; -} - [[nodiscard]] std::string url_components::to_string() const { std::string answer; auto back = std::back_insert_iterator(answer);