diff --git a/examples/intersect-test.cpp b/examples/intersect-test.cpp index 4930d702..abf0b46b 100644 --- a/examples/intersect-test.cpp +++ b/examples/intersect-test.cpp @@ -42,7 +42,7 @@ auto get_intersect_for_query( } Nfa nfa(std::move(rules)); auto dfa2 = ByteLexer::nfa_to_dfa(nfa); - auto schema_types = dfa1->get_intersect(dfa2); + auto schema_types = dfa1->get_intersect(dfa2.get()); std::cout << search_string << ":"; for (auto const& schema_type : schema_types) { std::cout << m_id_symbol[schema_type] << ","; diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp index 0cfee645..2872ae2e 100644 --- a/src/log_surgeon/Lexer.hpp +++ b/src/log_surgeon/Lexer.hpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/src/log_surgeon/finite_automata/Dfa.hpp b/src/log_surgeon/finite_automata/Dfa.hpp index a2000579..ae1ea367 100644 --- a/src/log_surgeon/finite_automata/Dfa.hpp +++ b/src/log_surgeon/finite_automata/Dfa.hpp @@ -13,10 +13,9 @@ template class Dfa { public: /** - * Creates a new DFA state based on a set of NFA states and adds it to - * m_states - * @param nfa_state_set - * @return DfaStateType* + * Creates a new DFA state based on a set of NFA states and adds it to `m_states`. + * @param nfa_state_set The set of NFA states represented by this DFA state. + * @return A pointer to the new DFA state. */ template auto new_state(std::set const& nfa_state_set) -> DfaStateType*; @@ -24,16 +23,14 @@ class Dfa { auto get_root() const -> DfaStateType const* { return m_states.at(0).get(); } /** - * Compares this dfa with dfa_in to determine the set of schema types in - * this dfa that are reachable by any type in dfa_in. A type is considered - * reachable if there is at least one string for which: (1) this dfa returns - * a set of types containing the type, and (2) dfa_in returns any non-empty - * set of types. - * @param dfa_in - * @return The set of schema types reachable by dfa_in + * Compares this dfa with `dfa_in` to determine the set of schema types in this dfa that are + * reachable by any type in `dfa_in`. A type is considered reachable if there is at least one + * string for which: (1) this dfa returns a set of types containing the type, and (2) `dfa_in` + * returns any non-empty set of types. + * @param dfa_in The dfa with which to take the intersect. + * @return The set of schema types reachable by `dfa_in`. */ - [[nodiscard]] auto get_intersect(std::unique_ptr const& dfa_in - ) const -> std::set; + [[nodiscard]] auto get_intersect(Dfa const* dfa_in) const -> std::set; private: std::vector> m_states; @@ -53,8 +50,7 @@ auto Dfa::new_state(std::set const& nfa_state_set) } template -auto Dfa::get_intersect(std::unique_ptr const& dfa_in -) const -> std::set { +auto Dfa::get_intersect(Dfa const* dfa_in) const -> std::set { std::set schema_types; std::set> unvisited_pairs; std::set> visited_pairs; diff --git a/src/log_surgeon/finite_automata/DfaState.hpp b/src/log_surgeon/finite_automata/DfaState.hpp index 7345843c..cbc62ad9 100644 --- a/src/log_surgeon/finite_automata/DfaState.hpp +++ b/src/log_surgeon/finite_automata/DfaState.hpp @@ -5,8 +5,10 @@ #include #include #include +#include #include +#include #include #include @@ -17,11 +19,15 @@ class DfaState; using DfaByteState = DfaState; using DfaUtf8State = DfaState; -template +template class DfaState { public: using Tree = UnicodeIntervalTree; + DfaState() { + std::fill(std::begin(m_bytes_transition), std::end(m_bytes_transition), nullptr); + } + auto add_matching_variable_id(uint32_t const variable_id) -> void { m_matching_variable_ids.push_back(variable_id); } @@ -30,30 +36,31 @@ class DfaState { return m_matching_variable_ids; } - [[nodiscard]] auto is_accepting() const -> bool { return !m_matching_variable_ids.empty(); } + [[nodiscard]] auto is_accepting() const -> bool { + return false == m_matching_variable_ids.empty(); + } auto add_byte_transition(uint8_t const& byte, DfaState* dest_state) -> void { m_bytes_transition[byte] = dest_state; } /** - * Returns the next state the DFA transitions to on input character (byte or utf8). - * @param character - * @return DfaState* + * @param character The character (byte or utf8) to transition on. + * @return A pointer to the DFA state reached after transitioning on `character`. */ [[nodiscard]] auto next(uint32_t character) const -> DfaState*; private: std::vector m_matching_variable_ids; DfaState* m_bytes_transition[cSizeOfByte]; - // NOTE: We don't need m_tree_transitions for the `stateType == DfaStateType::Byte` case, - // so we use an empty class (`std::tuple<>`) in that case. - std::conditional_t> m_tree_transitions; + // NOTE: We don't need m_tree_transitions for the `state_type == DfaStateType::Byte` case, so we + // use an empty class (`std::tuple<>`) in that case. + std::conditional_t> m_tree_transitions; }; -template -auto DfaState::next(uint32_t character) const -> DfaState* { - if constexpr (DfaStateType::Byte == stateType) { +template +auto DfaState::next(uint32_t character) const -> DfaState* { + if constexpr (DfaStateType::Byte == state_type) { return m_bytes_transition[character]; } else { if (character < cSizeOfByte) { @@ -62,7 +69,7 @@ auto DfaState::next(uint32_t character) const -> DfaState* { std::unique_ptr> result = m_tree_transitions.find(Interval(character, character)); assert(result->size() <= 1); - if (!result->empty()) { + if (false == result->empty()) { return result->front().m_value; } return nullptr; diff --git a/src/log_surgeon/finite_automata/DfaStatePair.hpp b/src/log_surgeon/finite_automata/DfaStatePair.hpp index 358f396f..568142d3 100644 --- a/src/log_surgeon/finite_automata/DfaStatePair.hpp +++ b/src/log_surgeon/finite_automata/DfaStatePair.hpp @@ -1,10 +1,11 @@ #ifndef LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_PAIR #define LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE_PAIR +#include #include #include -#include +#include namespace log_surgeon::finite_automata { /** diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp index ab88d805..e2de78aa 100644 --- a/src/log_surgeon/finite_automata/PrefixTree.hpp +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -65,6 +65,11 @@ class PrefixTree { [[nodiscard]] auto is_root() const -> bool { return false == m_parent_id.has_value(); } + /** + * Gets the parent ID without checking if it's `std::nullopt`. + * NOTE: This method should only be used if the caller has checked the node is not the root. + * @return The ID of the parent node in the prefix tree. + */ [[nodiscard]] auto get_parent_id_unsafe() const -> id_t { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return m_parent_id.value(); diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp index 27c79882..66d8f8a0 100644 --- a/tests/test-prefix-tree.cpp +++ b/tests/test-prefix-tree.cpp @@ -10,22 +10,11 @@ using log_surgeon::finite_automata::PrefixTree; using id_t = PrefixTree::id_t; using position_t = PrefixTree::position_t; -constexpr auto cRootId{PrefixTree::cRootId}; -constexpr id_t cInvalidNodeId{100}; -constexpr position_t cInsertPos1{4}; -constexpr position_t cInsertPos2{7}; -constexpr position_t cInsertPos3{9}; -constexpr position_t cMaxPos{std::numeric_limits::max()}; -constexpr position_t cNegativePos1{-1}; -constexpr position_t cNegativePos2{-100}; -constexpr position_t cSetPos1{10}; -constexpr position_t cSetPos2{12}; -constexpr position_t cSetPos3{15}; -constexpr position_t cSetPos4{20}; -constexpr position_t cTreeSize1{4}; -constexpr position_t cTreeSize2{8}; - TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { + constexpr auto cRootId{PrefixTree::cRootId}; + constexpr position_t cInitialPos1{4}; + constexpr position_t cSetPos1{10}; + SECTION("Newly constructed tree works correctly") { PrefixTree const tree; @@ -34,16 +23,24 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { } SECTION("Inserting nodes into the prefix tree works correctly") { + constexpr position_t cInitialPos2{7}; + constexpr position_t cInitialPos3{9}; + constexpr position_t cMaxPos{std::numeric_limits::max()}; + constexpr position_t cNegativePos1{-1}; + constexpr position_t cNegativePos2{-100}; + constexpr position_t cTreeSize1{4}; + constexpr position_t cTreeSize2{8}; + PrefixTree tree; // Test basic insertions - auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; - auto const node_id_2{tree.insert(node_id_1, cInsertPos2)}; - auto const node_id_3{tree.insert(node_id_2, cInsertPos3)}; - REQUIRE(std::vector{cInsertPos1} == tree.get_reversed_positions(node_id_1)); - REQUIRE(std::vector{cInsertPos2, cInsertPos1} + auto const node_id_1{tree.insert(cRootId, cInitialPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInitialPos2)}; + auto const node_id_3{tree.insert(node_id_2, cInitialPos3)}; + REQUIRE(std::vector{cInitialPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cInitialPos2, cInitialPos1} == tree.get_reversed_positions(node_id_2)); - REQUIRE(std::vector{cInsertPos3, cInsertPos2, cInsertPos1} + REQUIRE(std::vector{cInitialPos3, cInitialPos2, cInitialPos1} == tree.get_reversed_positions(node_id_3)); REQUIRE(cTreeSize1 == tree.size()); @@ -53,12 +50,12 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { // Test insertion with negative position values auto const node_id_5{tree.insert(cRootId, cNegativePos1)}; - auto const node_id_6{tree.insert(node_id_5, cInsertPos1)}; + auto const node_id_6{tree.insert(node_id_5, cInitialPos1)}; auto const node_id_7{tree.insert(node_id_6, cNegativePos2)}; REQUIRE(std::vector{cNegativePos1} == tree.get_reversed_positions(node_id_5)); - REQUIRE(std::vector{cInsertPos1, cNegativePos1} + REQUIRE(std::vector{cInitialPos1, cNegativePos1} == tree.get_reversed_positions(node_id_6)); - REQUIRE(std::vector{cNegativePos2, cInsertPos1, cNegativePos1} + REQUIRE(std::vector{cNegativePos2, cInitialPos1, cNegativePos1} == tree.get_reversed_positions(node_id_7)); REQUIRE(cTreeSize2 == tree.size()); } @@ -67,7 +64,7 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { PrefixTree tree; REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); - tree.insert(cRootId, cInsertPos1); + tree.insert(cRootId, cInitialPos1); REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); REQUIRE_THROWS_AS( @@ -77,13 +74,17 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { } SECTION("Set position for a valid index works correctly") { + constexpr position_t cSetPos2{12}; + constexpr position_t cSetPos3{15}; + constexpr position_t cSetPos4{20}; + PrefixTree tree; // Test that you can set the root node for sanity, although this value is not used tree.set(cRootId, cSetPos1); // Test updates to different nodes - auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; - auto const node_id_2{tree.insert(node_id_1, cInsertPos1)}; + auto const node_id_1{tree.insert(cRootId, cInitialPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInitialPos1)}; tree.set(node_id_1, cSetPos1); tree.set(node_id_2, cSetPos2); REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); @@ -105,13 +106,15 @@ TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { } SECTION("Set position for an invalid index throws correctly") { + constexpr id_t cInvalidNodeId{100}; + PrefixTree tree; // Test setting position before any insertions - REQUIRE_THROWS_AS(tree.set(cInvalidNodeId, cSetPos4), std::out_of_range); + REQUIRE_THROWS_AS(tree.set(cInvalidNodeId, cSetPos1), std::out_of_range); // Test setting position just beyond valid range - auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; - REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos4), std::out_of_range); + auto const node_id_1{tree.insert(cRootId, cInitialPos1)}; + REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos1), std::out_of_range); } } diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp index 2371fc9e..e8102e22 100644 --- a/tests/test-register-handler.cpp +++ b/tests/test-register-handler.cpp @@ -12,17 +12,19 @@ using position_t = log_surgeon::finite_automata::PrefixTree::position_t; namespace { /** - * @param handler The register handler that will contain the new registers. - * @param num_registers The number of registers to initialize. + * @param num_registers The number of registers managed by the handler. + * @return The newly initialized register handler. */ -auto registers_init(RegisterHandler& handler, size_t num_registers) -> void; +[[nodiscard]] auto handler_init(size_t num_registers) -> RegisterHandler; -auto registers_init(RegisterHandler& handler, size_t const num_registers) -> void { +auto handler_init(size_t const num_registers) -> RegisterHandler { constexpr position_t cDefaultPos{0}; + RegisterHandler handler; for (size_t i{0}; i < num_registers; ++i) { handler.add_register(i, cDefaultPos); } + return handler; } } // namespace @@ -32,13 +34,12 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { constexpr size_t cRegId1{0}; constexpr size_t cRegId2{1}; - RegisterHandler handler; - SECTION("Initial state is empty") { - REQUIRE_THROWS_AS(handler.get_reversed_positions(cRegId1), std::out_of_range); + RegisterHandler empty_handler{handler_init(0)}; + REQUIRE_THROWS_AS(empty_handler.get_reversed_positions(cRegId1), std::out_of_range); } - registers_init(handler, cNumRegisters); + RegisterHandler handler{handler_init(cNumRegisters)}; SECTION("Set register position correctly") { handler.set_register(cRegId1, cInitialPos1);