diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9672f6da..6b6cd51d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,5 +1,11 @@ -# References - + # Description diff --git a/.github/workflows/pr-title-checks.yaml b/.github/workflows/pr-title-checks.yaml new file mode 100644 index 00000000..1d65f1e0 --- /dev/null +++ b/.github/workflows/pr-title-checks.yaml @@ -0,0 +1,25 @@ +name: "pr-title-checks" + +on: + pull_request_target: + types: ["edited", "opened", "reopened"] + branches: ["main"] + +permissions: {} + +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + + # Cancel in-progress jobs for efficiency + cancel-in-progress: true + +jobs: + conventional-commits: + permissions: + # For amannn/action-semantic-pull-request + pull-requests: "read" + runs-on: "ubuntu-latest" + steps: + - uses: "amannn/action-semantic-pull-request@v5" + env: + GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}" diff --git a/CMakeLists.txt b/CMakeLists.txt index d4802ca2..2627928e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,8 @@ set(SOURCE_FILES src/log_surgeon/SchemaParser.hpp src/log_surgeon/Token.cpp src/log_surgeon/Token.hpp + src/log_surgeon/finite_automata/PrefixTree.cpp + src/log_surgeon/finite_automata/PrefixTree.hpp src/log_surgeon/finite_automata/RegexAST.hpp src/log_surgeon/finite_automata/Dfa.hpp src/log_surgeon/finite_automata/DfaState.hpp @@ -101,7 +103,7 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/Nfa.hpp src/log_surgeon/finite_automata/NfaState.hpp src/log_surgeon/finite_automata/NfaStateType.hpp - src/log_surgeon/finite_automata/Register.hpp + src/log_surgeon/finite_automata/RegisterHandler.hpp src/log_surgeon/finite_automata/Tag.hpp src/log_surgeon/finite_automata/TaggedTransition.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp index 4806987e..c5feb85f 100644 --- a/src/log_surgeon/Lexer.tpp +++ b/src/log_surgeon/Lexer.tpp @@ -410,11 +410,12 @@ auto Lexer::epsilon_closure(NfaStateType const* stat { stack.push(positive_tagged_start_transition.get_dest_state()); } - for (auto const& positive_tagged_end_transition : - current_state->get_positive_tagged_start_transitions()) - { - stack.push(positive_tagged_end_transition.get_dest_state()); + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transition(); + if (optional_positive_tagged_end_transition.has_value()) { + stack.push(optional_positive_tagged_end_transition.value().get_dest_state()); } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git a/src/log_surgeon/finite_automata/Nfa.hpp b/src/log_surgeon/finite_automata/Nfa.hpp index 0e44bedb..caf58ce4 100644 --- a/src/log_surgeon/finite_automata/Nfa.hpp +++ b/src/log_surgeon/finite_automata/Nfa.hpp @@ -31,15 +31,15 @@ class Nfa { [[nodiscard]] auto new_state() -> NfaStateType*; /** - * Creates a unique_ptr for an NFA state with a positive tagged transition and adds it to + * Creates a unique_ptr for an NFA state with a positive tagged end transition and adds it to * `m_states`. * @param tag * @param dest_state - * @return NfaStateType* + * @return A new state with a positive tagged end transition to `dest_state`. */ - [[nodiscard]] auto new_state_with_positive_tagged_transition( - Tag* tag, - NfaStateType* dest_state + [[nodiscard]] auto new_state_with_positive_tagged_end_transition( + Tag const* tag, + NfaStateType const* dest_state ) -> NfaStateType*; /** @@ -50,10 +50,23 @@ class Nfa { * @return NfaStateType* */ [[nodiscard]] auto new_state_with_negative_tagged_transition( - std::vector tags, - NfaStateType* dest_state + std::vector tags, + NfaStateType const* dest_state ) -> NfaStateType*; + /** + * Creates the start and end states for a capture group. + * @param tag The tag associated with the capture group. + * @param dest_state + * @return A pair of states: + * - A new state with a positive tagged start transition from `m_root`. + * - A new state with a positive tagged end transition to `dest_state`. + */ + [[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions( + Tag const* tag, + NfaStateType const* dest_state + ) -> std::pair; + /** * @return A vector representing the traversal order of the NFA states using breadth-first * search (BFS). @@ -97,9 +110,9 @@ auto Nfa::new_state() -> NfaStateType* { } template -auto Nfa::new_state_with_positive_tagged_transition( - Tag* tag, - NfaStateType* dest_state +auto Nfa::new_state_with_positive_tagged_end_transition( + Tag const* tag, + NfaStateType const* dest_state ) -> NfaStateType* { m_states.emplace_back(std::make_unique(tag, dest_state)); return m_states.back().get(); @@ -107,13 +120,25 @@ auto Nfa::new_state_with_positive_tagged_transition( template auto Nfa::new_state_with_negative_tagged_transition( - std::vector tags, - NfaStateType* dest_state + std::vector tags, + NfaStateType const* dest_state ) -> NfaStateType* { m_states.emplace_back(std::make_unique(std::move(tags), dest_state)); return m_states.back().get(); } +template +auto Nfa::new_start_and_end_states_with_positive_tagged_transitions( + Tag const* tag, + NfaStateType const* dest_state +) -> std::pair { + auto* start_state = new_state(); + m_root->add_positive_tagged_start_transition(tag, start_state); + + auto* end_state = new_state_with_positive_tagged_end_transition(tag, dest_state); + return {start_state, end_state}; +} + template auto Nfa::get_bfs_traversal_order() const -> std::vector { std::queue state_queue; @@ -148,11 +173,14 @@ auto Nfa::get_bfs_traversal_order() const -> std::vectorget_positive_tagged_end_transitions()) - { - add_to_queue_and_visited(positive_tagged_end_transition.get_dest_state()); + + auto const& optional_positive_tagged_end_transition + = current_state->get_positive_tagged_end_transition(); + if (optional_positive_tagged_end_transition.has_value()) { + add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state( + )); } + auto const& optional_negative_tagged_transition = current_state->get_negative_tagged_transition(); if (optional_negative_tagged_transition.has_value()) { diff --git a/src/log_surgeon/finite_automata/NfaState.hpp b/src/log_surgeon/finite_automata/NfaState.hpp index b030a548..88f2509c 100644 --- a/src/log_surgeon/finite_automata/NfaState.hpp +++ b/src/log_surgeon/finite_automata/NfaState.hpp @@ -30,10 +30,10 @@ class NfaState { NfaState() = default; - NfaState(Tag* tag, NfaState* dest_state) - : m_positive_tagged_end_transitions{{tag, dest_state}} {} + NfaState(Tag const* tag, NfaState const* dest_state) + : m_positive_tagged_end_transition{PositiveTaggedTransition{tag, dest_state}} {} - NfaState(std::vector tags, NfaState* dest_state) + NfaState(std::vector tags, NfaState const* dest_state) : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {} auto set_accepting(bool accepting) -> void { m_accepting = accepting; } @@ -48,7 +48,8 @@ class NfaState { return m_matching_variable_id; } - auto add_positive_tagged_start_transition(Tag* tag, NfaState* dest_state) -> void { + auto + add_positive_tagged_start_transition(Tag const* tag, NfaState const* dest_state) -> void { m_positive_tagged_start_transitions.emplace_back(tag, dest_state); } @@ -57,9 +58,9 @@ class NfaState { return m_positive_tagged_start_transitions; } - [[nodiscard]] auto get_positive_tagged_end_transitions( - ) const -> std::vector> const& { - return m_positive_tagged_end_transitions; + [[nodiscard]] auto get_positive_tagged_end_transition( + ) const -> std::optional> const& { + return m_positive_tagged_end_transition; } [[nodiscard]] auto get_negative_tagged_transition( @@ -109,7 +110,7 @@ class NfaState { bool m_accepting{false}; uint32_t m_matching_variable_id{0}; std::vector> m_positive_tagged_start_transitions; - std::vector> m_positive_tagged_end_transitions; + std::optional> m_positive_tagged_end_transition; std::optional> m_negative_tagged_transition; std::vector m_epsilon_transitions; std::array, cSizeOfByte> m_bytes_transitions; @@ -185,28 +186,27 @@ auto NfaState::serialize( epsilon_transitions.emplace_back(std::to_string(state_ids.at(dest_state))); } - std::vector positive_tagged_start_transition_strings; + std::vector serialized_positive_tagged_start_transitions; for (auto const& positive_tagged_start_transition : m_positive_tagged_start_transitions) { - auto const optional_serialized_positive_transition + auto const optional_serialized_positive_start_transition = positive_tagged_start_transition.serialize(state_ids); - if (false == optional_serialized_positive_transition.has_value()) { + if (false == optional_serialized_positive_start_transition.has_value()) { return std::nullopt; } - positive_tagged_start_transition_strings.emplace_back( - optional_serialized_positive_transition.value() + serialized_positive_tagged_start_transitions.emplace_back( + optional_serialized_positive_start_transition.value() ); } - std::vector positive_tagged_end_transition_strings; - for (auto const& positive_tagged_end_transition : m_positive_tagged_end_transitions) { - auto const optional_serialized_positive_transition - = positive_tagged_end_transition.serialize(state_ids); - if (false == optional_serialized_positive_transition.has_value()) { + std::string serialized_positive_tagged_end_transition; + if (m_positive_tagged_end_transition.has_value()) { + auto const optional_serialized_positive_end_transition + = m_positive_tagged_end_transition.value().serialize(state_ids); + if (false == optional_serialized_positive_end_transition.has_value()) { return std::nullopt; } - positive_tagged_end_transition_strings.emplace_back( - optional_serialized_positive_transition.value() - ); + serialized_positive_tagged_end_transition + = optional_serialized_positive_end_transition.value(); } std::string negative_tagged_transition_string; @@ -230,8 +230,8 @@ auto NfaState::serialize( accepting_tag_string, fmt::join(byte_transitions, ","), fmt::join(epsilon_transitions, ","), - fmt::join(positive_tagged_start_transition_strings, ","), - fmt::join(positive_tagged_end_transition_strings, ","), + fmt::join(serialized_positive_tagged_start_transitions, ","), + serialized_positive_tagged_end_transition, negative_tagged_transition_string ); } diff --git a/src/log_surgeon/finite_automata/PrefixTree.cpp b/src/log_surgeon/finite_automata/PrefixTree.cpp new file mode 100644 index 00000000..bf0705c8 --- /dev/null +++ b/src/log_surgeon/finite_automata/PrefixTree.cpp @@ -0,0 +1,20 @@ +#include "PrefixTree.hpp" + +#include +#include + +namespace log_surgeon::finite_automata { +auto PrefixTree::get_reversed_positions(id_t const node_id) const -> std::vector { + if (m_nodes.size() <= node_id) { + throw std::out_of_range("Prefix tree index out of range."); + } + + std::vector reversed_positions; + auto current_node{m_nodes[node_id]}; + while (false == current_node.is_root()) { + reversed_positions.push_back(current_node.get_position()); + current_node = m_nodes[current_node.get_parent_node_id().value()]; + } + return reversed_positions; +} +} // namespace log_surgeon::finite_automata diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp new file mode 100644 index 00000000..815c7dda --- /dev/null +++ b/src/log_surgeon/finite_automata/PrefixTree.hpp @@ -0,0 +1,85 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP +#define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP + +#include +#include +#include +#include + +namespace log_surgeon::finite_automata { +/** + * Represents a prefix tree to store register data during TDFA simulation. Each node in the tree + * stores a single position in the lexed string. Each path from the root to an index corresponds to + * a sequence of positions for an individual tag: + * - Positive position node: Indicates the tag was matched at the position. + * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path, + * it indicates the tag was never matched. If the negative tag is along a path containing positive + * nodes, it functions as a placeholder. This can be useful for nested capture groups, to maintain a + * one-to-one mapping between the contained capture group and the enclosing capture group. + */ +class PrefixTree { +public: + using id_t = uint32_t; + using position_t = int32_t; + + static constexpr id_t cRootId{0}; + + PrefixTree() : m_nodes{{std::nullopt, -1}} {} + + /** + * @param parent_node_id Index of the inserted node's parent in the prefix tree. + * @param position The position in the lexed string. + * @return The index of the newly inserted node in the tree. + * @throw std::out_of_range if the parent's index is out of range. + */ + [[maybe_unused]] auto insert(id_t const parent_node_id, position_t const position) -> id_t { + if (m_nodes.size() <= parent_node_id) { + throw std::out_of_range("Predecessor index out of range."); + } + + m_nodes.emplace_back(parent_node_id, position); + return m_nodes.size() - 1; + } + + auto set(id_t const node_id, position_t const position) -> void { + m_nodes.at(node_id).set_position(position); + } + + [[nodiscard]] auto size() const -> size_t { return m_nodes.size(); } + + /** + * @param node_id The index of the node. + * @return A vector containing positions in order from the given index up to but not including + * the root node. + * @throw std::out_of_range if the index is out of range. + */ + [[nodiscard]] auto get_reversed_positions(id_t node_id) const -> std::vector; + +private: + class Node { + public: + Node(std::optional const parent_node_id, position_t const position) + : m_parent_node_id{parent_node_id}, + m_position{position} {} + + [[nodiscard]] auto is_root() const -> bool { return false == m_parent_node_id.has_value(); } + + [[nodiscard]] auto get_parent_node_id() const -> std::optional { + return m_parent_node_id; + } + + auto set_position(position_t const position) -> void { m_position = position; } + + [[nodiscard]] auto get_position() const -> position_t { return m_position; } + + private: + std::optional m_parent_node_id; + position_t m_position; + }; + + std::vector m_nodes; +}; + +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index d4e1af7a..dc74131f 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -83,15 +83,15 @@ class RegexAST { */ [[nodiscard]] virtual auto serialize() const -> std::u32string = 0; - [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { + [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector const& { return m_subtree_positive_tags; } - auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { + auto set_subtree_positive_tags(std::vector subtree_positive_tags) -> void { m_subtree_positive_tags = std::move(subtree_positive_tags); } - auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { + auto add_subtree_positive_tags(std::vector const& subtree_positive_tags) -> void { m_subtree_positive_tags.insert( m_subtree_positive_tags.end(), subtree_positive_tags.cbegin(), @@ -99,7 +99,7 @@ class RegexAST { ); } - auto set_negative_tags(std::vector negative_tags) -> void { + auto set_negative_tags(std::vector negative_tags) -> void { m_negative_tags = std::move(negative_tags); } @@ -146,8 +146,8 @@ class RegexAST { } private: - std::vector m_subtree_positive_tags; - std::vector m_negative_tags; + std::vector m_subtree_positive_tags; + std::vector m_negative_tags; }; /** @@ -623,8 +623,9 @@ class RegexASTMultiplication : public RegexAST { /** * Represents a capture group AST node. - * `m_tag` is always expected to be non-null. - * `m_group_regex_ast` is always expected to be non-null. + * NOTE: + * - `m_tag` is always expected to be non-null. + * - `m_group_regex_ast` is always expected to be non-null. * @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters). */ template @@ -692,11 +693,11 @@ class RegexASTCapture : public RegexAST { /** * Adds the needed `Nfa::states` to the passed in nfa to handle a - * `RegexASTCapture` before transitioning to an accepting `end_state`. + * `RegexASTCapture` before transitioning to a `dest_state`. * @param nfa - * @param end_state + * @param dest_state */ - auto add_to_nfa(Nfa* nfa, NfaStateType* end_state) const -> void override; + auto add_to_nfa(Nfa* nfa, NfaStateType* dest_state) const -> void override; [[nodiscard]] auto serialize() const -> std::u32string override; @@ -889,21 +890,54 @@ template } template -void RegexASTCapture::add_to_nfa(Nfa* nfa, NfaStateType* end_state) - const { - NfaStateType* root = nfa->get_root(); - auto* capture_group_start_state = nfa->new_state(); - root->add_positive_tagged_start_transition(m_tag.get(), capture_group_start_state); - - auto* state_with_positive_tagged_transition - = nfa->new_state_with_positive_tagged_transition(m_tag.get(), end_state); - nfa->set_root(capture_group_start_state); - m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, state_with_positive_tagged_transition); - - nfa->set_root(root); +auto RegexASTCapture::add_to_nfa( + Nfa* nfa, + NfaStateType* dest_state +) const -> void { + // TODO: move this into a documentation file in the future, and reference it here. + // The NFA constructed for a capture group follows the structure below, with tagged transitions + // explicitly labeled for clarity: + // +---------------------+ + // | `m_root` | + // +---------------------+ + // | `m_tag` start + // | (positive tagged start transition) + // v + // +---------------------+ + // |`capture_start_state`| + // +---------------------+ + // | + // | (epsilon transition) + // v + // +---------------------+ + // | `m_group_regex_ast` | + // | (nested NFA) | + // +---------------------+ + // | `m_negative_tags` + // | (negative tagged transition) + // v + // +---------------------+ + // | `capture_end_state` | + // +---------------------+ + // | `m_tag` end + // | (positive tagged end transition) + // v + // +---------------------+ + // | `dest_state` | + // +---------------------+ + auto [capture_start_state, capture_end_state] + = nfa->new_start_and_end_states_with_positive_tagged_transitions( + m_tag.get(), + dest_state + ); + + auto* initial_root = nfa->get_root(); + nfa->set_root(capture_start_state); + m_group_regex_ast->add_to_nfa_with_negative_tags(nfa, capture_end_state); + nfa->set_root(initial_root); } -template +template [[nodiscard]] auto RegexASTCapture::serialize() const -> std::u32string { auto const tag_name_u32 = std::u32string(m_tag->get_name().cbegin(), m_tag->get_name().cend()); return fmt::format( diff --git a/src/log_surgeon/finite_automata/Register.hpp b/src/log_surgeon/finite_automata/Register.hpp deleted file mode 100644 index d0be4f15..00000000 --- a/src/log_surgeon/finite_automata/Register.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER -#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER - -#include - -#include - -namespace log_surgeon::finite_automata { -class Register { -public: - explicit Register(Tag* tag) : m_tag{tag} {} - - auto add_pos(uint32_t const pos) -> void { positions.push_back(pos); } - - auto update_last_position(uint32_t const pos) -> void { positions.back() = pos; } - - auto negate_last_position() -> void { positions.pop_back(); } - - auto negate_all_positions() -> void { positions.clear(); } - - [[nodiscard]] auto get_tag() const -> Tag* { return m_tag; } - - [[nodiscard]] auto get_last_position() const -> uint32_t { return positions.back(); } - - [[nodiscard]] auto get_all_positions() const -> std::vector const& { - return positions; - } - -private: - Tag* m_tag; - std::vector positions; -}; -} // namespace log_surgeon::finite_automata - -#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp new file mode 100644 index 00000000..86a06f21 --- /dev/null +++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp @@ -0,0 +1,52 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP +#define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP + +#include +#include + +#include + +namespace log_surgeon::finite_automata { +/** + * The register handler maintains a prefix tree that is sufficient to represent all registers. + * The register handler also contains a vector of registers, and performs the set, copy, and append + * operations for these registers. + * + * NOTE: For efficiency, registers are not initialized when lexing a new string; instead, it is the + * responsibility of the DFA to set the register values when needed. + */ +class RegisterHandler { +public: + auto add_register( + PrefixTree::id_t const prefix_tree_parent_node_id, + PrefixTree::position_t const position + ) -> void { + auto const prefix_tree_node_id{m_prefix_tree.insert(prefix_tree_parent_node_id, position)}; + m_registers.emplace_back(prefix_tree_node_id); + } + + auto set_register(size_t const reg_id, PrefixTree::position_t const position) -> void { + m_prefix_tree.set(m_registers.at(reg_id), position); + } + + auto copy_register(size_t const dest_reg_id, size_t const source_reg_id) -> void { + m_registers.at(dest_reg_id) = m_registers.at(source_reg_id); + } + + auto append_position(size_t const reg_id, PrefixTree::position_t const position) -> void { + auto const node_id{m_registers.at(reg_id)}; + m_registers.at(reg_id) = m_prefix_tree.insert(node_id, position); + } + + [[nodiscard]] auto get_reversed_positions(size_t const reg_id + ) const -> std::vector { + return m_prefix_tree.get_reversed_positions(m_registers.at(reg_id)); + } + +private: + PrefixTree m_prefix_tree; + std::vector m_registers; +}; +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp index 2fe093e0..4da9b5fa 100644 --- a/src/log_surgeon/finite_automata/TaggedTransition.hpp +++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -13,7 +14,7 @@ namespace log_surgeon::finite_automata { /** * Represents an NFA transition indicating that a capture group has been matched. - * `m_tag` is always expected to be non-null. + * NOTE: `m_tag` is always expected to be non-null. * @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters). */ template @@ -24,7 +25,7 @@ class PositiveTaggedTransition { * @param dest_state * @throw std::invalid_argument if `tag` is `nullptr`. */ - PositiveTaggedTransition(Tag* tag, NfaStateType const* dest_state) + PositiveTaggedTransition(Tag const* tag, NfaStateType const* dest_state) : m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : tag}, m_dest_state{dest_state} {} @@ -45,14 +46,14 @@ class PositiveTaggedTransition { } private: - Tag* m_tag; + Tag const* m_tag; NfaStateType const* m_dest_state; }; /** * Represents an NFA transition indicating that a capture group has been unmatched. - * All tags in `m_tags` are always expected to be non-null. - * @tparam NFAStateType Specifies the type of transition (bytes or UTF-8 characters). + * NOTE: All tags in `m_tags` are always expected to be non-null. + * @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters). */ template class NegativeTaggedTransition { @@ -62,7 +63,7 @@ class NegativeTaggedTransition { * @param dest_state * @throw std::invalid_argument if any elements in `tags` is `nullptr`. */ - NegativeTaggedTransition(std::vector tags, NfaStateType* dest_state) + NegativeTaggedTransition(std::vector tags, NfaStateType const* dest_state) : m_tags{[&tags] { if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) { throw std::invalid_argument("Tags cannot contain null elements"); @@ -91,8 +92,8 @@ class NegativeTaggedTransition { } private: - std::vector m_tags; - NfaStateType* m_dest_state; + std::vector m_tags; + NfaStateType const* m_dest_state; }; } // namespace log_surgeon::finite_automata diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8a1733e2..1e4a8363 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,11 +2,13 @@ set( SOURCES_LOG_SURGEON ../src/log_surgeon/FileReader.cpp ../src/log_surgeon/FileReader.hpp + ../src/log_surgeon/finite_automata/PrefixTree.cpp + ../src/log_surgeon/finite_automata/PrefixTree.hpp ../src/log_surgeon/finite_automata/RegexAST.hpp ../src/log_surgeon/finite_automata/Nfa.hpp ../src/log_surgeon/finite_automata/NfaState.hpp ../src/log_surgeon/finite_automata/NfaStateType.hpp - ../src/log_surgeon/finite_automata/Register.hpp + ../src/log_surgeon/finite_automata/RegisterHandler.hpp ../src/log_surgeon/finite_automata/Tag.hpp ../src/log_surgeon/finite_automata/TaggedTransition.hpp ../src/log_surgeon/LALR1Parser.cpp @@ -22,7 +24,7 @@ set( ../src/log_surgeon/Token.hpp ) -set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-tag.cpp) +set(SOURCES_TESTS test-lexer.cpp test-NFA.cpp test-prefix-tree.cpp test-register-handler.cpp test-tag.cpp) add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS}) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index 947a40fe..2b028920 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -116,7 +116,7 @@ TEST_CASE("Test the Schema class", "[Schema]") { auto* regex_ast_capture = dynamic_cast(regex_ast_cat_ptr->get_right()); REQUIRE(nullptr != regex_ast_capture); - REQUIRE("uID" == string{regex_ast_capture->get_group_name()}); + REQUIRE("uID" == regex_ast_capture->get_group_name()); auto* regex_ast_multiplication_ast = dynamic_cast( regex_ast_capture->get_group_regex_ast().get() diff --git a/tests/test-prefix-tree.cpp b/tests/test-prefix-tree.cpp new file mode 100644 index 00000000..47262bdd --- /dev/null +++ b/tests/test-prefix-tree.cpp @@ -0,0 +1,117 @@ +#include +#include +#include + +#include + +#include + +using log_surgeon::finite_automata::PrefixTree; +using id_t = PrefixTree::id_t; +using position_t = PrefixTree::position_t; + +constexpr auto cRootId{PrefixTree::cRootId}; +constexpr id_t cInvaidNodeId{100}; +constexpr position_t cInsertPos1{4}; +constexpr position_t cInsertPos2{7}; +constexpr position_t cInsertPos3{9}; +constexpr position_t cMaxPos{std::numeric_limits::max()}; +constexpr position_t cNegativePos1{-1}; +constexpr position_t cNegativePos2{-100}; +constexpr position_t cSetPos1{10}; +constexpr position_t cSetPos2{12}; +constexpr position_t cSetPos3{15}; +constexpr position_t cSetPos4{20}; +constexpr position_t cTreeSize1{4}; +constexpr position_t cTreeSize2{8}; + +TEST_CASE("`PrefixTree` operations", "[PrefixTree]") { + SECTION("Newly constructed tree works correctly") { + PrefixTree const tree; + + // A newly constructed tree should return no positions as the root node is ignored + REQUIRE(tree.get_reversed_positions(cRootId).empty()); + } + + SECTION("Inserting nodes into the prefix tree works correctly") { + PrefixTree tree; + + // Test basic insertions + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInsertPos2)}; + auto const node_id_3{tree.insert(node_id_2, cInsertPos3)}; + REQUIRE(std::vector{cInsertPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cInsertPos2, cInsertPos1} + == tree.get_reversed_positions(node_id_2)); + REQUIRE(std::vector{cInsertPos3, cInsertPos2, cInsertPos1} + == tree.get_reversed_positions(node_id_3)); + REQUIRE(cTreeSize1 == tree.size()); + + // Test insertion with large position values + auto const node_id_4{tree.insert(cRootId, cMaxPos)}; + REQUIRE(cMaxPos == tree.get_reversed_positions(node_id_4)[0]); + + // Test insertion with negative position values + auto const node_id_5{tree.insert(cRootId, cNegativePos1)}; + auto const node_id_6{tree.insert(node_id_5, cInsertPos1)}; + auto const node_id_7{tree.insert(node_id_6, cNegativePos2)}; + REQUIRE(std::vector{cNegativePos1} == tree.get_reversed_positions(node_id_5)); + REQUIRE(std::vector{cInsertPos1, cNegativePos1} + == tree.get_reversed_positions(node_id_6)); + REQUIRE(std::vector{cNegativePos2, cInsertPos1, cNegativePos1} + == tree.get_reversed_positions(node_id_7)); + REQUIRE(cTreeSize2 == tree.size()); + } + + SECTION("Invalid index access throws correctly") { + PrefixTree tree; + REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); + + tree.insert(cRootId, cInsertPos1); + REQUIRE_THROWS_AS(tree.get_reversed_positions(tree.size()), std::out_of_range); + + REQUIRE_THROWS_AS( + tree.get_reversed_positions(std::numeric_limits::max()), + std::out_of_range + ); + } + + SECTION("Set position for a valid index works correctly") { + PrefixTree tree; + // Test that you can set the root node for sanity, although this value is not used + tree.set(cRootId, cSetPos1); + + // Test updates to different nodes + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; + auto const node_id_2{tree.insert(node_id_1, cInsertPos1)}; + tree.set(node_id_1, cSetPos1); + tree.set(node_id_2, cSetPos2); + REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cSetPos2, cSetPos1} + == tree.get_reversed_positions(node_id_2)); + + // Test multiple updates to the same node + tree.set(node_id_2, cSetPos3); + tree.set(node_id_2, cSetPos4); + REQUIRE(std::vector{cSetPos4, cSetPos1} + == tree.get_reversed_positions(node_id_2)); + + // Test that updates don't affect unrelated paths + auto const node_id_3{tree.insert(cRootId, cSetPos2)}; + tree.set(node_id_3, cSetPos3); + REQUIRE(std::vector{cSetPos1} == tree.get_reversed_positions(node_id_1)); + REQUIRE(std::vector{cSetPos4, cSetPos1} + == tree.get_reversed_positions(node_id_2)); + } + + SECTION("Set position for an invalid index throws correctly") { + PrefixTree tree; + + // Test setting position before any insertions + REQUIRE_THROWS_AS(tree.set(cInvaidNodeId, cSetPos4), std::out_of_range); + + // Test setting position just beyond valid range + auto const node_id_1{tree.insert(cRootId, cInsertPos1)}; + REQUIRE_THROWS_AS(tree.set(node_id_1 + 1, cSetPos4), std::out_of_range); + } +} diff --git a/tests/test-register-handler.cpp b/tests/test-register-handler.cpp new file mode 100644 index 00000000..9cec3ff5 --- /dev/null +++ b/tests/test-register-handler.cpp @@ -0,0 +1,85 @@ +#include +#include +#include + +#include + +#include +#include + +using log_surgeon::finite_automata::RegisterHandler; +using position_t = log_surgeon::finite_automata::PrefixTree::position_t; + +constexpr position_t cInitialPos{0}; +constexpr position_t cNegativePos1{-1}; +constexpr position_t cNegativePos2{-100}; +constexpr position_t cSetPos1{5}; +constexpr position_t cSetPos2{10}; +constexpr position_t cSetPos3{15}; +constexpr size_t cNumRegisters{5}; +constexpr size_t cRegId1{0}; +constexpr size_t cRegId2{1}; +constexpr size_t cRegId3{2}; +constexpr size_t cInvalidRegId{10}; + +namespace { +auto add_register_to_handler(RegisterHandler& handler) -> void { + for (size_t i{0}; i < cNumRegisters; ++i) { + handler.add_register(i, cInitialPos); + } +} +} // namespace + +TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") { + RegisterHandler handler; + + SECTION("Initial state is empty") { + REQUIRE_THROWS_AS(handler.get_reversed_positions(cRegId1), std::out_of_range); + } + + add_register_to_handler(handler); + + SECTION("Set register position correctly") { + handler.set_register(cRegId1, cSetPos1); + REQUIRE(std::vector{cSetPos1} == handler.get_reversed_positions(cRegId1)); + } + + SECTION("Register relationships are maintained") { + handler.set_register(cRegId1, cSetPos1); + handler.set_register(cRegId2, cSetPos2); + handler.set_register(cRegId3, cSetPos3); + + auto positions{handler.get_reversed_positions(cRegId3)}; + REQUIRE(std::vector{cSetPos3, cSetPos2, cSetPos1} + == handler.get_reversed_positions(cRegId3)); + } + + SECTION("Copy register index correctly") { + handler.set_register(cRegId1, cSetPos1); + handler.copy_register(cRegId2, cRegId1); + REQUIRE(std::vector{cSetPos1} == handler.get_reversed_positions(cRegId2)); + } + + SECTION("`append_position` appends position correctly") { + handler.set_register(cRegId1, cSetPos1); + handler.append_position(cRegId1, cSetPos2); + REQUIRE(std::vector{cSetPos2, cSetPos1} + == handler.get_reversed_positions(cRegId1)); + } + + SECTION("Throws out of range correctly") { + REQUIRE_THROWS_AS(handler.set_register(cInvalidRegId, cSetPos1), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(cInvalidRegId, cRegId2), std::out_of_range); + REQUIRE_THROWS_AS(handler.copy_register(cRegId1, cInvalidRegId), std::out_of_range); + REQUIRE_THROWS_AS(handler.append_position(cInvalidRegId, cSetPos1), std::out_of_range); + REQUIRE_THROWS_AS(handler.get_reversed_positions(cInvalidRegId), std::out_of_range); + } + + SECTION("Handles negative position values correctly") { + handler.set_register(cRegId1, cNegativePos1); + handler.append_position(cRegId1, cSetPos1); + handler.append_position(cRegId1, cNegativePos2); + REQUIRE(std::vector{cNegativePos2, cSetPos1, cNegativePos1} + == handler.get_reversed_positions(cRegId1)); + } +} diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp index a8b35e99..41f8a2ef 100644 --- a/tests/test-tag.cpp +++ b/tests/test-tag.cpp @@ -1,16 +1,13 @@ -#include - #include #include using log_surgeon::finite_automata::Tag; -using std::string; TEST_CASE("Tag operations", "[Tag]") { SECTION("Basic name retrieval works correctly") { Tag const tag{"uID"}; - REQUIRE("uID" == string{tag.get_name()}); + REQUIRE("uID" == tag.get_name()); } SECTION("Empty tag name is handled correctly") { @@ -20,16 +17,18 @@ TEST_CASE("Tag operations", "[Tag]") { SECTION("Special characters in tag names are preserved") { Tag const special_tag{"user.id-123_@"}; - REQUIRE("user.id-123_@" == string{special_tag.get_name()}); + REQUIRE("user.id-123_@" == special_tag.get_name()); } - SECTION("Move semantics work correctly") { - Tag original_tag{"source"}; - Tag moved_tag{std::move(original_tag)}; - REQUIRE("source" == string{moved_tag.get_name()}); - + SECTION("Copy constructor works correctly") { Tag assign_tag{"target"}; assign_tag = Tag{"new_source"}; - REQUIRE("new_source" == string{assign_tag.get_name()}); + REQUIRE("new_source" == assign_tag.get_name()); + } + + SECTION("Move constructor works correctly") { + Tag original_tag{"source"}; + Tag moved_tag{std::move(original_tag)}; + REQUIRE("source" == moved_tag.get_name()); } } diff --git a/tools/deps-install/ubuntu/install-catch2.sh b/tools/deps-install/ubuntu/install-catch2.sh index bb5ebfbe..aa063d72 100755 --- a/tools/deps-install/ubuntu/install-catch2.sh +++ b/tools/deps-install/ubuntu/install-catch2.sh @@ -69,7 +69,7 @@ fi # Build cd "$extracted_dir" -cmake -B build -S . -DBUILD_TESTING=OFF +cmake -B build -S . -DBUILD_TESTING=OFF -DCMAKE_CXX_STANDARD=17 cmake --build build --parallel "$num_cpus" # Check if checkinstall is installed