Skip to content

Commit

Permalink
Merge branch 'remove-redundant-typenames' into remove-regex-prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed committed Dec 4, 2024
2 parents 1b1460b + d5b20fe commit 69a7ad1
Show file tree
Hide file tree
Showing 18 changed files with 550 additions and 128 deletions.
10 changes: 8 additions & 2 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# References
<!-- Any issues or pull requests relevant to this pull request -->
<!--
Set the PR title to a meaningful commit message that:
- follows the Conventional Commits specification (https://www.conventionalcommits.org).
- is in imperative form.
Example:
fix: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
-->

# Description
<!-- Describe what this request will change/fix and provide any details necessary for reviewers -->
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/pr-title-checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Workflow that runs the `conventional-commits` job below against pull requests.
name: "pr-title-checks"

on:
# Triggered for PRs against `main` with activity type edited, opened, or reopened.
pull_request_target:
types: ["edited", "opened", "reopened"]
branches: ["main"]

# Grant no permissions at the workflow level; the job below requests only what it needs.
permissions: {}

concurrency:
# Runs are grouped per workflow and ref.
group: "${{github.workflow}}-${{github.ref}}"

# Cancel in-progress jobs for efficiency
cancel-in-progress: true

jobs:
conventional-commits:
permissions:
# For amannn/action-semantic-pull-request
pull-requests: "read"
runs-on: "ubuntu-latest"
steps:
- uses: "amannn/action-semantic-pull-request@v5"
env:
# Token handed to the action through its environment.
GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}"
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ set(SOURCE_FILES
src/log_surgeon/SchemaParser.hpp
src/log_surgeon/Token.cpp
src/log_surgeon/Token.hpp
src/log_surgeon/finite_automata/PrefixTree.cpp
src/log_surgeon/finite_automata/PrefixTree.hpp
src/log_surgeon/finite_automata/RegexAST.hpp
src/log_surgeon/finite_automata/Dfa.hpp
src/log_surgeon/finite_automata/DfaState.hpp
Expand All @@ -101,7 +103,7 @@ set(SOURCE_FILES
src/log_surgeon/finite_automata/Nfa.hpp
src/log_surgeon/finite_automata/NfaState.hpp
src/log_surgeon/finite_automata/NfaStateType.hpp
src/log_surgeon/finite_automata/Register.hpp
src/log_surgeon/finite_automata/RegisterHandler.hpp
src/log_surgeon/finite_automata/Tag.hpp
src/log_surgeon/finite_automata/TaggedTransition.hpp
src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp
Expand Down
9 changes: 5 additions & 4 deletions src/log_surgeon/Lexer.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,12 @@ auto Lexer<NfaStateType, DfaStateType>::epsilon_closure(NfaStateType const* stat
{
stack.push(positive_tagged_start_transition.get_dest_state());
}
for (auto const& positive_tagged_end_transition :
current_state->get_positive_tagged_start_transitions())
{
stack.push(positive_tagged_end_transition.get_dest_state());
auto const& optional_positive_tagged_end_transition
= current_state->get_positive_tagged_end_transition();
if (optional_positive_tagged_end_transition.has_value()) {
stack.push(optional_positive_tagged_end_transition.value().get_dest_state());
}

auto const& optional_negative_tagged_transition
= current_state->get_negative_tagged_transition();
if (optional_negative_tagged_transition.has_value()) {
Expand Down
60 changes: 44 additions & 16 deletions src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ class Nfa {
[[nodiscard]] auto new_state() -> NfaStateType*;

/**
* Creates a unique_ptr for an NFA state with a positive tagged transition and adds it to
* Creates a unique_ptr for an NFA state with a positive tagged end transition and adds it to
* `m_states`.
* @param tag
* @param dest_state
* @return NfaStateType*
* @return A new state with a positive tagged end transition to `dest_state`.
*/
[[nodiscard]] auto new_state_with_positive_tagged_transition(
Tag* tag,
NfaStateType* dest_state
[[nodiscard]] auto new_state_with_positive_tagged_end_transition(
Tag const* tag,
NfaStateType const* dest_state
) -> NfaStateType*;

/**
Expand All @@ -50,10 +50,23 @@ class Nfa {
* @return NfaStateType*
*/
[[nodiscard]] auto new_state_with_negative_tagged_transition(
std::vector<Tag*> tags,
NfaStateType* dest_state
std::vector<Tag const*> tags,
NfaStateType const* dest_state
) -> NfaStateType*;

/**
* Creates the start and end states for a capture group.
* @param tag The tag associated with the capture group.
* @param dest_state
* @return A pair of states:
* - A new state with a positive tagged start transition from `m_root`.
* - A new state with a positive tagged end transition to `dest_state`.
*/
[[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions(
Tag const* tag,
NfaStateType const* dest_state
) -> std::pair<NfaStateType*, NfaStateType*>;

/**
* @return A vector representing the traversal order of the NFA states using breadth-first
* search (BFS).
Expand Down Expand Up @@ -97,23 +110,35 @@ auto Nfa<NfaStateType>::new_state() -> NfaStateType* {
}

template <typename NfaStateType>
auto Nfa<NfaStateType>::new_state_with_positive_tagged_transition(
Tag* tag,
NfaStateType* dest_state
auto Nfa<NfaStateType>::new_state_with_positive_tagged_end_transition(
Tag const* tag,
NfaStateType const* dest_state
) -> NfaStateType* {
m_states.emplace_back(std::make_unique<NfaStateType>(tag, dest_state));
return m_states.back().get();
}

template <typename NfaStateType>
auto Nfa<NfaStateType>::new_state_with_negative_tagged_transition(
std::vector<Tag*> tags,
NfaStateType* dest_state
std::vector<Tag const*> tags,
NfaStateType const* dest_state
) -> NfaStateType* {
m_states.emplace_back(std::make_unique<NfaStateType>(std::move(tags), dest_state));
return m_states.back().get();
}

template <typename NfaStateType>
auto Nfa<NfaStateType>::new_start_and_end_states_with_positive_tagged_transitions(
        Tag const* tag,
        NfaStateType const* dest_state
) -> std::pair<NfaStateType*, NfaStateType*> {
    // First state: reached from `m_root` through a positive tagged start transition on `tag`.
    auto* const capture_start_state{new_state()};
    m_root->add_positive_tagged_start_transition(tag, capture_start_state);

    // Second state: created after the first one, it owns the positive tagged end transition
    // on `tag` that leads into `dest_state`.
    auto* const capture_end_state{new_state_with_positive_tagged_end_transition(tag, dest_state)};

    return std::pair<NfaStateType*, NfaStateType*>{capture_start_state, capture_end_state};
}

template <typename NfaStateType>
auto Nfa<NfaStateType>::get_bfs_traversal_order() const -> std::vector<NfaStateType const*> {
std::queue<NfaStateType const*> state_queue;
Expand Down Expand Up @@ -148,11 +173,14 @@ auto Nfa<NfaStateType>::get_bfs_traversal_order() const -> std::vector<NfaStateT
{
add_to_queue_and_visited(positive_tagged_start_transition.get_dest_state());
}
for (auto const& positive_tagged_end_transition :
current_state->get_positive_tagged_end_transitions())
{
add_to_queue_and_visited(positive_tagged_end_transition.get_dest_state());

auto const& optional_positive_tagged_end_transition
= current_state->get_positive_tagged_end_transition();
if (optional_positive_tagged_end_transition.has_value()) {
add_to_queue_and_visited(optional_positive_tagged_end_transition.value().get_dest_state(
));
}

auto const& optional_negative_tagged_transition
= current_state->get_negative_tagged_transition();
if (optional_negative_tagged_transition.has_value()) {
Expand Down
46 changes: 23 additions & 23 deletions src/log_surgeon/finite_automata/NfaState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ class NfaState {

NfaState() = default;

NfaState(Tag* tag, NfaState* dest_state)
: m_positive_tagged_end_transitions{{tag, dest_state}} {}
NfaState(Tag const* tag, NfaState const* dest_state)
: m_positive_tagged_end_transition{PositiveTaggedTransition{tag, dest_state}} {}

NfaState(std::vector<Tag*> tags, NfaState* dest_state)
NfaState(std::vector<Tag const*> tags, NfaState const* dest_state)
: m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {}

auto set_accepting(bool accepting) -> void { m_accepting = accepting; }
Expand All @@ -48,7 +48,8 @@ class NfaState {
return m_matching_variable_id;
}

auto add_positive_tagged_start_transition(Tag* tag, NfaState* dest_state) -> void {
auto
add_positive_tagged_start_transition(Tag const* tag, NfaState const* dest_state) -> void {
m_positive_tagged_start_transitions.emplace_back(tag, dest_state);
}

Expand All @@ -57,9 +58,9 @@ class NfaState {
return m_positive_tagged_start_transitions;
}

[[nodiscard]] auto get_positive_tagged_end_transitions(
) const -> std::vector<PositiveTaggedTransition<NfaState>> const& {
return m_positive_tagged_end_transitions;
[[nodiscard]] auto get_positive_tagged_end_transition(
) const -> std::optional<PositiveTaggedTransition<NfaState>> const& {
return m_positive_tagged_end_transition;
}

[[nodiscard]] auto get_negative_tagged_transition(
Expand Down Expand Up @@ -109,7 +110,7 @@ class NfaState {
bool m_accepting{false};
uint32_t m_matching_variable_id{0};
std::vector<PositiveTaggedTransition<NfaState>> m_positive_tagged_start_transitions;
std::vector<PositiveTaggedTransition<NfaState>> m_positive_tagged_end_transitions;
std::optional<PositiveTaggedTransition<NfaState>> m_positive_tagged_end_transition;
std::optional<NegativeTaggedTransition<NfaState>> m_negative_tagged_transition;
std::vector<NfaState*> m_epsilon_transitions;
std::array<std::vector<NfaState*>, cSizeOfByte> m_bytes_transitions;
Expand Down Expand Up @@ -185,28 +186,27 @@ auto NfaState<state_type>::serialize(
epsilon_transitions.emplace_back(std::to_string(state_ids.at(dest_state)));
}

std::vector<std::string> positive_tagged_start_transition_strings;
std::vector<std::string> serialized_positive_tagged_start_transitions;
for (auto const& positive_tagged_start_transition : m_positive_tagged_start_transitions) {
auto const optional_serialized_positive_transition
auto const optional_serialized_positive_start_transition
= positive_tagged_start_transition.serialize(state_ids);
if (false == optional_serialized_positive_transition.has_value()) {
if (false == optional_serialized_positive_start_transition.has_value()) {
return std::nullopt;
}
positive_tagged_start_transition_strings.emplace_back(
optional_serialized_positive_transition.value()
serialized_positive_tagged_start_transitions.emplace_back(
optional_serialized_positive_start_transition.value()
);
}

std::vector<std::string> positive_tagged_end_transition_strings;
for (auto const& positive_tagged_end_transition : m_positive_tagged_end_transitions) {
auto const optional_serialized_positive_transition
= positive_tagged_end_transition.serialize(state_ids);
if (false == optional_serialized_positive_transition.has_value()) {
std::string serialized_positive_tagged_end_transition;
if (m_positive_tagged_end_transition.has_value()) {
auto const optional_serialized_positive_end_transition
= m_positive_tagged_end_transition.value().serialize(state_ids);
if (false == optional_serialized_positive_end_transition.has_value()) {
return std::nullopt;
}
positive_tagged_end_transition_strings.emplace_back(
optional_serialized_positive_transition.value()
);
serialized_positive_tagged_end_transition
= optional_serialized_positive_end_transition.value();
}

std::string negative_tagged_transition_string;
Expand All @@ -230,8 +230,8 @@ auto NfaState<state_type>::serialize(
accepting_tag_string,
fmt::join(byte_transitions, ","),
fmt::join(epsilon_transitions, ","),
fmt::join(positive_tagged_start_transition_strings, ","),
fmt::join(positive_tagged_end_transition_strings, ","),
fmt::join(serialized_positive_tagged_start_transitions, ","),
serialized_positive_tagged_end_transition,
negative_tagged_transition_string
);
}
Expand Down
20 changes: 20 additions & 0 deletions src/log_surgeon/finite_automata/PrefixTree.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include "PrefixTree.hpp"

#include <stdexcept>
#include <vector>

namespace log_surgeon::finite_automata {
auto PrefixTree::get_reversed_positions(id_t const node_id) const -> std::vector<position_t> {
    // Reject ids that do not refer to an existing node.
    if (node_id >= m_nodes.size()) {
        throw std::out_of_range("Prefix tree index out of range.");
    }

    // Walk the parent chain starting at `node_id`, collecting each node's position. The walk
    // stops on reaching the root, whose position is not part of the result.
    std::vector<position_t> positions_from_node_to_root;
    for (auto const* node{&m_nodes[node_id]}; false == node->is_root();
         node = &m_nodes[node->get_parent_node_id().value()])
    {
        positions_from_node_to_root.push_back(node->get_position());
    }
    return positions_from_node_to_root;
}
} // namespace log_surgeon::finite_automata
85 changes: 85 additions & 0 deletions src/log_surgeon/finite_automata/PrefixTree.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP
#define LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP

#include <cstddef>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <vector>

namespace log_surgeon::finite_automata {
/**
 * Represents a prefix tree to store register data during TDFA simulation. Each node in the tree
 * stores a single position in the lexed string. Each path from the root to a node corresponds to
 * a sequence of positions for an individual tag:
 * - Positive position node: Indicates the tag was matched at the position.
 * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path,
 * it indicates the tag was never matched. If the negative tag is along a path containing positive
 * nodes, it functions as a placeholder. This can be useful for nested capture groups, to maintain a
 * one-to-one mapping between the contained capture group and the enclosing capture group.
 *
 * Nodes live in a single vector and are referred to by their index in it (`id_t`). The root is
 * created by the constructor at index `cRootId`; it has no parent and holds the position -1.
 */
class PrefixTree {
public:
    using id_t = uint32_t;
    using position_t = int32_t;

    static constexpr id_t cRootId{0};

    /**
     * Constructs a tree containing only the root node (no parent, position -1).
     */
    PrefixTree() : m_nodes{{std::nullopt, -1}} {}

    /**
     * @param parent_node_id Index of the inserted node's parent in the prefix tree.
     * @param position The position in the lexed string.
     * @return The index of the newly inserted node in the tree.
     * @throw std::out_of_range if the parent's index is out of range.
     */
    [[maybe_unused]] auto insert(id_t const parent_node_id, position_t const position) -> id_t {
        if (m_nodes.size() <= parent_node_id) {
            throw std::out_of_range("Predecessor index out of range.");
        }

        m_nodes.emplace_back(parent_node_id, position);
        // The vector's size is a `std::size_t` while node ids are `id_t`; spell out the narrowing
        // conversion instead of relying on an implicit one.
        return static_cast<id_t>(m_nodes.size() - 1);
    }

    /**
     * Overwrites the position stored in an existing node.
     * @param node_id The index of the node to update.
     * @param position The new position to store in the node.
     * @throw std::out_of_range if the index is out of range.
     */
    auto set(id_t const node_id, position_t const position) -> void {
        m_nodes.at(node_id).set_position(position);
    }

    /**
     * @return The number of nodes in the tree, including the root.
     */
    [[nodiscard]] auto size() const -> std::size_t { return m_nodes.size(); }

    /**
     * @param node_id The index of the node.
     * @return A vector containing positions in order from the given index up to but not including
     * the root node.
     * @throw std::out_of_range if the index is out of range.
     */
    [[nodiscard]] auto get_reversed_positions(id_t node_id) const -> std::vector<position_t>;

private:
    /**
     * A single tree node: the index of its parent (absent for the root) and one position.
     */
    class Node {
    public:
        Node(std::optional<id_t> const parent_node_id, position_t const position)
                : m_parent_node_id{parent_node_id},
                  m_position{position} {}

        /**
         * @return Whether this node has no parent, i.e., it is the root.
         */
        [[nodiscard]] auto is_root() const -> bool { return false == m_parent_node_id.has_value(); }

        /**
         * @return The index of the parent node, or `std::nullopt` for the root.
         */
        [[nodiscard]] auto get_parent_node_id() const -> std::optional<id_t> {
            return m_parent_node_id;
        }

        auto set_position(position_t const position) -> void { m_position = position; }

        [[nodiscard]] auto get_position() const -> position_t { return m_position; }

    private:
        std::optional<id_t> m_parent_node_id;
        position_t m_position;
    };

    // All nodes of the tree; a node's id is its index in this vector.
    std::vector<Node> m_nodes;
};

} // namespace log_surgeon::finite_automata

#endif // LOG_SURGEON_FINITE_AUTOMATA_PREFIX_TREE_HPP
Loading

0 comments on commit 69a7ad1

Please sign in to comment.