Skip to content

Commit

Permalink
Merge branch 'remove-redundant-typenames' into remove-regex-prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
SharafMohamed committed Nov 14, 2024
2 parents 1a1655a + 7195f57 commit 1b1460b
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 85 deletions.
8 changes: 4 additions & 4 deletions src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class Nfa {
*/
[[nodiscard]] auto new_state_with_positive_tagged_transition(
Tag* tag,
NfaStateType const* dest_state
NfaStateType* dest_state
) -> NfaStateType*;

/**
Expand All @@ -51,7 +51,7 @@ class Nfa {
*/
[[nodiscard]] auto new_state_with_negative_tagged_transition(
std::vector<Tag*> tags,
NfaStateType const* dest_state
NfaStateType* dest_state
) -> NfaStateType*;

/**
Expand Down Expand Up @@ -99,7 +99,7 @@ auto Nfa<NfaStateType>::new_state() -> NfaStateType* {
template <typename NfaStateType>
auto Nfa<NfaStateType>::new_state_with_positive_tagged_transition(
Tag* tag,
NfaStateType const* dest_state
NfaStateType* dest_state
) -> NfaStateType* {
m_states.emplace_back(std::make_unique<NfaStateType>(tag, dest_state));
return m_states.back().get();
Expand All @@ -108,7 +108,7 @@ auto Nfa<NfaStateType>::new_state_with_positive_tagged_transition(
template <typename NfaStateType>
auto Nfa<NfaStateType>::new_state_with_negative_tagged_transition(
std::vector<Tag*> tags,
NfaStateType const* dest_state
NfaStateType* dest_state
) -> NfaStateType* {
m_states.emplace_back(std::make_unique<NfaStateType>(std::move(tags), dest_state));
return m_states.back().get();
Expand Down
4 changes: 2 additions & 2 deletions src/log_surgeon/finite_automata/NfaState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ class NfaState {

NfaState() = default;

NfaState(Tag* tag, NfaState const* dest_state)
NfaState(Tag* tag, NfaState* dest_state)
: m_positive_tagged_end_transitions{{tag, dest_state}} {}

NfaState(std::vector<Tag*> tags, NfaState const* dest_state)
NfaState(std::vector<Tag*> tags, NfaState* dest_state)
: m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {}

auto set_accepting(bool accepting) -> void { m_accepting = accepting; }
Expand Down
67 changes: 33 additions & 34 deletions src/log_surgeon/finite_automata/RegexAST.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <ranges>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -90,11 +91,11 @@ class RegexAST {
m_subtree_positive_tags = std::move(subtree_positive_tags);
}

auto add_subtree_positive_tags(std::vector<Tag*> subtree_positive_tags) -> void {
auto add_subtree_positive_tags(std::vector<Tag*> const& subtree_positive_tags) -> void {
m_subtree_positive_tags.insert(
m_subtree_positive_tags.end(),
std::make_move_iterator(subtree_positive_tags.begin()),
std::make_move_iterator(subtree_positive_tags.end())
subtree_positive_tags.cbegin(),
subtree_positive_tags.cend()
);
}

Expand All @@ -107,8 +108,8 @@ class RegexAST {
* @param nfa
* @param end_state
*/
auto add_to_nfa_with_negative_tags(Nfa<NfaStateType>* nfa, NfaStateType* end_state) const
-> void {
auto
add_to_nfa_with_negative_tags(Nfa<NfaStateType>* nfa, NfaStateType* end_state) const -> void {
// Handle negative tags as:
// root --(regex)--> state_with_negative_tagged_transition --(negative tags)--> end_state
if (false == m_negative_tags.empty()) {
Expand All @@ -123,8 +124,8 @@ class RegexAST {
protected:
RegexAST(RegexAST const& rhs) = default;
auto operator=(RegexAST const& rhs) -> RegexAST& = default;
RegexAST(RegexAST&& rhs) noexcept = default;
auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = default;
RegexAST(RegexAST&& rhs) noexcept = delete;
auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = delete;

[[nodiscard]] auto serialize_negative_tags() const -> std::u32string {
if (m_negative_tags.empty()) {
Expand Down Expand Up @@ -438,10 +439,6 @@ class RegexASTOr : public RegexAST<NfaStateType> {
m_left(std::unique_ptr<RegexAST<NfaStateType>>(rhs.m_left->clone())),
m_right(std::unique_ptr<RegexAST<NfaStateType>>(rhs.m_right->clone())) {}

auto operator=(RegexASTOr const& rhs) -> RegexASTOr& = default;
RegexASTOr(RegexASTOr&& rhs) noexcept = default;
auto operator=(RegexASTOr&& rhs) noexcept -> RegexASTOr& = default;

/**
* Used for cloning a unique_pointer of type RegexASTOr
* @return RegexASTOr*
Expand Down Expand Up @@ -505,10 +502,6 @@ class RegexASTCat : public RegexAST<NfaStateType> {
m_left(std::unique_ptr<RegexAST<NfaStateType>>(rhs.m_left->clone())),
m_right(std::unique_ptr<RegexAST<NfaStateType>>(rhs.m_right->clone())) {}

auto operator=(RegexASTCat const& rhs) -> RegexASTCat& = default;
RegexASTCat(RegexASTCat&& rhs) noexcept = default;
auto operator=(RegexASTCat&& rhs) noexcept -> RegexASTCat& = default;

/**
* Used for cloning a unique_pointer of type RegexASTCat
* @return RegexASTCat*
Expand Down Expand Up @@ -574,10 +567,6 @@ class RegexASTMultiplication : public RegexAST<NfaStateType> {
m_min(rhs.m_min),
m_max(rhs.m_max) {}

auto operator=(RegexASTMultiplication const& rhs) -> RegexASTMultiplication& = default;
RegexASTMultiplication(RegexASTMultiplication&& rhs) noexcept = default;
auto operator=(RegexASTMultiplication&& rhs) noexcept -> RegexASTMultiplication& = default;

/**
* Used for cloning a unique_pointer of type RegexASTMultiplication
* @return RegexASTMultiplication*
Expand Down Expand Up @@ -632,17 +621,33 @@ class RegexASTMultiplication : public RegexAST<NfaStateType> {
uint32_t m_max;
};

/**
* Represents a capture group AST node.
* `m_tag` is always expected to be non-null.
* `m_group_regex_ast` is always expected to be non-null.
* @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters).
*/
template <typename NfaStateType>
class RegexASTCapture : public RegexAST<NfaStateType> {
public:
~RegexASTCapture() override = default;

/**
* @param group_regex_ast
* @param tag
* @throw std::invalid_argument if `group_regex_ast` or `tag` is `nullptr`.
*/
RegexASTCapture(
std::unique_ptr<RegexAST<NfaStateType>> group_regex_ast,
std::unique_ptr<Tag> tag
)
: m_group_regex_ast{std::move(group_regex_ast)},
m_tag{std::move(tag)} {
: m_group_regex_ast{(
nullptr == group_regex_ast
? throw std::invalid_argument("Group regex AST cannot be null")
: std::move(group_regex_ast)
)},
m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null")
: std::move(tag)} {
RegexAST<NfaStateType>::set_subtree_positive_tags(
m_group_regex_ast->get_subtree_positive_tags()
);
Expand All @@ -654,14 +659,10 @@ class RegexASTCapture : public RegexAST<NfaStateType> {
m_group_regex_ast{
std::unique_ptr<RegexAST<NfaStateType>>(rhs.m_group_regex_ast->clone())
},
m_tag{rhs.m_tag ? std::make_unique<Tag>(*rhs.m_tag) : nullptr} {
m_tag{std::make_unique<Tag>(*rhs.m_tag)} {
RegexAST<NfaStateType>::set_subtree_positive_tags(rhs.get_subtree_positive_tags());
}

auto operator=(RegexASTCapture const& rhs) -> RegexASTCapture& = default;
RegexASTCapture(RegexASTCapture&& rhs) noexcept = default;
auto operator=(RegexASTCapture&& rhs) noexcept -> RegexASTCapture& = default;

/**
* Used for cloning a `unique_pointer` of type `RegexASTCapture`.
* @return RegexASTCapture*
Expand Down Expand Up @@ -699,7 +700,7 @@ class RegexASTCapture : public RegexAST<NfaStateType> {

[[nodiscard]] auto serialize() const -> std::u32string override;

[[nodiscard]] auto get_group_name() const -> std::string const& { return m_tag->get_name(); }
[[nodiscard]] auto get_group_name() const -> std::string_view { return m_tag->get_name(); }

[[nodiscard]] auto get_group_regex_ast(
) const -> std::unique_ptr<RegexAST<NfaStateType>> const& {
Expand Down Expand Up @@ -779,8 +780,7 @@ RegexASTOr<NfaStateType>::RegexASTOr(
}

template <typename NfaStateType>
void RegexASTOr<NfaStateType>::add_to_nfa(Nfa<NfaStateType>* nfa, NfaStateType* end_state)
const {
void RegexASTOr<NfaStateType>::add_to_nfa(Nfa<NfaStateType>* nfa, NfaStateType* end_state) const {
m_left->add_to_nfa_with_negative_tags(nfa, end_state);
m_right->add_to_nfa_with_negative_tags(nfa, end_state);
}
Expand All @@ -807,8 +807,7 @@ RegexASTCat<NfaStateType>::RegexASTCat(
}

template <typename NfaStateType>
void RegexASTCat<NfaStateType>::add_to_nfa(Nfa<NfaStateType>* nfa, NfaStateType* end_state)
const {
void RegexASTCat<NfaStateType>::add_to_nfa(Nfa<NfaStateType>* nfa, NfaStateType* end_state) const {
NfaStateType* saved_root = nfa->get_root();
NfaStateType* intermediate_state = nfa->new_state();
m_left->add_to_nfa_with_negative_tags(nfa, intermediate_state);
Expand Down Expand Up @@ -904,12 +903,12 @@ void RegexASTCapture<NfaStateType>::add_to_nfa(Nfa<NfaStateType>* nfa, NfaStateT
nfa->set_root(root);
}

template <typename NfaStateType>
template <typename NFAStateType>
[[nodiscard]] auto RegexASTCapture<NfaStateType>::serialize() const -> std::u32string {
auto const tag_name_u32 = std::u32string(m_tag->get_name().begin(), m_tag->get_name().end());
auto const tag_name_u32 = std::u32string(m_tag->get_name().cbegin(), m_tag->get_name().cend());
return fmt::format(
U"({})<{}>{}",
nullptr != m_group_regex_ast ? m_group_regex_ast->serialize() : U"null",
m_group_regex_ast->serialize(),
tag_name_u32,
RegexAST<NfaStateType>::serialize_negative_tags()
);
Expand Down
25 changes: 3 additions & 22 deletions src/log_surgeon/finite_automata/Tag.hpp
Original file line number Diff line number Diff line change
@@ -1,38 +1,19 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_TAG
#define LOG_SURGEON_FINITE_AUTOMATA_TAG

#include <cstdint>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

namespace log_surgeon::finite_automata {
/**
* This class represents a tag that is associated with matches of a capture group. If
* `m_start_positions` is empty, it indicates that the capture group was unmatched.
*
* Since capture group regex can be contained within repetition regex,
* (e.g., "((user_id=(?<uid>\d+),)+"), `m_start_positions` and `m_end_positions` are vectors that
* track the locations of each occurrence of the capture group.
*/
class Tag {
public:
explicit Tag(std::string name) : m_name{std::move(name)} {}

auto set_start_positions(std::vector<uint32_t> start_positions) -> void {
m_start_positions = std::move(start_positions);
}

auto set_end_positions(std::vector<uint32_t> end_positions) -> void {
m_end_positions = std::move(end_positions);
}

[[nodiscard]] auto get_name() const -> std::string const& { return m_name; }
[[nodiscard]] auto get_name() const -> std::string_view { return m_name; }

private:
std::string const m_name;
std::vector<uint32_t> m_start_positions;
std::vector<uint32_t> m_end_positions;
std::string m_name;
};
} // namespace log_surgeon::finite_automata

Expand Down
48 changes: 31 additions & 17 deletions src/log_surgeon/finite_automata/TaggedTransition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@
#include <log_surgeon/finite_automata/Tag.hpp>

namespace log_surgeon::finite_automata {
/**
* Represents an NFA transition indicating that a capture group has been matched.
* `m_tag` is always expected to be non-null.
* @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters).
*/
template <typename NfaStateType>
class PositiveTaggedTransition {
public:
/**
* @param tag
* @param dest_state
* @throw std::invalid_argument if `tag` is `nullptr`.
*/
PositiveTaggedTransition(Tag* tag, NfaStateType const* dest_state)
: m_tag{tag},
: m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : tag},
m_dest_state{dest_state} {}

[[nodiscard]] auto get_dest_state() const -> NfaStateType const* { return m_dest_state; }

auto set_tag_start_positions(std::vector<uint32_t> start_positions) const -> void {
m_tag->set_start_positions(std::move(start_positions));
}

auto set_tag_end_positions(std::vector<uint32_t> end_positions) const -> void {
m_tag->set_end_positions(std::move(end_positions));
}

/**
* @param state_ids A map of states to their unique identifiers.
* @return A string representation of the positive tagged transition on success.
Expand All @@ -36,7 +38,7 @@ class PositiveTaggedTransition {
[[nodiscard]] auto serialize(std::unordered_map<NfaStateType const*, uint32_t> const& state_ids
) const -> std::optional<std::string> {
auto const state_id_it = state_ids.find(m_dest_state);
if (state_id_it == state_ids.end() || nullptr == m_tag) {
if (state_id_it == state_ids.end()) {
return std::nullopt;
}
return fmt::format("{}[{}]", state_id_it->second, m_tag->get_name());
Expand All @@ -47,11 +49,26 @@ class PositiveTaggedTransition {
NfaStateType const* m_dest_state;
};

/**
* Represents an NFA transition indicating that a capture group has been unmatched.
* All tags in `m_tags` are always expected to be non-null.
* @tparam NfaStateType Specifies the type of transition (bytes or UTF-8 characters).
*/
template <typename NfaStateType>
class NegativeTaggedTransition {
public:
NegativeTaggedTransition(std::vector<Tag*> tags, NfaStateType const* dest_state)
: m_tags{std::move(tags)},
/**
* @param tags
* @param dest_state
* @throw std::invalid_argument if any element in `tags` is `nullptr`.
*/
NegativeTaggedTransition(std::vector<Tag*> tags, NfaStateType* dest_state)
: m_tags{[&tags] {
if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) {
throw std::invalid_argument("Tags cannot contain null elements");
}
return std::move(tags);
}()},
m_dest_state{dest_state} {}

[[nodiscard]] auto get_dest_state() const -> NfaStateType const* { return m_dest_state; }
Expand All @@ -68,17 +85,14 @@ class NegativeTaggedTransition {
return std::nullopt;
}

if (std::ranges::any_of(m_tags, [](Tag const* tag) { return tag == nullptr; })) {
return std::nullopt;
}
auto const tag_names = m_tags | std::ranges::views::transform(&Tag::get_name);

return fmt::format("{}[{}]", state_id_it->second, fmt::join(tag_names, ","));
}

private:
std::vector<Tag*> const m_tags;
NfaStateType const* m_dest_state;
std::vector<Tag*> m_tags;
NfaStateType* m_dest_state;
};
} // namespace log_surgeon::finite_automata

Expand Down
4 changes: 1 addition & 3 deletions tests/test-lexer.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include <codecvt>
#include <cstdint>
#include <locale>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
Expand Down Expand Up @@ -118,7 +116,7 @@ TEST_CASE("Test the Schema class", "[Schema]") {
auto* regex_ast_capture
= dynamic_cast<RegexASTCaptureByte const*>(regex_ast_cat_ptr->get_right());
REQUIRE(nullptr != regex_ast_capture);
REQUIRE("uID" == regex_ast_capture->get_group_name());
REQUIRE("uID" == string{regex_ast_capture->get_group_name()});

auto* regex_ast_multiplication_ast = dynamic_cast<RegexASTMultiplicationByte*>(
regex_ast_capture->get_group_regex_ast().get()
Expand Down
Loading

0 comments on commit 1b1460b

Please sign in to comment.