diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp index 895f4a7..0cfee64 100644 --- a/src/log_surgeon/Lexer.hpp +++ b/src/log_surgeon/Lexer.hpp @@ -11,9 +11,9 @@ #include #include -#include #include #include +#include #include #include #include diff --git a/src/log_surgeon/LogParser.cpp b/src/log_surgeon/LogParser.cpp index 04a2613..be680c5 100644 --- a/src/log_surgeon/LogParser.cpp +++ b/src/log_surgeon/LogParser.cpp @@ -17,6 +17,8 @@ using std::unique_ptr; using std::vector; namespace log_surgeon { +using finite_automata::DfaByteState; +using finite_automata::NfaByteState; using finite_automata::RegexAST; using finite_automata::RegexASTCat; using finite_automata::RegexASTGroup; @@ -24,8 +26,6 @@ using finite_automata::RegexASTInteger; using finite_automata::RegexASTLiteral; using finite_automata::RegexASTMultiplication; using finite_automata::RegexASTOr; -using finite_automata::DfaByteState; -using finite_automata::NfaByteState; LogParser::LogParser(string const& schema_file_path) : LogParser::LogParser(SchemaParser::try_schema_file(schema_file_path)) {} @@ -62,8 +62,7 @@ void LogParser::add_rules(std::unique_ptr schema_ast) { for (unique_ptr const& parser_ast : schema_ast->m_schema_vars) { auto* rule = dynamic_cast(parser_ast.get()); if (rule->m_name == "timestamp") { - unique_ptr> first_timestamp_regex_ast( - rule->m_regex_ptr->clone() + unique_ptr> first_timestamp_regex_ast(rule->m_regex_ptr->clone() ); unique_ptr> r1 = make_unique>(utf8::cCharStartOfFile); @@ -74,9 +73,8 @@ void LogParser::add_rules(std::unique_ptr schema_ast) { std::move(first_timestamp_regex_ast) ) ); - unique_ptr> newline_timestamp_regex_ast( - rule->m_regex_ptr->clone() - ); + unique_ptr> newline_timestamp_regex_ast(rule->m_regex_ptr->clone( + )); unique_ptr> r2 = make_unique>('\n'); add_rule( @@ -143,9 +141,7 @@ void LogParser::add_rules(std::unique_ptr schema_ast) { // For log-specific lexing: modify variable regex to contain a delimiter at the start. unique_ptr> delimiter_group - = make_unique>( - RegexASTGroup(delimiters) - ); + = make_unique>(RegexASTGroup(delimiters)); rule->m_regex_ptr = make_unique>( std::move(delimiter_group), std::move(rule->m_regex_ptr) diff --git a/src/log_surgeon/LogParser.hpp b/src/log_surgeon/LogParser.hpp index 6afe1df..14d77f8 100644 --- a/src/log_surgeon/LogParser.hpp +++ b/src/log_surgeon/LogParser.hpp @@ -15,8 +15,7 @@ namespace log_surgeon { // TODO: Compare c-array vs. vectors (its underlying array) for buffers -class LogParser - : public Parser { +class LogParser : public Parser { public: enum class ParsingAction { None, diff --git a/src/log_surgeon/SchemaParser.cpp b/src/log_surgeon/SchemaParser.cpp index d9913cd..c7c5e6a 100644 --- a/src/log_surgeon/SchemaParser.cpp +++ b/src/log_surgeon/SchemaParser.cpp @@ -19,22 +19,22 @@ using ParserValueRegex = log_surgeon::ParserValue>>; using RegexASTByte = log_surgeon::finite_automata::RegexAST; -using RegexASTGroupByte = log_surgeon::finite_automata::RegexASTGroup< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTIntegerByte = log_surgeon::finite_automata::RegexASTInteger< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTLiteralByte = log_surgeon::finite_automata::RegexASTLiteral< - log_surgeon::finite_automata::NfaByteState>; +using RegexASTGroupByte + = log_surgeon::finite_automata::RegexASTGroup; +using RegexASTIntegerByte + = log_surgeon::finite_automata::RegexASTInteger; +using RegexASTLiteralByte + = log_surgeon::finite_automata::RegexASTLiteral; using RegexASTMultiplicationByte = log_surgeon::finite_automata::RegexASTMultiplication< log_surgeon::finite_automata::NfaByteState>; using RegexASTOrByte = log_surgeon::finite_automata::RegexASTOr; -using RegexASTCatByte = log_surgeon::finite_automata::RegexASTCat< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTCaptureByte = log_surgeon::finite_automata::RegexASTCapture< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTEmptyByte = log_surgeon::finite_automata::RegexASTEmpty< - log_surgeon::finite_automata::NfaByteState>; +using RegexASTCatByte + = log_surgeon::finite_automata::RegexASTCat; +using RegexASTCaptureByte + = log_surgeon::finite_automata::RegexASTCapture; +using RegexASTEmptyByte + = log_surgeon::finite_automata::RegexASTEmpty; using std::make_unique; using std::string; diff --git a/src/log_surgeon/SchemaParser.hpp b/src/log_surgeon/SchemaParser.hpp index 272d800..748b94a 100644 --- a/src/log_surgeon/SchemaParser.hpp +++ b/src/log_surgeon/SchemaParser.hpp @@ -46,8 +46,7 @@ class SchemaVarAST : public ParserAST { // Constructor SchemaVarAST( std::string name, - std::unique_ptr> - regex_ptr, + std::unique_ptr> regex_ptr, uint32_t line_num ) : m_line_num(line_num), @@ -69,9 +68,8 @@ class DelimiterStringAST : public ParserAST { std::vector m_delimiters; }; -class SchemaParser : public LALR1Parser< - finite_automata::NfaByteState, - finite_automata::DfaByteState> { +class SchemaParser + : public LALR1Parser { public: /** * File wrapper around generate_schema_ast() diff --git a/src/log_surgeon/finite_automata/Dfa.hpp b/src/log_surgeon/finite_automata/Dfa.hpp index c20fc9f..a200057 100644 --- a/src/log_surgeon/finite_automata/Dfa.hpp +++ b/src/log_surgeon/finite_automata/Dfa.hpp @@ -41,8 +41,7 @@ class Dfa { template template -auto Dfa::new_state(std::set const& nfa_state_set -) -> DfaStateType* { +auto Dfa::new_state(std::set const& nfa_state_set) -> DfaStateType* { m_states.emplace_back(std::make_unique()); auto* dfa_state = m_states.back().get(); for (auto const* nfa_state : nfa_state_set) { diff --git a/src/log_surgeon/finite_automata/DfaStateType.hpp b/src/log_surgeon/finite_automata/DfaStateType.hpp index 3909df4..017134c 100644 --- a/src/log_surgeon/finite_automata/DfaStateType.hpp +++ b/src/log_surgeon/finite_automata/DfaStateType.hpp @@ -4,7 +4,7 @@ #include namespace log_surgeon::finite_automata { -enum class DfaStateType : uint8_t { +enum class DfaStateType : uint8_t { Byte, Utf8 }; diff --git a/src/log_surgeon/finite_automata/NfaState.hpp b/src/log_surgeon/finite_automata/NfaState.hpp index 88f2509..7c406dc 100644 --- a/src/log_surgeon/finite_automata/NfaState.hpp +++ b/src/log_surgeon/finite_automata/NfaState.hpp @@ -48,8 +48,7 @@ class NfaState { return m_matching_variable_id; } - auto - add_positive_tagged_start_transition(Tag const* tag, NfaState const* dest_state) -> void { + auto add_positive_tagged_start_transition(Tag const* tag, NfaState const* dest_state) -> void { m_positive_tagged_start_transitions.emplace_back(tag, dest_state); } @@ -80,8 +79,7 @@ class NfaState { m_bytes_transitions[byte].push_back(dest_state); } - [[nodiscard]] auto get_byte_transitions(uint8_t byte - ) const -> std::vector const& { + [[nodiscard]] auto get_byte_transitions(uint8_t byte) const -> std::vector const& { return m_bytes_transitions[byte]; } @@ -117,8 +115,7 @@ class NfaState { // NOTE: We don't need m_tree_transitions for the `stateType == // DfaStateType::Byte` case, so we use an empty class (`std::tuple<>`) // in that case. - std::conditional_t> - m_tree_transitions; + std::conditional_t> m_tree_transitions; }; template @@ -169,8 +166,7 @@ auto NfaState::add_interval(Interval interval, NfaState* dest_state) } template -auto NfaState::serialize( - std::unordered_map const& state_ids +auto NfaState::serialize(std::unordered_map const& state_ids ) const -> std::optional { std::vector byte_transitions; for (uint32_t idx{0}; idx < cSizeOfByte; ++idx) { diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp index dc74131..9573c23 100644 --- a/src/log_surgeon/finite_automata/RegexAST.hpp +++ b/src/log_surgeon/finite_automata/RegexAST.hpp @@ -890,10 +890,8 @@ template } template -auto RegexASTCapture::add_to_nfa( - Nfa* nfa, - NfaStateType* dest_state -) const -> void { +auto RegexASTCapture::add_to_nfa(Nfa* nfa, NfaStateType* dest_state) + const -> void { // TODO: move this into a documentation file in the future, and reference it here. // The NFA constructed for a capture group follows the structure below, with tagged transitions // explicitly labeled for clarity: diff --git a/src/log_surgeon/finite_automata/RegexDFAStateType.hpp b/src/log_surgeon/finite_automata/RegexDFAStateType.hpp new file mode 100644 index 0000000..ae4e52d --- /dev/null +++ b/src/log_surgeon/finite_automata/RegexDFAStateType.hpp @@ -0,0 +1,13 @@ +#ifndef LOG_SURGEON_FINITE_AUTOMATA_REGEX_DFA_STATE_TYPE +#define LOG_SURGEON_FINITE_AUTOMATA_REGEX_DFA_STATE_TYPE + +#include + +namespace log_surgeon::finite_automata { +enum class RegexDFAStateType : uint8_t { + Byte, + UTF8 +}; +} // namespace log_surgeon::finite_automata + +#endif // LOG_SURGEON_FINITE_AUTOMATA_REGEX_DFA_STATE_TYPE diff --git a/tests/test-NFA.cpp b/tests/test-NFA.cpp index 493e4f0..834e7fe 100644 --- a/tests/test-NFA.cpp +++ b/tests/test-NFA.cpp @@ -7,8 +7,8 @@ #include #include -#include #include +#include #include #include diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp index 2b02892..a3ab69f 100644 --- a/tests/test-lexer.cpp +++ b/tests/test-lexer.cpp @@ -6,8 +6,8 @@ #include -#include #include +#include #include #include @@ -18,14 +18,14 @@ using std::u32string; using std::vector; using std::wstring_convert; -using RegexASTCatByte = log_surgeon::finite_automata::RegexASTCat< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTCaptureByte = log_surgeon::finite_automata::RegexASTCapture< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTGroupByte = log_surgeon::finite_automata::RegexASTGroup< - log_surgeon::finite_automata::NfaByteState>; -using RegexASTLiteralByte = log_surgeon::finite_automata::RegexASTLiteral< - log_surgeon::finite_automata::NfaByteState>; +using RegexASTCatByte + = log_surgeon::finite_automata::RegexASTCat; +using RegexASTCaptureByte + = log_surgeon::finite_automata::RegexASTCapture; +using RegexASTGroupByte + = log_surgeon::finite_automata::RegexASTGroup; +using RegexASTLiteralByte + = log_surgeon::finite_automata::RegexASTLiteral; using RegexASTMultiplicationByte = log_surgeon::finite_automata::RegexASTMultiplication< log_surgeon::finite_automata::NfaByteState>; using RegexASTOrByte