From 85d311eb13ca96bd327ecbd25224262a3d744acc Mon Sep 17 00:00:00 2001 From: Tony Holdstock-Brown Date: Wed, 6 Nov 2024 15:35:29 -0800 Subject: [PATCH 1/5] wip mem stats --- expr_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/expr_test.go b/expr_test.go index 66e0236..f4b1b0a 100644 --- a/expr_test.go +++ b/expr_test.go @@ -5,6 +5,7 @@ import ( "encoding/hex" "fmt" "math/rand" + "runtime" "strings" "sync" "testing" @@ -181,6 +182,7 @@ func TestEvaluate_Strings(t *testing.T) { } func TestEvaluate_Strings_Inequality(t *testing.T) { + ctx := context.Background() parser := NewTreeParser(NewCachingCompiler(newEnv(), nil)) @@ -199,6 +201,9 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { require.EqualValues(t, n+1, e.Len()) + mem := getMem() + printMem(mem, "no matches") + t.Run("It matches items", func(t *testing.T) { pre := time.Now() evals, matched, err := e.Evaluate(ctx, map[string]any{ @@ -222,6 +227,8 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { require.GreaterOrEqual(t, matched, int32(1)) }) + printMem(getMem(), "first match") + t.Run("It handles non-matching data", func(t *testing.T) { pre := time.Now() evals, matched, err := e.Evaluate(ctx, map[string]any{ @@ -241,6 +248,8 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { require.EqualValues(t, 1, len(evals)) require.EqualValues(t, 1, matched) }) + + printMem(getMem(), "second match") } func TestEvaluate_Numbers(t *testing.T) { @@ -1189,3 +1198,32 @@ func addOtherExpressions(n int, e AggregateEvaluator, loader *evalLoader) { } wg.Wait() } + +func getMem() runtime.MemStats { + var m runtime.MemStats + runtime.ReadMemStats(&m) + return m +} + +func deltaMem(prev runtime.MemStats) runtime.MemStats { + next := getMem() + + return runtime.MemStats{ + HeapAlloc: next.HeapAlloc - prev.HeapAlloc, + Alloc: next.Alloc - prev.Alloc, + TotalAlloc: next.TotalAlloc - prev.TotalAlloc, + } +} + +func printMem(m runtime.MemStats, label ...string) { + if 
len(label) > 0 { + fmt.Printf("\t%s\n", label[0]) + } + + fmt.Printf("\tAlloc: %d MiB\n", bToMb(m.Alloc)) + fmt.Printf("\tTotalAlloc: %d MiB\n", bToMb(m.TotalAlloc)) +} + +func bToMb(b uint64) uint64 { + return b / 1024 / 1024 +} From d22fba94b0c6fbaf3c66a0be2d3398842382a82a Mon Sep 17 00:00:00 2001 From: Tony Holdstock-Brown Date: Wed, 6 Nov 2024 17:07:47 -0800 Subject: [PATCH 2/5] Add GroupID optimizations of != branches --- engine_stringmap.go | 66 ++++++++++++++-- expr_test.go | 6 +- groupid.go | 44 +++++++++-- groupid_test.go | 9 ++- parser.go | 33 +++++++- parser_test.go | 186 ++++++++++++++++++++++---------------------- 6 files changed, 228 insertions(+), 116 deletions(-) diff --git a/engine_stringmap.go b/engine_stringmap.go index 75d8477..1f193fe 100644 --- a/engine_stringmap.go +++ b/engine_stringmap.go @@ -66,6 +66,8 @@ func (n *stringLookup) Match(ctx context.Context, input map[string]any) ([]*Stor pool := newErrPool(errPoolOpts{concurrency: n.concurrency}) + neqOptimized := false + // First, handle equality matching. for item := range n.vars { path := item @@ -83,15 +85,23 @@ func (n *stringLookup) Match(ctx context.Context, input map[string]any) ([]*Stor } } - m := n.equalitySearch(ctx, path, str) + m, opt := n.equalitySearch(ctx, path, str) l.Lock() matched = append(matched, m...) + if opt { + neqOptimized = true + } l.Unlock() return nil }) } + // Wait for equality matching to optimize inequality matching + if err := eg.Wait(); err != nil { + return nil, err + } + // Then, iterate through the inequality matches. for item := range n.inequality { path := item @@ -109,7 +119,7 @@ func (n *stringLookup) Match(ctx context.Context, input map[string]any) ([]*Stor } } - m := n.inequalitySearch(ctx, path, str) + m := n.inequalitySearch(ctx, path, str, neqOptimized, matched) l.Lock() matched = append(matched, m...) 
@@ -131,11 +141,11 @@ func (n *stringLookup) Search(ctx context.Context, variable string, input any) ( return nil } - return n.equalitySearch(ctx, variable, str) - + matched, _ = n.equalitySearch(ctx, variable, str) + return matched } -func (n *stringLookup) equalitySearch(ctx context.Context, variable string, input string) (matched []*StoredExpressionPart) { +func (n *stringLookup) equalitySearch(ctx context.Context, variable string, input string) (matched []*StoredExpressionPart, neqOptimized bool) { n.lock.RLock() defer n.lock.RUnlock() @@ -150,27 +160,67 @@ func (n *stringLookup) equalitySearch(ctx context.Context, variable string, inpu // The variables don't match. continue } + + if part.GroupID.Flag() != OptimizeNone { + neqOptimized = true + } + filtered[i] = part i++ } filtered = filtered[0:i] - return filtered + return filtered, neqOptimized } -func (n *stringLookup) inequalitySearch(ctx context.Context, variable string, input string) (matched []*StoredExpressionPart) { +// inequalitySearch performs lookups for != matches. +func (n *stringLookup) inequalitySearch(ctx context.Context, variable string, input string, neqOptimized bool, currentMatches []*StoredExpressionPart) (matched []*StoredExpressionPart) { + if len(n.inequality[variable]) == 0 { + return nil + } + n.lock.RLock() defer n.lock.RUnlock() hashedInput := n.hash(input) + var found map[groupID]int8 + + if neqOptimized { + // If we're optimizing the "neq" value, we have a compound group which has both an == and != joined: + // `a == a && b != c`. + // + // In these cases, we'd naively return every StoredExpressionPart in the filter, as b != c - disregarding + // the `a == a` match. + // + // With optimizations, we check that there's the right number of string `==` matches in the group before + // evaluating !=, ensuring we keep allocations to a minimum. 
+ found = map[groupID]int8{} + for _, match := range currentMatches { + found[match.GroupID]++ + } + } + results := []*StoredExpressionPart{} for value, exprs := range n.inequality[variable] { if value == hashedInput { continue } - results = append(results, exprs...) + + if !neqOptimized { + results = append(results, exprs...) + continue + } + + for _, expr := range exprs { + res, ok := found[expr.GroupID] + if !ok || res < int8(expr.GroupID.Flag()) { + continue + } + results = append(results, expr) + } } + return results } diff --git a/expr_test.go b/expr_test.go index f4b1b0a..e760791 100644 --- a/expr_test.go +++ b/expr_test.go @@ -196,13 +196,11 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { require.NoError(t, err) n := 100_000 - addOtherExpressions(n, e, loader) - require.EqualValues(t, n+1, e.Len()) - mem := getMem() - printMem(mem, "no matches") + //mem := getMem() + //printMem(mem, "no matches") t.Run("It matches items", func(t *testing.T) { pre := time.Now() diff --git a/groupid.go b/groupid.go index 86a586b..1f14f1a 100644 --- a/groupid.go +++ b/groupid.go @@ -6,15 +6,34 @@ import ( "encoding/hex" ) -// groupID represents a group ID. The first 2 byets are an int16 size of the expression group, -// representing the number of predicates within the expression. The last 6 bytes are a random -// ID for the predicate group. +// groupID represents a group ID. Layout, in bytes: +// - 2: an int16 size of the expression group, +// - 1: optimization flag, for optimizing "!=" in string matching +// - 5: random ID for group type groupID [8]byte +// type internedGroupID unique.Handle[groupID] +// +// func (i internedGroupID) Value() groupID { +// return unique.Handle[groupID](i).Value() +// } +// +// func (i internedGroupID) Size() uint16 { +// // Uses unsafe pointers to access the underlying groupID +// // to return the size without a copy. 
+// handlePtr := unsafe.Pointer(&i) +// unsafe.Slice( +// // return (*groupID)(unsafe.Pointer(unsafe.SliceData(([8]byte)(handlePtr)))).Size() +// } + var rander = rand.Read type RandomReader func(p []byte) (n int, err error) +const ( + OptimizeNone = 0x0 +) + func (g groupID) String() string { return hex.EncodeToString(g[:]) } @@ -23,13 +42,22 @@ func (g groupID) Size() uint16 { return binary.NativeEndian.Uint16(g[0:2]) } -func newGroupID(size uint16) groupID { - return newGroupIDWithReader(size, rander) +func (g groupID) Flag() byte { + return g[2] } -func newGroupIDWithReader(size uint16, rander RandomReader) groupID { +func newGroupID(size uint16, optimizeFlag byte) groupID { + return newGroupIDWithReader(size, optimizeFlag, rander) +} + +func newGroupIDWithReader(size uint16, optimizeFlag byte, rander RandomReader) groupID { id := make([]byte, 8) binary.NativeEndian.PutUint16(id, size) - _, _ = rander(id[2:]) - return [8]byte(id[0:8]) + // Set the optimize byte. + id[2] = optimizeFlag + _, _ = rander(id[3:]) + + gid := groupID([8]byte(id[0:8])) + // interned := internedGroupID(unique.Make(gid)) + return gid } diff --git a/groupid_test.go b/groupid_test.go index 93aed5b..d09ee3e 100644 --- a/groupid_test.go +++ b/groupid_test.go @@ -8,8 +8,13 @@ import ( func TestGroupID(t *testing.T) { for i := uint16(0); i < 128; i++ { - gid := newGroupID(i) - require.Equal(t, i, gid.Size()) + gid := newGroupID(i, 0x0) + require.NotEmpty(t, gid[2:]) + require.Equal(t, i, gid.Size()) + + // check unsafe size method works + // gid := internedGID.Value() + // require.EqualValues(t, int(i), int(internedGID.Size())) } } diff --git a/parser.go b/parser.go index b982da2..1036d03 100644 --- a/parser.go +++ b/parser.go @@ -492,6 +492,37 @@ func navigateAST(nav expr, parent *Node, vars LiftedArgs, rand RandomReader) ([] total += 1 } + // For each AND, check to see if we have more than one string part, and check to see + // whether we have a "!=" and an "==" chained together. 
If so, this lets us optimize + // != checks so that we only return the aggregate match if the other "==" also matches. + // + // This is necessary: != returns basically every expression part, which is hugely costly + // in terms of allocation. We want to avoid that if possible. + var ( + stringEq uint8 + hasStringNeq bool + ) + for _, item := range parent.Ands { + if item.Predicate == nil { + continue + } + if _, ok := item.Predicate.Literal.(string); !ok { + continue + } + if item.Predicate.Operator == operators.Equals { + stringEq++ + } + if item.Predicate.Operator == operators.NotEquals { + hasStringNeq = true + } + } + + flag := byte(OptimizeNone) + if stringEq > 0 && hasStringNeq { + // The flag is the number of string equality checks in the == group. + flag = byte(stringEq) + } + // Create a new group ID which tracks the number of expressions that must match // within this group in order for the group to pass. // @@ -500,7 +531,7 @@ func navigateAST(nav expr, parent *Node, vars LiftedArgs, rand RandomReader) ([] // When checking an incoming event, we match the event against each node's // ident/variable. Using the group ID, we can see if we've matched N necessary // items from the same identifier. If so, the evaluation is true. - parent.GroupID = newGroupIDWithReader(uint16(total), rand) + parent.GroupID = newGroupIDWithReader(uint16(total), flag, rand) // For each sub-group, add the same group IDs to children if there's no nesting. 
// diff --git a/parser_test.go b/parser_test.go index 77534aa..61f9ed8 100644 --- a/parser_test.go +++ b/parser_test.go @@ -89,7 +89,7 @@ func TestParse(t *testing.T) { output: `event.data.ids[2] == "a"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Ident: "event.data.ids[2]", Literal: "a", @@ -103,7 +103,7 @@ func TestParse(t *testing.T) { output: `event.data.ids[2].id == "a"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Ident: "event.data.ids[2].id", Literal: "a", @@ -126,7 +126,7 @@ func TestParse(t *testing.T) { output: `event == vars.a`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Ident: "event", LiteralIdent: &ident, @@ -147,7 +147,7 @@ func TestParse(t *testing.T) { output: `event == "foo"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event", @@ -161,7 +161,7 @@ func TestParse(t *testing.T) { output: `event.data.run_id == "xyz"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "xyz", Ident: "event.data.run_id", @@ -176,10 +176,10 @@ func TestParse(t *testing.T) { output: `event.data.id == "foo" && event.data.value > 100`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event.data.id", @@ -187,7 +187,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -203,10 +203,10 @@ func TestParse(t *testing.T) { 
output: `event.data.float <= 3.141 && event.data.id == "foo" && event.data.value > 100`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: 3.141, Ident: "event.data.float", @@ -214,7 +214,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event.data.id", @@ -222,7 +222,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -245,7 +245,7 @@ func TestParse(t *testing.T) { output: `event.data.a != "a"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "a", Ident: "event.data.a", @@ -259,7 +259,7 @@ func TestParse(t *testing.T) { output: `event.data.a == "a"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "a", Ident: "event.data.a", @@ -280,7 +280,7 @@ func TestParse(t *testing.T) { output: `event.data.id >= "ulid"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "ulid", Ident: "event.data.id", @@ -294,7 +294,7 @@ func TestParse(t *testing.T) { output: `event.data.id < "ulid"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "ulid", Ident: "event.data.id", @@ -308,7 +308,7 @@ func TestParse(t *testing.T) { output: `event.data.a != "a"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "a", Ident: "event.data.a", @@ -329,10 
+329,10 @@ func TestParse(t *testing.T) { output: `event == "foo" || event == "bar"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event", @@ -340,7 +340,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "bar", Ident: "event", @@ -356,10 +356,10 @@ func TestParse(t *testing.T) { output: `event == "foo" || event == "bar"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event", @@ -367,7 +367,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "bar", Ident: "event", @@ -383,11 +383,11 @@ func TestParse(t *testing.T) { output: `a == 1 || (b == 2 && b != 3)`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ // Either { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -395,10 +395,10 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -406,7 +406,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "b", @@ -424,10 +424,10 @@ func TestParse(t *testing.T) { output: `event == "baz" || event == "foo" || event == "bar"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + 
GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "baz", Ident: "event", @@ -435,7 +435,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event", @@ -443,7 +443,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "bar", Ident: "event", @@ -460,13 +460,13 @@ func TestParse(t *testing.T) { expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: "order", Ident: "event.data.type", @@ -474,7 +474,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(500), Ident: "event.data.value", @@ -484,7 +484,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "preorder", Ident: "event.data.type", @@ -508,7 +508,7 @@ func TestParse(t *testing.T) { output: "event.data.value > 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -522,7 +522,7 @@ func TestParse(t *testing.T) { output: "event.data.value >= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -536,7 +536,7 @@ func TestParse(t *testing.T) { output: "event.data.value < 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + 
GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -550,7 +550,7 @@ func TestParse(t *testing.T) { output: "event.data.value <= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -565,7 +565,7 @@ func TestParse(t *testing.T) { output: "event.data.value < 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -579,7 +579,7 @@ func TestParse(t *testing.T) { output: "event.data.value <= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -593,7 +593,7 @@ func TestParse(t *testing.T) { output: "event.data.value > 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -607,7 +607,7 @@ func TestParse(t *testing.T) { output: "event.data.value >= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -629,7 +629,7 @@ func TestParse(t *testing.T) { output: "event.data.value <= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -643,7 +643,7 @@ func TestParse(t *testing.T) { output: "event.data.value < 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -657,7 +657,7 @@ func TestParse(t *testing.T) { 
output: "event.data.value >= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -671,7 +671,7 @@ func TestParse(t *testing.T) { output: "event.data.value > 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -686,7 +686,7 @@ func TestParse(t *testing.T) { output: "event.data.value > 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -700,7 +700,7 @@ func TestParse(t *testing.T) { output: "event.data.value <= 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -714,7 +714,7 @@ func TestParse(t *testing.T) { output: "event.data.value < 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -729,7 +729,7 @@ func TestParse(t *testing.T) { output: "event.data.value < 100", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(100), Ident: "event.data.value", @@ -750,10 +750,10 @@ func TestParse(t *testing.T) { output: `c == 3 || a == 1 || b == 2`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -761,7 +761,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: 
&Predicate{ Literal: int64(1), Ident: "a", @@ -769,7 +769,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -786,13 +786,13 @@ func TestParse(t *testing.T) { output: `(a == 1 && b == 2) || c == 3`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -800,7 +800,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -810,7 +810,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -827,10 +827,10 @@ func TestParse(t *testing.T) { output: `a == 1 || (b == 2 && c == 3)`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -838,10 +838,10 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -849,7 +849,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -868,10 +868,10 @@ func TestParse(t *testing.T) { output: `c == 3 && (a == 1 || b == 2)`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Ands: []*Node{ { - 
GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -881,7 +881,7 @@ func TestParse(t *testing.T) { }, Ors: []*Node{ { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -889,7 +889,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(2), + GroupID: newGroupID(2, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -906,10 +906,10 @@ func TestParse(t *testing.T) { output: `a == 1 && b == 2 && (c == 3 || d == 4)`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -917,7 +917,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -927,7 +927,7 @@ func TestParse(t *testing.T) { }, Ors: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -935,7 +935,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(4), Ident: "d", @@ -957,10 +957,10 @@ func TestParse(t *testing.T) { output: `zz == 4 || (a == 1 && b == 2 && (c == 3 || d == 4)) || (z == 3 && e == 5 && (f == 6 || g == 7))`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Ors: []*Node{ { - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: int64(4), Ident: "zz", @@ -968,10 +968,10 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), 
Predicate: &Predicate{ Literal: int64(1), Ident: "a", @@ -979,7 +979,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(2), Ident: "b", @@ -989,7 +989,7 @@ func TestParse(t *testing.T) { }, Ors: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "c", @@ -997,7 +997,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(4), Ident: "d", @@ -1007,10 +1007,10 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Ands: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(3), Ident: "z", @@ -1018,7 +1018,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(5), Ident: "e", @@ -1028,7 +1028,7 @@ func TestParse(t *testing.T) { }, Ors: []*Node{ { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(6), Ident: "f", @@ -1036,7 +1036,7 @@ func TestParse(t *testing.T) { }, }, { - GroupID: newGroupID(3), + GroupID: newGroupID(3, OptimizeNone), Predicate: &Predicate{ Literal: int64(7), Ident: "g", @@ -1063,7 +1063,7 @@ func TestParse(t *testing.T) { output: "name select null", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Ident: "name", Operator: "select", @@ -1082,7 +1082,7 @@ func TestParse(t *testing.T) { output: "x comprehension null", expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Ident: "x", Operator: "comprehension", @@ -1178,7 +1178,7 @@ func TestParse_LiftedVars(t *testing.T) { output: `event 
== "foo"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "foo", Ident: "event", @@ -1195,7 +1195,7 @@ func TestParse_LiftedVars(t *testing.T) { output: `event == "bar"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "bar", Ident: "event", @@ -1212,7 +1212,7 @@ func TestParse_LiftedVars(t *testing.T) { output: `event == "bar"`, expected: ParsedExpression{ Root: Node{ - GroupID: newGroupID(1), + GroupID: newGroupID(1, OptimizeNone), Predicate: &Predicate{ Literal: "bar", Ident: "event", From 1f8887ad5223b68bc8058b1564e1e081eae8f53c Mon Sep 17 00:00:00 2001 From: Tony Holdstock-Brown Date: Wed, 6 Nov 2024 17:09:18 -0800 Subject: [PATCH 3/5] lint --- expr_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/expr_test.go b/expr_test.go index e760791..d04d4a7 100644 --- a/expr_test.go +++ b/expr_test.go @@ -1203,6 +1203,7 @@ func getMem() runtime.MemStats { return m } +//nolint:all func deltaMem(prev runtime.MemStats) runtime.MemStats { next := getMem() From 5883577ba11c0b4cae6da2539cb1a4d11fac768c Mon Sep 17 00:00:00 2001 From: Tony Holdstock-Brown Date: Wed, 6 Nov 2024 17:11:54 -0800 Subject: [PATCH 4/5] Fix rebase --- engine_stringmap.go | 5 ++--- expr_test.go | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/engine_stringmap.go b/engine_stringmap.go index 1f193fe..45d4348 100644 --- a/engine_stringmap.go +++ b/engine_stringmap.go @@ -96,12 +96,11 @@ func (n *stringLookup) Match(ctx context.Context, input map[string]any) ([]*Stor return nil }) } - - // Wait for equality matching to optimize inequality matching - if err := eg.Wait(); err != nil { + if err := pool.Wait(); err != nil { return nil, err } + pool = newErrPool(errPoolOpts{concurrency: n.concurrency}) // Then, iterate through the inequality matches. 
for item := range n.inequality { path := item diff --git a/expr_test.go b/expr_test.go index d04d4a7..068d0d4 100644 --- a/expr_test.go +++ b/expr_test.go @@ -199,8 +199,8 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { addOtherExpressions(n, e, loader) require.EqualValues(t, n+1, e.Len()) - //mem := getMem() - //printMem(mem, "no matches") + mem := getMem() + printMem(mem, "no matches") t.Run("It matches items", func(t *testing.T) { pre := time.Now() From e7427f4d9c87a645ccd6f10127429df397f73ceb Mon Sep 17 00:00:00 2001 From: Tony Holdstock-Brown Date: Wed, 6 Nov 2024 17:27:31 -0800 Subject: [PATCH 5/5] Update tests --- engine_stringmap_test.go | 38 ++++++++++++++++++++++++++++++++++++-- expr_test.go | 8 ++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/engine_stringmap_test.go b/engine_stringmap_test.go index 308fee5..d9a8025 100644 --- a/engine_stringmap_test.go +++ b/engine_stringmap_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/google/cel-go/common/operators" + "github.com/google/uuid" "github.com/stretchr/testify/require" ) @@ -12,7 +13,14 @@ func TestEngineStringmap(t *testing.T) { ctx := context.Background() s := newStringEqualityMatcher(testConcurrency).(*stringLookup) + gid := newGroupID(4, 2) // optimized to 2 == matches. + exp := &ParsedExpression{ + EvaluableID: uuid.NewSHA1(uuid.NameSpaceURL, []byte("eq-neq")), + } + a := ExpressionPart{ + Parsed: exp, + GroupID: gid, Predicate: &Predicate{ Ident: "async.data.id", Literal: "123", @@ -20,6 +28,8 @@ func TestEngineStringmap(t *testing.T) { }, } b := ExpressionPart{ + Parsed: &ParsedExpression{EvaluableID: uuid.NewSHA1(uuid.NameSpaceURL, []byte("eq-single"))}, + GroupID: newGroupID(1, 0), // This belongs to a "different" expression, but is the same pred. 
Predicate: &Predicate{ Ident: "async.data.id", Literal: "123", @@ -27,6 +37,8 @@ func TestEngineStringmap(t *testing.T) { }, } c := ExpressionPart{ + Parsed: exp, + GroupID: gid, Predicate: &Predicate{ Ident: "async.data.another", Literal: "456", @@ -36,6 +48,8 @@ func TestEngineStringmap(t *testing.T) { // Test inequality d := ExpressionPart{ + Parsed: exp, + GroupID: gid, Predicate: &Predicate{ Ident: "async.data.neq", Literal: "neq-1", @@ -43,6 +57,8 @@ func TestEngineStringmap(t *testing.T) { }, } e := ExpressionPart{ + Parsed: &ParsedExpression{EvaluableID: uuid.NewSHA1(uuid.NameSpaceURL, []byte("neq-single"))}, + GroupID: newGroupID(1, 0), // This belongs to a "different" expression, but is the same pred. Predicate: &Predicate{ Ident: "async.data.neq", Literal: "neq-2", @@ -134,7 +150,11 @@ func TestEngineStringmap(t *testing.T) { }, }) require.NoError(t, err) - require.Equal(t, 4, len(found)) // matching plus inequality + + // This should match "neq-single" and eq-single only. It shouldn't + // match the eq-neq expression, as the "async.data.another" part wasn't matched + // and there's expression optimization to test this. 
+ require.Equal(t, 2, len(found)) }) t.Run("It matches data with null neq", func(t *testing.T) { @@ -147,9 +167,23 @@ func TestEngineStringmap(t *testing.T) { }, }) require.NoError(t, err) - require.Equal(t, 4, len(found)) // matching plus inequality + require.Equal(t, 2, len(found)) // matching plus inequality }) + t.Run("It matches data with expression optimizations in group ID", func(t *testing.T) { + found, err := s.Match(ctx, map[string]any{ + "async": map[string]any{ + "data": map[string]any{ + "id": "123", + "another": "456", + "neq": "lol", + }, + }, + }) + require.NoError(t, err) + + require.Equal(t, 4, len(found)) + }) } func TestEngineStringmap_DuplicateValues(t *testing.T) { diff --git a/expr_test.go b/expr_test.go index 068d0d4..44e6cdd 100644 --- a/expr_test.go +++ b/expr_test.go @@ -121,7 +121,7 @@ func TestEvaluate_Strings(t *testing.T) { ctx := context.Background() parser := NewTreeParser(NewCachingCompiler(newEnv(), nil)) - expected := tex(`event.data.account_id == "yes" && event.data.match == "true"`) + expected := tex(`event.data.account_id == "yes" && event.data.another == "ok" && event.data.match == "true"`) loader := newEvalLoader() loader.AddEval(expected) @@ -146,6 +146,7 @@ func TestEvaluate_Strings(t *testing.T) { "event": map[string]any{ "data": map[string]any{ "account_id": "yes", + "another": "ok", "match": "true", }, }, @@ -167,6 +168,7 @@ func TestEvaluate_Strings(t *testing.T) { "event": map[string]any{ "data": map[string]any{ "account_id": "yes", + "another": "ok", "match": "no", }, }, @@ -186,7 +188,7 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { ctx := context.Background() parser := NewTreeParser(NewCachingCompiler(newEnv(), nil)) - expected := tex(`event.data.account_id == "yes" && event.data.neq != "neq"`) + expected := tex(`event.data.account_id == "yes" && event.data.another == "ok" && event.data.neq != "neq"`) loader := newEvalLoader() loader.AddEval(expected) @@ -208,6 +210,7 @@ func 
TestEvaluate_Strings_Inequality(t *testing.T) { "event": map[string]any{ "data": map[string]any{ "account_id": "yes", + "another": "ok", "match": "true", "neq": "nah", }, @@ -233,6 +236,7 @@ func TestEvaluate_Strings_Inequality(t *testing.T) { "event": map[string]any{ "data": map[string]any{ "account_id": "yes", + "another": "ok", "match": "no", "neq": "nah", },