Skip to content

Commit

Permalink
optimize arrow project (#558)
Browse files Browse the repository at this point in the history
* optimize arrow project

If the projection matches the entire arrow records simply return the
whole record

* optimize: prehashed column matching

Instead of using regexp just use the dyncol and potentially a new
NotExpr wrapper
  • Loading branch information
thorfour authored Oct 10, 2023
1 parent 5eb3a50 commit c6a06c5
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 7 deletions.
6 changes: 6 additions & 0 deletions pqarrow/arrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -1206,5 +1206,11 @@ func Project(r arrow.Record, projections []logicalplan.Expr) arrow.Record {
}
}

// If the projection matches the entire record, return the record as is.
if len(cols) == r.Schema().NumFields() {
r.Retain() // NOTE: we're creating another reference to this record, so retain it
return r
}

return array.NewRecord(arrow.NewSchema(fields, nil), cols, r.NumRows())
}
28 changes: 28 additions & 0 deletions query/logicalplan/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -772,3 +772,31 @@ func (a *AllExpr) MatchColumn(columnName string) bool { return true }
func (a *AllExpr) MatchPath(path string) bool { return true }
func (a *AllExpr) Computed() bool { return false }
func (a *AllExpr) Clone() Expr { return &AllExpr{} }

type NotExpr struct {
Expr Expr
}

func Not(expr Expr) *NotExpr {
return &NotExpr{
Expr: expr,
}
}

func (n *NotExpr) DataType(*parquet.Schema) (arrow.DataType, error) { return nil, nil }
func (n *NotExpr) Accept(visitor Visitor) bool {
continu := visitor.PreVisit(n)
if !continu {
return false
}

return visitor.PostVisit(n)
}
func (n *NotExpr) Name() string { return "not " + n.Expr.Name() }
func (n *NotExpr) ColumnsUsedExprs() []Expr {
return []Expr{&NotExpr{Expr: n.Expr}}
}
func (n *NotExpr) MatchColumn(columnName string) bool { return !n.Expr.MatchColumn(columnName) }
func (n *NotExpr) MatchPath(path string) bool { return !n.Expr.MatchPath(path) }
func (n *NotExpr) Computed() bool { return false }
func (n *NotExpr) Clone() Expr { return &NotExpr{Expr: n.Expr} }
8 changes: 3 additions & 5 deletions query/logicalplan/optimize.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package logicalplan

import (
"regexp"

"golang.org/x/exp/slices"
)

var hashedMatch = regexp.MustCompile("^hashed.")
var hashedMatch = "hashed"

type Optimizer interface {
Optimize(plan *LogicalPlan) *LogicalPlan
Expand All @@ -17,7 +15,7 @@ func DefaultOptimizers() []Optimizer {
&AverageAggregationPushDown{},
&PhysicalProjectionPushDown{
defaultProjections: []Expr{
RegExpNotColumnMatch(hashedMatch),
Not(DynCol(hashedMatch)),
},
},
&FilterPushDown{},
Expand Down Expand Up @@ -126,7 +124,7 @@ func (p *PhysicalProjectionPushDown) optimize(plan *LogicalPlan, columnsUsedExpr
columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...)
}
p.defaultProjections = []Expr{}
columnsUsedExprs = append(columnsUsedExprs, RegExpColumnMatch(hashedMatch))
columnsUsedExprs = append(columnsUsedExprs, DynCol(hashedMatch))
}

if plan.Input != nil {
Expand Down
4 changes: 2 additions & 2 deletions query/logicalplan/optimize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestOptimizePhysicalProjectionPushDown(t *testing.T) {
&Column{ColumnName: "stacktrace"},
&Column{ColumnName: "stacktrace"},
&Column{ColumnName: "value"},
RegExpColumnMatch(hashedMatch),
DynCol(hashedMatch),
&Column{ColumnName: "labels.test"},
},
},
Expand Down Expand Up @@ -205,7 +205,7 @@ func TestAllOptimizers(t *testing.T) {
&Column{ColumnName: "stacktrace"},
&Column{ColumnName: "stacktrace"},
&Column{ColumnName: "value"},
RegExpColumnMatch(hashedMatch),
DynCol(hashedMatch),
&Column{ColumnName: "labels.test"},
},
Filter: &BinaryExpr{
Expand Down

0 comments on commit c6a06c5

Please sign in to comment.