diff --git a/pqarrow/arrow.go b/pqarrow/arrow.go index 4f3973ad8..036f1ee5b 100644 --- a/pqarrow/arrow.go +++ b/pqarrow/arrow.go @@ -1206,5 +1206,11 @@ func Project(r arrow.Record, projections []logicalplan.Expr) arrow.Record { } } + // If the projection matches the entire record, return the record as is. + if len(cols) == r.Schema().NumFields() { + r.Retain() // NOTE: we're creating another reference to this record, so retain it + return r + } + return array.NewRecord(arrow.NewSchema(fields, nil), cols, r.NumRows()) } diff --git a/query/logicalplan/expr.go b/query/logicalplan/expr.go index b7d5e4762..d8c24fdc4 100644 --- a/query/logicalplan/expr.go +++ b/query/logicalplan/expr.go @@ -772,3 +772,31 @@ func (a *AllExpr) MatchColumn(columnName string) bool { return true } func (a *AllExpr) MatchPath(path string) bool { return true } func (a *AllExpr) Computed() bool { return false } func (a *AllExpr) Clone() Expr { return &AllExpr{} } + +type NotExpr struct { + Expr Expr +} + +func Not(expr Expr) *NotExpr { + return &NotExpr{ + Expr: expr, + } +} + +func (n *NotExpr) DataType(*parquet.Schema) (arrow.DataType, error) { return nil, nil } +func (n *NotExpr) Accept(visitor Visitor) bool { + continu := visitor.PreVisit(n) + if !continu { + return false + } + + return visitor.PostVisit(n) +} +func (n *NotExpr) Name() string { return "not " + n.Expr.Name() } +func (n *NotExpr) ColumnsUsedExprs() []Expr { + return []Expr{&NotExpr{Expr: n.Expr}} +} +func (n *NotExpr) MatchColumn(columnName string) bool { return !n.Expr.MatchColumn(columnName) } +func (n *NotExpr) MatchPath(path string) bool { return !n.Expr.MatchPath(path) } +func (n *NotExpr) Computed() bool { return false } +func (n *NotExpr) Clone() Expr { return &NotExpr{Expr: n.Expr} } diff --git a/query/logicalplan/optimize.go b/query/logicalplan/optimize.go index 8d3e2ed50..d7fcfa4c0 100644 --- a/query/logicalplan/optimize.go +++ b/query/logicalplan/optimize.go @@ -1,12 +1,10 @@ package logicalplan import ( - "regexp" - "golang.org/x/exp/slices" ) -var hashedMatch = regexp.MustCompile("^hashed.") +var hashedMatch = "hashed" type Optimizer interface { Optimize(plan *LogicalPlan) *LogicalPlan @@ -17,7 +15,7 @@ func DefaultOptimizers() []Optimizer { &AverageAggregationPushDown{}, &PhysicalProjectionPushDown{ defaultProjections: []Expr{ - RegExpNotColumnMatch(hashedMatch), + Not(DynCol(hashedMatch)), }, }, &FilterPushDown{}, @@ -126,7 +124,7 @@ func (p *PhysicalProjectionPushDown) optimize(plan *LogicalPlan, columnsUsedExpr columnsUsedExprs = append(columnsUsedExprs, expr.ColumnsUsedExprs()...) } p.defaultProjections = []Expr{} - columnsUsedExprs = append(columnsUsedExprs, RegExpColumnMatch(hashedMatch)) + columnsUsedExprs = append(columnsUsedExprs, DynCol(hashedMatch)) } if plan.Input != nil { diff --git a/query/logicalplan/optimize_test.go b/query/logicalplan/optimize_test.go index 3835eb48e..b8937cfe1 100644 --- a/query/logicalplan/optimize_test.go +++ b/query/logicalplan/optimize_test.go @@ -35,7 +35,7 @@ func TestOptimizePhysicalProjectionPushDown(t *testing.T) { &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "value"}, - RegExpColumnMatch(hashedMatch), + DynCol(hashedMatch), &Column{ColumnName: "labels.test"}, }, }, @@ -205,7 +205,7 @@ func TestAllOptimizers(t *testing.T) { &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "stacktrace"}, &Column{ColumnName: "value"}, - RegExpColumnMatch(hashedMatch), + DynCol(hashedMatch), &Column{ColumnName: "labels.test"}, }, Filter: &BinaryExpr{