From e20e81339e016ccfc00c10054069c5275d158de9 Mon Sep 17 00:00:00 2001 From: Matthias Loibl Date: Fri, 15 Nov 2024 11:30:34 -0700 Subject: [PATCH] pqarrow/arrowutils: Add support for sorting Timestamps --- pqarrow/arrowutils/sort.go | 4 +++- pqarrow/arrowutils/sort_test.go | 42 +++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/pqarrow/arrowutils/sort.go b/pqarrow/arrowutils/sort.go index 9410a0861..9a69c2254 100644 --- a/pqarrow/arrowutils/sort.go +++ b/pqarrow/arrowutils/sort.go @@ -220,7 +220,7 @@ func newMultiColSorter( } ms.Reserve(int(r.NumRows()), len(columns)) for i := range columns { - ms.directions[i] = int(columns[i].Direction.comparison()) + ms.directions[i] = columns[i].Direction.comparison() ms.nullsFirst[i] = columns[i].NullsFirst } for i, col := range columns { @@ -243,6 +243,8 @@ func newMultiColSorter( ms.comparisons[i] = newOrderedSorter[string](e, cmp.Compare) case *array.Binary: ms.comparisons[i] = newOrderedSorter[[]byte](e, bytes.Compare) + case *array.Timestamp: + ms.comparisons[i] = newOrderedSorter[arrow.Timestamp](e, cmp.Compare) case *array.Dictionary: switch elem := e.Dictionary().(type) { case *array.String: diff --git a/pqarrow/arrowutils/sort_test.go b/pqarrow/arrowutils/sort_test.go index 98801db7c..05a4eb14c 100644 --- a/pqarrow/arrowutils/sort_test.go +++ b/pqarrow/arrowutils/sort_test.go @@ -109,6 +109,26 @@ func TestSortRecord(t *testing.T) { Columns: []SortingColumn{{Index: 2, Direction: Descending}}, Indices: []int32{2, 1, 0}, }, + { + Name: "By Timestamp column ascending", + Samples: Samples{ + {Timestamp: 3}, + {Timestamp: 2}, + {Timestamp: 1}, + }, + Columns: []SortingColumn{{Index: 5}}, + Indices: []int32{2, 1, 0}, + }, + { + Name: "By Timestamp column descending", + Samples: Samples{ + {Timestamp: 1}, + {Timestamp: 2}, + {Timestamp: 3}, + }, + Columns: []SortingColumn{{Index: 5, Direction: Descending}}, + Indices: []int32{2, 1, 0}, + }, { Name: "By Dict column ascending", Samples: Samples{ @@ -365,11 +385,12 @@ func TestReorderRecord(t *testing.T) { // Use all supported sort field. type Sample struct { - Int int64 - Double float64 - String string - Dict string - Nullable *int64 + Int int64 + Double float64 + String string + Dict string + Nullable *int64 + Timestamp arrow.Timestamp } type Samples []Sample @@ -401,6 +422,11 @@ func (s Samples) Record() arrow.Record { Type: arrow.PrimitiveTypes.Int64, Nullable: true, }, + { + Name: "timestamp", + Type: &arrow.TimestampType{}, + Nullable: true, + }, }, nil), ) @@ -409,11 +435,17 @@ func (s Samples) Record() arrow.Record { fString := b.Field(2).(*array.StringBuilder) fDict := b.Field(3).(*array.BinaryDictionaryBuilder) fNullable := b.Field(4).(*array.Int64Builder) + fTimestamp := b.Field(5).(*array.TimestampBuilder) for _, v := range s { fInt.Append(v.Int) fDouble.Append(v.Double) fString.Append(v.String) + if v.Timestamp == 0 { + fTimestamp.AppendNull() + } else { + fTimestamp.Append(v.Timestamp) + } _ = fDict.AppendString(v.Dict) if v.Nullable != nil { fNullable.Append(*v.Nullable)