Skip to content

Commit

Permalink
Update code comment for the cases of regularized RANGE frame and add …
Browse files Browse the repository at this point in the history
…tests for ORDER BY cases with RANGE frame (apache#8410)

* fix: RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause

* Fix flaky test

* Update test comment

* Add code comment

* Update
  • Loading branch information
viirya authored Dec 4, 2023
1 parent 49dc1f2 commit 0bcf462
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
12 changes: 11 additions & 1 deletion datafusion/expr/src/window_frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,22 @@ impl WindowFrame {
pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result<WindowFrame> {
if frame.units == WindowFrameUnits::Range && order_bys != 1 {
// Normally, RANGE frames require an ORDER BY clause with exactly one
// column. However, an ORDER BY clause may be absent in two edge cases.
// column. However, an ORDER BY clause may be absent or present but with
// more than one column in two edge cases:
// 1. start bound is UNBOUNDED or CURRENT ROW
// 2. end bound is CURRENT ROW or UNBOUNDED.
// In these cases, we regularize the RANGE frame to be equivalent to a ROWS
// frame with the UNBOUNDED bounds.
// Note that this follows Postgres behavior.
if (frame.start_bound.is_unbounded()
|| frame.start_bound == WindowFrameBound::CurrentRow)
&& (frame.end_bound == WindowFrameBound::CurrentRow
|| frame.end_bound.is_unbounded())
{
// If an ORDER BY clause is absent, the frame is equivalent to a ROWS
// frame with the UNBOUNDED bounds.
// If an ORDER BY clause is present but has more than one column, the
// frame is unchanged.
if order_bys == 0 {
frame.units = WindowFrameUnits::Rows;
frame.start_bound =
Expand Down
58 changes: 58 additions & 0 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3727,3 +3727,61 @@ FROM score_board s

statement ok
DROP TABLE score_board;

# Regularize RANGE frame
query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
select a,
rank() over (order by a, a + 1 RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a

query II
select a,
rank() over (order by a RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
select a,
rank() over (RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a

query II
select a,
rank() over (order by a, a + 1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

query II
select a,
rank() over (order by a RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

# TODO: this is different to Postgres which returns [1, 1] for `rnk`.
# Comment it because it is flaky now as it depends on the order of the `a` column.
# query II
# select a,
# rank() over (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
# from (select 1 a union select 2 a) q ORDER BY rnk
# ----
# 1 1
# 2 2

# TODO: this works in Postgres which returns [1, 1].
query error DataFusion error: Arrow error: Invalid argument error: must either specify a row count or at least one column
select rank() over (RANGE between UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q;

# TODO: this is different to Postgres which returns [1, 1] for `rnk`.
query I
select rank() over (order by 1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY rnk
----
1
2

0 comments on commit 0bcf462

Please sign in to comment.