Skip to content

Commit

Permalink
fix: RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Dec 3, 2023
1 parent 06bbe12 commit a47d477
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 13 deletions.
16 changes: 9 additions & 7 deletions datafusion/expr/src/window_frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,20 @@ impl WindowFrame {
pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result<WindowFrame> {
if frame.units == WindowFrameUnits::Range && order_bys != 1 {
// Normally, RANGE frames require an ORDER BY clause with exactly one
// column. However, an ORDER BY clause may be absent in two edge cases.
// column. However, an ORDER BY clause may be absent in two edge cases:
// 1. start bound is UNBOUNDED or CURRENT ROW
// 2. end bound is CURRENT ROW or UNBOUNDED.
// In these cases, we regularize the RANGE frame to be equivalent to a ROWS
// frame with the UNBOUNDED bounds.
if (frame.start_bound.is_unbounded()
|| frame.start_bound == WindowFrameBound::CurrentRow)
&& (frame.end_bound == WindowFrameBound::CurrentRow
|| frame.end_bound.is_unbounded())
&& order_bys == 0
{
if order_bys == 0 {
frame.units = WindowFrameUnits::Rows;
frame.start_bound =
WindowFrameBound::Preceding(ScalarValue::UInt64(None));
frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None));
}
frame.units = WindowFrameUnits::Rows;
frame.start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None));
frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None));
} else {
plan_err!("RANGE requires exactly one ORDER BY column")?
}
Expand Down
44 changes: 38 additions & 6 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ SELECT
794 95 95

#fn test_window_range_equivalent_frames
query IIIIIII
query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
SELECT
c9,
COUNT(*) OVER(ORDER BY c9, c1 RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt1,
Expand All @@ -1092,12 +1092,22 @@ SELECT
FROM aggregate_test_100
ORDER BY c9
LIMIT 5

query IIII
SELECT
c9,
COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt4,
COUNT(*) OVER(RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cnt5,
COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS cnt6
FROM aggregate_test_100
ORDER BY c9
LIMIT 5
----
28774375 1 1 1 100 100 100
63044568 1 2 1 100 100 100
141047417 1 3 1 100 100 100
141680161 1 4 1 100 100 100
145294611 1 5 1 100 100 100
28774375 100 100 100
63044568 100 100 100
141047417 100 100 100
141680161 100 100 100
145294611 100 100 100

#fn test_window_cume_dist
query IRR
Expand Down Expand Up @@ -3581,3 +3591,25 @@ CREATE TABLE new_table AS SELECT NTILE(2) OVER(ORDER BY c1) AS ntile_2 FROM aggr

statement ok
DROP TABLE new_table;

# RANGE frame with/without (multiple) ORDER BY
query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
select a,
rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a union all select 2 a) q

query II
select a,
rank() over (partition by a order by a RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a union all select 2 a) q
----
2 1
1 1

query II
select a,
rank() over (partition by a RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a union all select 2 a) q
----
1 1
2 1

0 comments on commit a47d477

Please sign in to comment.