Skip to content

Commit

Permalink
feat: support limit push-down for full joins
Browse files: browse the repository at this point in the history
  • Loading branch information
JasonLi-cn committed Oct 16, 2024
1 parent 399e840 commit 6693a49
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 1 deletion.
1 change: 1 addition & 0 deletions datafusion/optimizer/src/push_down_limit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ fn push_down_join(mut join: Join, limit: usize) -> Transformed<Join> {
match join.join_type {
Left => (Some(limit), None),
Right => (None, Some(limit)),
Full => (Some(limit), Some(limit)),
_ => (None, None),
}
};
Expand Down
88 changes: 87 additions & 1 deletion datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4187,4 +4187,90 @@ physical_plan
02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(b@1, y@1)], filter=a@0 < x@1
03)----MemoryExec: partitions=1, partition_sizes=[0]
04)----SortExec: expr=[x@0 ASC NULLS LAST], preserve_partitioning=[false]
05)------MemoryExec: partitions=1, partition_sizes=[0]
05)------MemoryExec: partitions=1, partition_sizes=[0]

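# Test full join with limit: the limit is pushed down to both join inputs
# (see the `Limit: skip=0, fetch=2` node above each TableScan in the plans below).
# NOTE(review): limiting both inputs of a FULL JOIN may drop a row's only match and
# surface that row NULL-padded instead; confirm this push-down preserves results.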
statement ok
CREATE TABLE t0(c1 INT UNSIGNED, c2 INT UNSIGNED)
AS VALUES
(1, 1),
(2, 2),
(3, 3),
(4, 4);

statement ok
CREATE TABLE t1(c1 INT UNSIGNED, c2 INT UNSIGNED, c3 BOOLEAN)
AS VALUES
(2, 2, true),
(2, 2, false),
(3, 3, true),
(3, 3, false);

query IIIIB
SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 LIMIT 2;
----
2 2 2 2 true
2 2 2 2 false

query IIIIB
SELECT * FROM t0 FULL JOIN t1 ON t0.c2 >= t1.c2 LIMIT 2;
----
2 2 2 2 true
3 3 2 2 true

query IIIIB
SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 AND t0.c2 >= t1.c2 LIMIT 2;
----
2 2 2 2 true
2 2 2 2 false

## Test equi-join keys only, no extra filter: !join.on.is_empty() && join.filter.is_none()
query TT
EXPLAIN SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 LIMIT 2;
----
logical_plan
01)Limit: skip=0, fetch=2
02)--Full Join: t0.c1 = t1.c1
03)----Limit: skip=0, fetch=2
04)------TableScan: t0 projection=[c1, c2], fetch=2
05)----Limit: skip=0, fetch=2
06)------TableScan: t1 projection=[c1, c2, c3], fetch=2
physical_plan
01)CoalesceBatchesExec: target_batch_size=3, fetch=2
02)--HashJoinExec: mode=CollectLeft, join_type=Full, on=[(c1@0, c1@0)]
03)----MemoryExec: partitions=1, partition_sizes=[1]
04)----MemoryExec: partitions=1, partition_sizes=[1]

## Test non-equi filter only, no equi-join keys: join.on.is_empty() && join.filter.is_some()
query TT
EXPLAIN SELECT * FROM t0 FULL JOIN t1 ON t0.c2 >= t1.c2 LIMIT 2;
----
logical_plan
01)Limit: skip=0, fetch=2
02)--Full Join: Filter: t0.c2 >= t1.c2
03)----Limit: skip=0, fetch=2
04)------TableScan: t0 projection=[c1, c2], fetch=2
05)----Limit: skip=0, fetch=2
06)------TableScan: t1 projection=[c1, c2, c3], fetch=2
physical_plan
01)GlobalLimitExec: skip=0, fetch=2
02)--NestedLoopJoinExec: join_type=Full, filter=c2@0 >= c2@1
03)----MemoryExec: partitions=1, partition_sizes=[1]
04)----MemoryExec: partitions=1, partition_sizes=[1]

## Test equi-join keys plus an extra filter: !join.on.is_empty() && join.filter.is_some()
query TT
EXPLAIN SELECT * FROM t0 FULL JOIN t1 ON t0.c1 = t1.c1 AND t0.c2 >= t1.c2 LIMIT 2;
----
logical_plan
01)Limit: skip=0, fetch=2
02)--Full Join: t0.c1 = t1.c1 Filter: t0.c2 >= t1.c2
03)----Limit: skip=0, fetch=2
04)------TableScan: t0 projection=[c1, c2], fetch=2
05)----Limit: skip=0, fetch=2
06)------TableScan: t1 projection=[c1, c2, c3], fetch=2
physical_plan
01)CoalesceBatchesExec: target_batch_size=3, fetch=2
02)--HashJoinExec: mode=CollectLeft, join_type=Full, on=[(c1@0, c1@0)], filter=c2@0 >= c2@1
03)----MemoryExec: partitions=1, partition_sizes=[1]
04)----MemoryExec: partitions=1, partition_sizes=[1]

0 comments on commit 6693a49

Please sign in to comment.