From 9c5935d00b54ed2f029a214ffc64f69cfa854e69 Mon Sep 17 00:00:00 2001 From: Wang Gengliang <ltnwgl@gmail.com> Date: Wed, 27 Sep 2017 12:44:10 +0200 Subject: [PATCH] [SPARK-22141][SQL] Propagate empty relation before checking Cartesian products ## What changes were proposed in this pull request? When inferring constraints from children, a Join's condition can be simplified to None. For example, ``` val testRelation = LocalRelation('a.int) val x = testRelation.as("x") val y = testRelation.where($"a" === 2 && !($"a" === 2)).as("y") x.join(y).where($"x.a" === $"y.a") ``` The plan will become ``` Join Inner :- LocalRelation <empty>, [a#23] +- LocalRelation <empty>, [a#224] ``` The Cartesian products check then throws an exception for the above plan. Propagating empty relations before checking Cartesian products resolves the issue. ## How was this patch tested? Unit test Author: Wang Gengliang <ltnwgl@gmail.com> Closes #19362 from gengliangwang/MoveCheckCartesianProducts. --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../src/test/scala/org/apache/spark/sql/JoinSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index a602894efb..a391c513ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -124,8 +124,6 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) SimplifyCreateMapOps, CombineConcats) ++ extendedOperatorOptimizationRules: _*) :: - Batch("Check Cartesian Products", Once, - CheckCartesianProducts) :: Batch("Join Reorder", Once, CostBasedJoinReorder) :: Batch("Decimal Optimizations", fixedPoint, @@ -136,6 +134,8 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) Batch("LocalRelation", 
fixedPoint, ConvertToLocalRelation, PropagateEmptyRelation) :: + Batch("Check Cartesian Products", Once, + CheckCartesianProducts) :: Batch("OptimizeCodegen", Once, OptimizeCodegen) :: Batch("RewriteSubquery", Once, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 9d50e8be60..226cc3028b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -200,6 +200,14 @@ class JoinSuite extends QueryTest with SharedSQLContext { Nil) } + test("SPARK-22141: Propagate empty relation before checking Cartesian products") { + Seq("inner", "left", "right", "left_outer", "right_outer", "full_outer").foreach { joinType => + val x = testData2.where($"a" === 2 && !($"a" === 2)).as("x") + val y = testData2.where($"a" === 1 && !($"a" === 1)).as("y") + checkAnswer(x.join(y, Seq.empty, joinType), Nil) + } + } + test("big inner join, 4 matches per row") { val bigData = testData.union(testData).union(testData).union(testData) val bigDataX = bigData.as("x") -- GitLab