From 61b80d552aafb262b5f817f7bc9c0acd0328715b Mon Sep 17 00:00:00 2001
From: Wenchen Fan <wenchen@databricks.com>
Date: Fri, 3 Jun 2016 14:26:24 -0700
Subject: [PATCH] [SPARK-15547][SQL] nested case class in encoder can have
 different number of fields from the real schema

## What changes were proposed in this pull request?

There are 2 kinds of `GetStructField`:

1. resolved from `UnresolvedExtractValue`, and it will have a `name` property.
2. created when we build deserializer expression for nested tuple, no `name` property.

When we want to validate the ordinals of nested tuple, we should only catch `GetStructField` without the name property.

## How was this patch tested?

new test in `EncoderResolutionSuite`

Author: Wenchen Fan <wenchen@databricks.com>

Closes #13474 from cloud-fan/ordinal-check.
---
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala  | 7 ++++++-
 .../sql/catalyst/encoders/EncoderResolutionSuite.scala     | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 4f6b4830cd..0e68656467 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1964,7 +1964,12 @@ class Analyzer(
      */
     private def validateNestedTupleFields(deserializer: Expression): Unit = {
       val structChildToOrdinals = deserializer
-        .collect { case g: GetStructField => g }
+        // There are 2 kinds of `GetStructField`:
+        //   1. resolved from `UnresolvedExtractValue`, and it will have a `name` property.
+        //   2. created when we build deserializer expression for nested tuple, no `name` property.
+        // Here we want to validate the ordinals of nested tuple, so we should only catch
+        // `GetStructField` without the name property.
+        .collect { case g: GetStructField if g.name.isEmpty => g }
         .groupBy(_.child)
         .mapValues(_.map(_.ordinal).distinct.sorted)
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
index 7251202c7b..802397d50e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala
@@ -115,6 +115,12 @@ class EncoderResolutionSuite extends PlanTest {
     }
   }
 
+  test("nested case class can have different number of fields from the real schema") {
+    val encoder = ExpressionEncoder[(String, StringIntClass)]
+    val attrs = Seq('a.string, 'b.struct('a.string, 'b.int, 'c.int))
+    encoder.resolveAndBind(attrs)
+  }
+
   test("throw exception if real type is not compatible with encoder schema") {
     val msg1 = intercept[AnalysisException] {
       ExpressionEncoder[StringIntClass].resolveAndBind(Seq('a.string, 'b.long))
-- 
GitLab