[SPARK-15308][SQL] RowEncoder should preserve nested column name.

## What changes were proposed in this pull request?

The following code generates the wrong schema:

```
val schema = new StructType().add(
  "struct",
  new StructType()
    .add("i", IntegerType, nullable = false)
    .add(
      "s",
      new StructType().add("int", IntegerType, nullable = false),
      nullable = false),
  nullable = false)
val ds = sqlContext.range(10).map(l => Row(l, Row(l)))(RowEncoder(schema))
ds.printSchema()
```

This should print as follows:

```
root
 |-- struct: struct (nullable = false)
 |    |-- i: integer (nullable = false)
 |    |-- s: struct (nullable = false)
 |    |    |-- int: integer (nullable = false)
```

but the result is:

```
root
 |-- struct: struct (nullable = false)
 |    |-- col1: integer (nullable = false)
 |    |-- col2: struct (nullable = false)
 |    |    |-- col1: integer (nullable = false)
```

This PR fixes `RowEncoder` to preserve nested column names.

## How was this patch tested?

Existing tests, plus a new test that checks that `RowEncoder` preserves nested column names.

Author: Takuya UESHIN <ueshin@happy-camper.st>

Closes #13090 from ueshin/issues/SPARK-15308.

[SPARK-15308][SQL] RowEncoder should preserve nested column name.
d2e1aa97 · Takuya UESHIN · Reynold Xin · 9a9c6f5c · d2e1aa97 · d2e1aa97
Commit d2e1aa97 authored 8 years ago by Takuya UESHIN Committed by Reynold Xin 8 years ago
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
@@ -62,7 +62,7 @@ object RowEncoder {
    new ExpressionEncoder[Row](
      schema,
      flat = false,
-      serializer.asInstanceOf[CreateStruct].children,
+      serializer.asInstanceOf[CreateNamedStruct].flatten,
      deserializer,
      ClassTag(cls))
  }
@@ -148,28 +148,30 @@ object RowEncoder {
        dataType = t)
    case StructType(fields) =>
-      val convertedFields = fields.zipWithIndex.map { case (f, i) =>
+      val nonNullOutput = CreateNamedStruct(fields.zipWithIndex.flatMap { case (field, index) =>
        val fieldValue = serializerFor(
-          GetExternalRowField(inputObject, i, f.name, externalDataTypeForInput(f.dataType)),
+          GetExternalRowField(
-          f.dataType
+            inputObject, index, field.name, externalDataTypeForInput(field.dataType)),
+          field.dataType
        )
-        if (f.nullable) {
+        val convertedField = if (field.nullable) {
          If(
-            Invoke(inputObject, "isNullAt", BooleanType, Literal(i) :: Nil),
+            Invoke(inputObject, "isNullAt", BooleanType, Literal(index) :: Nil),
-            Literal.create(null, f.dataType),
+            Literal.create(null, field.dataType),
            fieldValue
          )
        } else {
          fieldValue
        }
-      }
+        Literal(field.name) :: convertedField :: Nil
+      })
      if (inputObject.nullable) {
        If(IsNull(inputObject),
          Literal.create(null, inputType),
-          CreateStruct(convertedFields))
+          nonNullOutput)
      } else {
-        CreateStruct(convertedFields)
+        nonNullOutput
      }
  }

--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala
@@ -185,6 +185,28 @@ class RowEncoderSuite extends SparkFunSuite {
    assert(encoder.serializer.head.nullable == false)
  }
+  test("RowEncoder should preserve nested column name") {
+    val schema = new StructType().add(  // one top-level column, "struct", itself a struct
+      "struct",
+      new StructType()
+        .add("i", IntegerType, nullable = false)
+        .add(
+          "s",  // second level of nesting, with its own named field "int"
+          new StructType().add("int", IntegerType, nullable = false),
+          nullable = false),
+      nullable = false)
+    val encoder = RowEncoder(schema)
+    assert(encoder.serializer.length == 1)  // one serializer expression for the single top-level column
+    assert(encoder.serializer.head.dataType ==  // the expression's type must keep the names "i", "s" and "int"
+      new StructType()
+      .add("i", IntegerType, nullable = false)
+      .add(
+        "s",
+        new StructType().add("int", IntegerType, nullable = false),
+        nullable = false))
+    assert(encoder.serializer.head.nullable == false)  // the column was declared nullable = false
+  }
  test("RowEncoder should support array as the external type for ArrayType") {
    val schema = new StructType()
      .add("array", ArrayType(IntegerType))