From ac10171bea2fc027d6691393b385b3fc0ef3293d Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Tue, 21 Nov 2017 22:24:43 +0100
Subject: [PATCH] [SPARK-22500][SQL] Fix 64KB JVM bytecode limit problem with
 cast

## What changes were proposed in this pull request?

This PR changes `cast` code generation to place the generated code for the field expressions of a struct into separate methods when their combined size could be large, so that the generated code does not hit the 64KB JVM bytecode limit.

## How was this patch tested?

Added a new test case to `CastSuite`.

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #19730 from kiszk/SPARK-22500.
---
 .../spark/sql/catalyst/expressions/Cast.scala  | 12 ++++++++++--
 .../sql/catalyst/expressions/CastSuite.scala   | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index bc809f559d..12baddf1bf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -1039,13 +1039,21 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
           }
         }
        """
-    }.mkString("\n")
+    }
+    val fieldsEvalCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
+      ctx.splitExpressions(
+        expressions = fieldsEvalCode,
+        funcName = "castStruct",
+        arguments = ("InternalRow", tmpRow) :: (rowClass, result) :: Nil)
+    } else {
+      fieldsEvalCode.mkString("\n")
+    }
 
     (c, evPrim, evNull) =>
       s"""
         final $rowClass $result = new $rowClass(${fieldsCasts.length});
         final InternalRow $tmpRow = $c;
-        $fieldsEvalCode
+        $fieldsEvalCodes
         $evPrim = $result;
       """
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index a7ffa884d2..84bd8b2f91 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -827,4 +827,22 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
 
     checkEvaluation(cast(Literal.create(input, from), to), input)
   }
+
+  test("SPARK-22500: cast for struct should not generate codes beyond 64KB") {
+    val N = 250
+
+    val fromInner = new StructType(
+      (1 to N).map(i => StructField(s"s$i", DoubleType)).toArray)
+    val toInner = new StructType(
+      (1 to N).map(i => StructField(s"i$i", IntegerType)).toArray)
+    val inputInner = Row.fromSeq((1 to N).map(i => i + 0.5))
+    val outputInner = Row.fromSeq((1 to N))
+    val fromOuter = new StructType(
+      (1 to N).map(i => StructField(s"s$i", fromInner)).toArray)
+    val toOuter = new StructType(
+      (1 to N).map(i => StructField(s"s$i", toInner)).toArray)
+    val inputOuter = Row.fromSeq((1 to N).map(_ => inputInner))
+    val outputOuter = Row.fromSeq((1 to N).map(_ => outputInner))
+    checkEvaluation(cast(Literal.create(inputOuter, fromOuter), toOuter), outputOuter)
+  }
 }
-- 
GitLab