From d94a44d7caaf3fe7559d9ad7b10872fa16cf81ca Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Fri, 29 Aug 2014 15:36:04 -0700
Subject: [PATCH] [SPARK-3269][SQL] Decreases initial buffer size for row set
 to prevent OOM

When a large batch size is specified, `SparkSQLOperationManager` OOMs even if the whole result set is much smaller than the batch size, because the row buffer is eagerly pre-allocated to the full requested batch size. This patch caps the initial buffer capacity (at 1024) and lets it grow on demand.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #2171 from liancheng/jdbc-fetch-size and squashes the following commits:

5e1623b [Cheng Lian] Decreases initial buffer size for row set to prevent OOM
---
 .../hive/thriftserver/server/SparkSQLOperationManager.scala  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
index 6eccb1ba6d..f12b5a69a0 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
@@ -66,9 +66,10 @@ class SparkSQLOperationManager(hiveContext: HiveContext) extends OperationManage
         if (!iter.hasNext) {
           new RowSet()
         } else {
-          val maxRows = maxRowsL.toInt // Do you really want a row batch larger than Int Max? No.
+          // maxRowsL here typically maps to java.sql.Statement.getFetchSize, which is an int
+          val maxRows = maxRowsL.toInt
           var curRow = 0
-          var rowSet = new ArrayBuffer[Row](maxRows)
+          var rowSet = new ArrayBuffer[Row](maxRows.min(1024))
 
           while (curRow < maxRows && iter.hasNext) {
             val sparkRow = iter.next()
-- 
GitLab