From ab0e625d9e0abd62a20754125952e3a00f2c275a Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@eecs.berkeley.edu>
Date: Thu, 22 Aug 2013 23:02:09 -0700
Subject: [PATCH] Fix PySpark for assembly run and include it in dist

---
 .gitignore                       |   1 +
 core/lib/PY4J_LICENSE.txt        |  27 +++++++++++++++++++++++++++
 core/lib/PY4J_VERSION.txt        |   1 +
 {python => core}/lib/py4j0.7.jar | Bin
 make-distribution.sh             |   5 ++++-
 pyspark                          |  12 ++++++++----
 6 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 core/lib/PY4J_LICENSE.txt
 create mode 100644 core/lib/PY4J_VERSION.txt
 rename {python => core}/lib/py4j0.7.jar (100%)

diff --git a/.gitignore b/.gitignore
index 00fbff6a2c..e1f64a1133 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,3 +40,4 @@ checkpoint
 derby.log
 dist/
 spark-*-bin.tar.gz
+unit-tests.log
diff --git a/core/lib/PY4J_LICENSE.txt b/core/lib/PY4J_LICENSE.txt
new file mode 100644
index 0000000000..a70279ca14
--- /dev/null
+++ b/core/lib/PY4J_LICENSE.txt
@@ -0,0 +1,27 @@
+
+Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/core/lib/PY4J_VERSION.txt b/core/lib/PY4J_VERSION.txt
new file mode 100644
index 0000000000..04a0cd52a8
--- /dev/null
+++ b/core/lib/PY4J_VERSION.txt
@@ -0,0 +1 @@
+b7924aabe9c5e63f0a4d8bbd17019534c7ec014e
diff --git a/python/lib/py4j0.7.jar b/core/lib/py4j0.7.jar
similarity index 100%
rename from python/lib/py4j0.7.jar
rename to core/lib/py4j0.7.jar
diff --git a/make-distribution.sh b/make-distribution.sh
index df7bbf1e74..92b2706126 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -94,11 +94,14 @@ echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE
 cp $FWDIR/assembly/target/*/*assembly*.jar "$DISTDIR/jars/"
 
 # Copy other things
+mkdir "$DISTDIR"/conf  # dist gets only the *.template config files, not the whole conf dir
+cp -r "$FWDIR"/conf/*.template "$DISTDIR"/conf  # glob kept outside the quotes so it expands
 cp -r "$FWDIR/bin" "$DISTDIR"
-cp -r "$FWDIR/conf" "$DISTDIR"
+cp -r "$FWDIR/python" "$DISTDIR"
 cp "$FWDIR/spark-class" "$DISTDIR"
 cp "$FWDIR/spark-shell" "$DISTDIR"
 cp "$FWDIR/spark-executor" "$DISTDIR"
+cp "$FWDIR/pyspark" "$DISTDIR"
 
 
 if [ "$MAKE_TGZ" == "true" ]; then
diff --git a/pyspark b/pyspark
index 801239c108..155ccd4fdf 100755
--- a/pyspark
+++ b/pyspark
@@ -24,10 +24,14 @@ FWDIR="$(cd `dirname $0`; pwd)"
 export SPARK_HOME="$FWDIR"
 
 # Exit if the user hasn't compiled Spark
-if [ ! -e "$SPARK_HOME/repl/target" ]; then
-  echo "Failed to find Spark classes in $SPARK_HOME/repl/target" >&2
-  echo "You need to compile Spark before running this program" >&2
-  exit 1
+if [ ! -f "$FWDIR/RELEASE" ]; then
+  # No RELEASE file (make-distribution.sh writes one), so require a locally built assembly jar.
+  # NOTE(review): SCALA_VERSION is not set in the visible script; fall back to any scala-* dir.
+  if ! ls "$FWDIR"/assembly/target/scala-${SCALA_VERSION:-*}/spark-assembly*.jar > /dev/null 2>&1; then
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
+    echo "You need to compile Spark before running this program" >&2
+    exit 1
+  fi
 fi
 
 # Load environment variables from conf/spark-env.sh, if it exists
-- 
GitLab