From 07da72b45190f7db9daa2c6bd33577d28e19e659 Mon Sep 17 00:00:00 2001
From: Shivaram Venkataraman <shivaram@eecs.berkeley.edu>
Date: Mon, 29 Jul 2013 16:25:17 -0700
Subject: [PATCH] Remove duplicate loss history and clarify why. Also some
 minor style fixes.

---
 .../mllib/classification/LogisticRegression.scala |  5 +----
 .../mllib/optimization/GradientDescent.scala      | 15 +++++++--------
 .../scala/spark/mllib/optimization/Updater.scala  |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
index bc1c327729..bf3b05dedb 100644
--- a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
@@ -151,7 +151,6 @@ object LogisticRegressionLocalRandomSGD {
       input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
-      miniBatchFraction: Double,
       initialWeights: Array[Double])
     : LogisticRegressionModel =
   {
@@ -174,7 +173,6 @@ object LogisticRegressionLocalRandomSGD {
       input: RDD[(Int, Array[Double])],
       numIterations: Int,
       stepSize: Double,
-      miniBatchFraction: Double)
     : LogisticRegressionModel =
   {
@@ -195,8 +193,7 @@ object LogisticRegressionLocalRandomSGD {
   def train(
       input: RDD[(Int, Array[Double])],
       numIterations: Int,
-      stepSize: Double
-    )
+      stepSize: Double)
     : LogisticRegressionModel =
   {
     train(input, numIterations, stepSize, 1.0)
diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
index d4b83a1456..19cda26446 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
@@ -61,7 +61,7 @@ object GradientDescent {

     // Initialize weights as a column vector
     var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights:_*)
-    var reg_val = 0.0
+    var regVal = 0.0

     for (i <- 1 to numIters) {
       val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map {
@@ -71,15 +71,14 @@ object GradientDescent {
         (grad, loss)
       }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))

-      stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
+      /**
+       * NOTE(Xinghao): lossSum is computed using the weights from the previous iteration
+       * and regVal is the regularization value computed in the previous iteration as well.
+       */
+      stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
       val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i, regParam)
       weights = update._1
-      reg_val = update._2
-      stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
-      /*
-       * NOTE(Xinghao): The loss here is sum of lossSum computed using the weights before applying updater,
-       * and reg_val using weights after applying updater
-       */
+      regVal = update._2
     }

     (weights.toArray, stochasticLossHistory.toArray)
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
index 188fe7d972..bbf21e5c28 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
@@ -76,7 +76,7 @@ class SquaredL2Updater extends Updater {
     val thisIterStepSize = stepSize / math.sqrt(iter)
     val normGradient = gradient.mul(thisIterStepSize)
     val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0)
-    (newWeights, pow(newWeights.norm2,2.0) * regParam)
+    (newWeights, pow(newWeights.norm2, 2.0) * regParam)
   }
 }
--
GitLab
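
For context, a minimal standalone Scala sketch of the loss-history ordering this
patch settles on: the loss appended at iteration i is computed from the weights
and the regVal produced by iteration i-1, i.e. both are measured before the
updater runs. Everything here is illustrative and not MLlib code: the object
name LossHistorySketch, the 1-D least-squares data, and the constants are
assumptions chosen only to make the snippet self-contained and runnable.

    import scala.collection.mutable.ArrayBuffer

    object LossHistorySketch {
      def main(args: Array[String]): Unit = {
        val data = Array((1.0, 2.0), (2.0, 4.1), (3.0, 5.9)) // (x, y) pairs, illustrative
        val stepSize = 0.1
        val regParam = 0.1
        val numIters = 20

        var weight = 0.0
        var regVal = 0.0 // regularization value from the previous iteration
        val stochasticLossHistory = new ArrayBuffer[Double]()

        for (i <- 1 to numIters) {
          // Gradient and loss under the current (pre-update) weight.
          val (gradSum, lossSum) = data.map { case (x, y) =>
            val err = weight * x - y
            (err * x, 0.5 * err * err)
          }.reduce((a, b) => (a._1 + b._1, a._2 + b._2))

          // As in the patched GradientDescent: record the loss before the
          // update, pairing lossSum with last iteration's regVal.
          stochasticLossHistory.append(lossSum / data.length + regVal)

          // Update step mirroring SquaredL2Updater's form, then refresh regVal
          // (regParam * ||w||^2) for use in the next iteration's loss entry.
          val thisIterStepSize = stepSize / math.sqrt(i)
          weight = (weight - thisIterStepSize * gradSum / data.length) /
            (2.0 * thisIterStepSize * regParam + 1.0)
          regVal = regParam * weight * weight
        }

        println(stochasticLossHistory.mkString(", "))
      }
    }

Appending once, before the update, keeps entry i of the history aligned with the
weights that produced it; the code removed by this patch appended twice per
iteration, mixing a pre-update lossSum with a post-update reg_val.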