diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c937b9602b8c11bab9188e212808cfdd44399c97..0d073839259c6859f7761afc08b5ff3f884d2e42 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -133,9 +133,9 @@ class LogisticRegression(override val uid: String) /** * Whether to standardize the training features before fitting the model. * The coefficients of models will be always returned on the original scale, - * so it will be transparent for users. Note that when no regularization, - * with or without standardization, the models should be always converged to - * the same solution. + * so it will be transparent for users. Note that with/without standardization, + * the models should always converge to the same solution when no regularization + * is applied. In R's GLMNET package, the default behavior is true as well. * Default is true. 
* @group setParam * */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 3b85ba001b128b9bb06006cab678b056d812622c..92d819bad8654106c89e98ae4cae75dc15f1e9c7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.StatCounter */ private[regression] trait LinearRegressionParams extends PredictorParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol - with HasFitIntercept + with HasFitIntercept with HasStandardization /** * :: Experimental :: @@ -84,6 +84,18 @@ class LinearRegression(override val uid: String) def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) setDefault(fitIntercept -> true) + /** + * Whether to standardize the training features before fitting the model. + * The coefficients of models will always be returned on the original scale, + * so it will be transparent for users. Note that with/without standardization, + * the models should always converge to the same solution when no regularization + * is applied. In R's GLMNET package, the default behavior is true as well. + * Default is true. + * @group setParam + */ + def setStandardization(value: Boolean): this.type = set(standardization, value) + setDefault(standardization -> true) + /** * Set the ElasticNet mixing parameter. * For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty. 
@@ -165,12 +177,24 @@ class LinearRegression(override val uid: String) val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam val costFun = new LeastSquaresCostFun(instances, yStd, yMean, $(fitIntercept), - featuresStd, featuresMean, effectiveL2RegParam) + $(standardization), featuresStd, featuresMean, effectiveL2RegParam) val optimizer = if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) } else { - new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegParam, $(tol)) + def effectiveL1RegFun = (index: Int) => { + if ($(standardization)) { + effectiveL1RegParam + } else { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. + if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0 + } + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegFun, $(tol)) } val initialWeights = Vectors.zeros(numFeatures) @@ -456,6 +480,7 @@ class LinearRegressionSummary private[regression] ( * @param weights The weights/coefficients corresponding to the features. * @param labelStd The standard deviation value of the label. * @param labelMean The mean value of the label. + * @param fitIntercept Whether to fit an intercept term. * @param featuresStd The standard deviation values of the features. * @param featuresMean The mean values of the features. 
*/ @@ -568,6 +593,7 @@ private class LeastSquaresCostFun( labelStd: Double, labelMean: Double, fitIntercept: Boolean, + standardization: Boolean, featuresStd: Array[Double], featuresMean: Array[Double], effectiveL2regParam: Double) extends DiffFunction[BDV[Double]] { @@ -584,14 +610,38 @@ private class LeastSquaresCostFun( case (aggregator1, aggregator2) => aggregator1.merge(aggregator2) }) - // regVal is the sum of weight squares for L2 regularization - val norm = brzNorm(weights, 2.0) - val regVal = 0.5 * effectiveL2regParam * norm * norm + val totalGradientArray = leastSquaresAggregator.gradient.toArray - val loss = leastSquaresAggregator.loss + regVal - val gradient = leastSquaresAggregator.gradient - axpy(effectiveL2regParam, w, gradient) + val regVal = if (effectiveL2regParam == 0.0) { + 0.0 + } else { + var sum = 0.0 + w.foreachActive { (index, value) => + // The following code will compute the loss of the regularization; also + // the gradient of the regularization, and add back to totalGradientArray. + sum += { + if (standardization) { + totalGradientArray(index) += effectiveL2regParam * value + value * value + } else { + if (featuresStd(index) != 0.0) { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. 
+ val temp = value / (featuresStd(index) * featuresStd(index)) + totalGradientArray(index) += effectiveL2regParam * temp + value * temp + } else { + 0.0 + } + } + } + } + 0.5 * effectiveL2regParam * sum + } - (loss, gradient.toBreeze.asInstanceOf[BDV[Double]]) + (leastSquaresAggregator.loss + regVal, new BDV(totalGradientArray)) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index 7cdda3db88ad13e6b5a6144e197dcadba66db3f5..21ad8225bd9f7723ca5bd055ec3b53e65f6091c8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -70,6 +70,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { assert(lir.getRegParam === 0.0) assert(lir.getElasticNetParam === 0.0) assert(lir.getFitIntercept) + assert(lir.getStandardization) val model = lir.fit(dataset) model.transform(dataset) .select("label", "prediction") @@ -81,8 +82,11 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { } test("linear regression with intercept without regularization") { - val trainer = new LinearRegression - val model = trainer.fit(dataset) + val trainer1 = new LinearRegression + // The result should be the same regardless of standardization without regularization + val trainer2 = (new LinearRegression).setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* Using the following R code to load the data and train the model using glmnet package. @@ -95,28 +99,36 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { > weights 3 x 1 sparse Matrix of class "dgCMatrix" s0 - (Intercept) 6.300528 - as.numeric.data.V2. 4.701024 - as.numeric.data.V3. 7.198257 + (Intercept) 6.298698 + as.numeric.data.V2. 4.700706 + as.numeric.data.V3. 
7.199082 */ val interceptR = 6.298698 val weightsR = Vectors.dense(4.700706, 7.199082) - assert(model.intercept ~== interceptR relTol 1E-3) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR relTol 1E-3) + assert(model1.weights ~= weightsR relTol 1E-3) + assert(model2.intercept ~== interceptR relTol 1E-3) + assert(model2.weights ~= weightsR relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression without intercept without regularization") { - val trainer = (new LinearRegression).setFitIntercept(false) - val model = trainer.fit(dataset) - val modelWithoutIntercept = trainer.fit(datasetWithoutIntercept) + val trainer1 = (new LinearRegression).setFitIntercept(false) + // Without regularization the results should be the same + val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false) + val model1 = trainer1.fit(dataset) + val modelWithoutIntercept1 = trainer1.fit(datasetWithoutIntercept) + val model2 = trainer2.fit(dataset) + val modelWithoutIntercept2 = trainer2.fit(datasetWithoutIntercept) + /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0, @@ -130,26 +142,34 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { */ val weightsR = Vectors.dense(6.995908, 5.275131) - assert(model.intercept ~== 0 absTol 1E-3) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== 0 absTol 1E-3) + assert(model1.weights ~= weightsR relTol 1E-3) + assert(model2.intercept ~== 0 absTol 1E-3) + 
assert(model2.weights ~= weightsR relTol 1E-3) + /* Then again with the data with no intercept: > weightsWithoutIntercept 3 x 1 sparse Matrix of class "dgCMatrix" - s0 + s0 (Intercept) . as.numeric.data3.V2. 4.70011 as.numeric.data3.V3. 7.19943 */ val weightsWithoutInterceptR = Vectors.dense(4.70011, 7.19943) - assert(modelWithoutIntercept.intercept ~== 0 absTol 1E-3) - assert(modelWithoutIntercept.weights ~= weightsWithoutInterceptR relTol 1E-3) + assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3) + assert(modelWithoutIntercept1.weights ~= weightsWithoutInterceptR relTol 1E-3) + assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3) + assert(modelWithoutIntercept2.weights ~= weightsWithoutInterceptR relTol 1E-3) } test("linear regression with intercept with L1 regularization") { - val trainer = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) - val model = trainer.fit(dataset) + val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57)) @@ -160,24 +180,44 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { as.numeric.data.V2. 4.024821 as.numeric.data.V3. 
6.679841 */ - val interceptR = 6.24300 - val weightsR = Vectors.dense(4.024821, 6.679841) + val interceptR1 = 6.24300 + val weightsR1 = Vectors.dense(4.024821, 6.679841) - assert(model.intercept ~== interceptR relTol 1E-3) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 relTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57, + standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.416948 + as.numeric.data.V2. 3.893869 + as.numeric.data.V3. 6.724286 + */ + val interceptR2 = 6.416948 + val weightsR2 = Vectors.dense(3.893869, 6.724286) + + assert(model2.intercept ~== interceptR2 relTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression without intercept with L1 regularization") { - val trainer = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) .setFitIntercept(false) - val model = trainer.fit(dataset) + val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setFitIntercept(false).setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57, @@ -189,51 +229,90 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { 
as.numeric.data.V2. 6.299752 as.numeric.data.V3. 4.772913 */ - val interceptR = 0.0 - val weightsR = Vectors.dense(6.299752, 4.772913) + val interceptR1 = 0.0 + val weightsR1 = Vectors.dense(6.299752, 4.772913) - assert(model.intercept ~== interceptR absTol 1E-5) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 absTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, lambda = 0.57, + intercept=FALSE, standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.data.V2. 6.232193 + as.numeric.data.V3. 4.764229 + */ + val interceptR2 = 0.0 + val weightsR2 = Vectors.dense(6.232193, 4.764229) + + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression with intercept with L2 regularization") { - val trainer = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) - val model = trainer.fit(dataset) + val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) + val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) + .setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* - weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3)) - > weights - 3 x 1 sparse Matrix of class "dgCMatrix" - s0 - (Intercept) 6.328062 - as.numeric.data.V2. 
3.222034 - as.numeric.data.V3. 4.926260 + weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 5.269376 + as.numeric.data.V2. 3.736216 + as.numeric.data.V3. 5.712356 */ - val interceptR = 5.269376 - val weightsR = Vectors.dense(3.736216, 5.712356) + val interceptR1 = 5.269376 + val weightsR1 = Vectors.dense(3.736216, 5.712356) - assert(model.intercept ~== interceptR relTol 1E-3) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 relTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, + standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 5.791109 + as.numeric.data.V2. 3.435466 + as.numeric.data.V3. 5.910406 + */ + val interceptR2 = 5.791109 + val weightsR2 = Vectors.dense(3.435466, 5.910406) + + assert(model2.intercept ~== interceptR2 relTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression without intercept with L2 regularization") { - val trainer = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) + val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) .setFitIntercept(false) - val model = trainer.fit(dataset) + val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) + .setFitIntercept(false).setStandardization(false) + val model1 = 
trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, @@ -245,23 +324,42 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { as.numeric.data.V2. 5.522875 as.numeric.data.V3. 4.214502 */ - val interceptR = 0.0 - val weightsR = Vectors.dense(5.522875, 4.214502) + val interceptR1 = 0.0 + val weightsR1 = Vectors.dense(5.522875, 4.214502) - assert(model.intercept ~== interceptR absTol 1E-3) - assert(model.weights ~== weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 absTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, + intercept = FALSE, standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.data.V2. 5.263704 + as.numeric.data.V3. 
4.187419 + */ + val interceptR2 = 0.0 + val weightsR2 = Vectors.dense(5.263704, 4.187419) + + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression with intercept with ElasticNet regularization") { - val trainer = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) - val model = trainer.fit(dataset) + val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6)) @@ -272,24 +370,43 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { as.numeric.data.V2. 3.168435 as.numeric.data.V3. 5.200403 */ - val interceptR = 5.696056 - val weightsR = Vectors.dense(3.670489, 6.001122) + val interceptR1 = 5.696056 + val weightsR1 = Vectors.dense(3.670489, 6.001122) - assert(model.intercept ~== interceptR relTol 1E-3) - assert(model.weights ~== weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 relTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6, + standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.114723 + as.numeric.data.V2. 3.409937 + as.numeric.data.V3. 
6.146531 + */ + val interceptR2 = 6.114723 + val weightsR2 = Vectors.dense(3.409937, 6.146531) + + assert(model2.intercept ~== interceptR2 relTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } test("linear regression without intercept with ElasticNet regularization") { - val trainer = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) .setFitIntercept(false) - val model = trainer.fit(dataset) + val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setFitIntercept(false).setStandardization(false) + val model1 = trainer1.fit(dataset) + val model2 = trainer2.fit(dataset) /* weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6, @@ -301,16 +418,32 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { as.numeric.dataM.V2. 5.673348 as.numeric.dataM.V3. 4.322251 */ - val interceptR = 0.0 - val weightsR = Vectors.dense(5.673348, 4.322251) + val interceptR1 = 0.0 + val weightsR1 = Vectors.dense(5.673348, 4.322251) - assert(model.intercept ~== interceptR absTol 1E-3) - assert(model.weights ~= weightsR relTol 1E-3) + assert(model1.intercept ~== interceptR1 absTol 1E-3) + assert(model1.weights ~= weightsR1 relTol 1E-3) - model.transform(dataset).select("features", "prediction").collect().foreach { + /* + weights <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6, + intercept=FALSE, standardize=FALSE)) + > weights + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . 
+ as.numeric.data.V2. 5.477988 + as.numeric.data.V3. 4.297622 + */ + val interceptR2 = 0.0 + val weightsR2 = Vectors.dense(5.477988, 4.297622) + + assert(model2.intercept ~== interceptR2 absTol 1E-3) + assert(model2.weights ~= weightsR2 relTol 1E-3) + + model1.transform(dataset).select("features", "prediction").collect().foreach { case Row(features: DenseVector, prediction1: Double) => val prediction2 = - features(0) * model.weights(0) + features(1) * model.weights(1) + model.intercept + features(0) * model1.weights(0) + features(1) * model1.weights(1) + model1.intercept assert(prediction1 ~== prediction2 relTol 1E-5) } } @@ -372,5 +505,4 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { .zip(testSummary.residuals.select("residuals").collect()) .forall { case (Row(r1: Double), Row(r2: Double)) => r1 ~== r2 relTol 1E-5 } } - }