From 111a62474a2fb7f4e7f19fcfb8efaae37aa40400 Mon Sep 17 00:00:00 2001 From: Yanbo Liang <ybliang8@gmail.com> Date: Tue, 12 Apr 2016 11:34:40 -0700 Subject: [PATCH] [SPARK-14147][ML][SPARKR] SparkR predict should not output feature column ## What changes were proposed in this pull request? SparkR does not support type of vector which is the default type of feature column in ML. R predict also does not output intermediate feature column. So SparkR ```predict``` should not output feature column. In this PR, I only fix this issue for ```naiveBayes``` and ```survreg```. ```kmeans``` has the right code route already and ```glm``` will be fixed at SparkRWrapper refactor(#12294). ## How was this patch tested? No new tests. cc mengxr shivaram Author: Yanbo Liang <ybliang8@gmail.com> Closes #11958 from yanboliang/spark-14147. --- .../org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala | 2 +- .../main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala index 2ae411555f..7835468626 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala @@ -44,7 +44,7 @@ private[r] class AFTSurvivalRegressionWrapper private ( } def transform(dataset: Dataset[_]): DataFrame = { - pipeline.transform(dataset) + pipeline.transform(dataset).drop(aftModel.getFeaturesCol) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala index 2cd709d2ee..b17207e99b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala @@ -37,7 +37,9 @@ private[r] class NaiveBayesWrapper private ( lazy val tables: Array[Double] = naiveBayesModel.theta.toArray.map(math.exp) def transform(dataset: Dataset[_]): DataFrame = { - pipeline.transform(dataset).drop(PREDICTED_LABEL_INDEX_COL) + pipeline.transform(dataset) + .drop(PREDICTED_LABEL_INDEX_COL) + .drop(naiveBayesModel.getFeaturesCol) } } -- GitLab