From 0698e6c88ca11fdfd6e5498cab784cf6dbcdfacb Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Thu, 11 May 2017 14:48:13 +0800
Subject: [PATCH] [SPARK-20606][ML] Revert "[SPARK-20606][ML] ML 2.2 QA: Remove deprecated methods for ML"

This reverts commit b8733e0ad9f5a700f385e210450fd2c10137293e.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #17944 from yanboliang/spark-20606-revert.
---
 .../DecisionTreeClassifier.scala              |  18 +--
 .../ml/classification/GBTClassifier.scala     |  24 ++--
 .../RandomForestClassifier.scala              |  24 ++--
 .../ml/regression/DecisionTreeRegressor.scala |  18 +--
 .../spark/ml/regression/GBTRegressor.scala    |  24 ++--
 .../ml/regression/RandomForestRegressor.scala |  24 ++--
 .../org/apache/spark/ml/tree/treeParams.scala | 105 ++++++++++++++++++
 .../org/apache/spark/ml/util/ReadWrite.scala  |  16 +++
 project/MimaExcludes.scala                    |  68 ------------
 python/pyspark/ml/util.py                     |  32 ++++++
 10 files changed, 219 insertions(+), 134 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 5fb105c6af..9f60f0896e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -54,27 +54,27 @@ class DecisionTreeClassifier @Since("1.4.0") (

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -86,15 +86,15 @@ class DecisionTreeClassifier @Since("1.4.0") (
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)

   /** @group setParam */
   @Since("1.6.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   override protected def train(dataset: Dataset[_]): DecisionTreeClassificationModel = {
     val categoricalFeatures: Map[Int, Int] =
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 263ed10f19..ade0960f87 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -70,27 +70,27 @@ class GBTClassifier @Since("1.4.0") (

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -102,7 +102,7 @@ class GBTClassifier @Since("1.4.0") (
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /**
    * The impurity setting is ignored for GBT models.
@@ -111,7 +111,7 @@ class GBTClassifier @Since("1.4.0") (
    * @group setParam
    */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = {
+  override def setImpurity(value: String): this.type = {
     logWarning("GBTClassifier.setImpurity should NOT be used")
     this
   }
@@ -120,21 +120,21 @@ class GBTClassifier @Since("1.4.0") (

   /** @group setParam */
   @Since("1.4.0")
-  def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)
+  override def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   // Parameters from GBTParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxIter(value: Int): this.type = set(maxIter, value)
+  override def setMaxIter(value: Int): this.type = set(maxIter, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setStepSize(value: Double): this.type = set(stepSize, value)
+  override def setStepSize(value: Double): this.type = set(stepSize, value)

   // Parameters from GBTClassifierParams:

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 441cfda899..ab4c235209 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -56,27 +56,27 @@ class RandomForestClassifier @Since("1.4.0") (

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -88,31 +88,31 @@ class RandomForestClassifier @Since("1.4.0") (
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)

   // Parameters from TreeEnsembleParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)
+  override def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   // Parameters from RandomForestParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setNumTrees(value: Int): this.type = set(numTrees, value)
+  override def setNumTrees(value: Int): this.type = set(numTrees, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setFeatureSubsetStrategy(value: String): this.type =
+  override def setFeatureSubsetStrategy(value: String): this.type =
     set(featureSubsetStrategy, value)

   override protected def train(dataset: Dataset[_]): RandomForestClassificationModel = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index c2b0358e84..01c5cc1c7e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -53,27 +53,27 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
   // Override parameter setters from parent trait for Java API compatibility.
   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -85,15 +85,15 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)

   /** @group setParam */
   @Since("1.6.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   /** @group setParam */
   @Since("2.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 8d9b519efb..08d175cb94 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -68,27 +68,27 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -100,7 +100,7 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /**
    * The impurity setting is ignored for GBT models.
@@ -109,7 +109,7 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
    * @group setParam
    */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = {
+  override def setImpurity(value: String): this.type = {
     logWarning("GBTRegressor.setImpurity should NOT be used")
     this
   }
@@ -118,21 +118,21 @@ class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)

   /** @group setParam */
   @Since("1.4.0")
-  def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)
+  override def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   // Parameters from GBTParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxIter(value: Int): this.type = set(maxIter, value)
+  override def setMaxIter(value: Int): this.type = set(maxIter, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setStepSize(value: Double): this.type = set(stepSize, value)
+  override def setStepSize(value: Double): this.type = set(stepSize, value)

   // Parameters from GBTRegressorParams:

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 7b9ddf6e95..a58da50fad 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -55,27 +55,27 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)

   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)

   /**
    * Specifies how often to checkpoint the cached node IDs.
@@ -87,31 +87,31 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
    * @group setParam
    */
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)

   // Parameters from TreeEnsembleParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)
+  override def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)

   // Parameters from RandomForestParams:

   /** @group setParam */
   @Since("1.4.0")
-  def setNumTrees(value: Int): this.type = set(numTrees, value)
+  override def setNumTrees(value: Int): this.type = set(numTrees, value)

   /** @group setParam */
   @Since("1.4.0")
-  def setFeatureSubsetStrategy(value: String): this.type =
+  override def setFeatureSubsetStrategy(value: String): this.type =
     set(featureSubsetStrategy, value)

   override protected def train(dataset: Dataset[_]): RandomForestRegressionModel = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 5526d4d75b..cd1950bd76 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -109,24 +109,80 @@ private[ml] trait DecisionTreeParams extends PredictorParams
   setDefault(maxDepth -> 5, maxBins -> 32, minInstancesPerNode -> 1, minInfoGain -> 0.0,
     maxMemoryInMB -> 256, cacheNodeIds -> false, checkpointInterval -> 10)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+
   /** @group getParam */
   final def getMaxDepth: Int = $(maxDepth)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMaxBins(value: Int): this.type = set(maxBins, value)
+
   /** @group getParam */
   final def getMaxBins: Int = $(maxBins)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+
   /** @group getParam */
   final def getMinInstancesPerNode: Int = $(minInstancesPerNode)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+
   /** @group getParam */
   final def getMinInfoGain: Double = $(minInfoGain)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setSeed(value: Long): this.type = set(seed, value)
+
+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group expertSetParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+
   /** @group expertGetParam */
   final def getMaxMemoryInMB: Int = $(maxMemoryInMB)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group expertSetParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+
   /** @group expertGetParam */
   final def getCacheNodeIds: Boolean = $(cacheNodeIds)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+
   /** (private[ml]) Create a Strategy instance to use with the old API. */
   private[ml] def getOldStrategy(
       categoricalFeatures: Map[Int, Int],
@@ -169,6 +225,13 @@ private[ml] trait TreeClassifierParams extends Params {

   setDefault(impurity -> "gini")

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setImpurity(value: String): this.type = set(impurity, value)
+
   /** @group getParam */
   final def getImpurity: String = $(impurity).toLowerCase(Locale.ROOT)

@@ -213,6 +276,13 @@ private[ml] trait TreeRegressorParams extends Params {

   setDefault(impurity -> "variance")

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setImpurity(value: String): this.type = set(impurity, value)
+
   /** @group getParam */
   final def getImpurity: String = $(impurity).toLowerCase(Locale.ROOT)

@@ -268,6 +338,13 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams {

   setDefault(subsamplingRate -> 1.0)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value)
+
   /** @group getParam */
   final def getSubsamplingRate: Double = $(subsamplingRate)

@@ -305,6 +382,13 @@ private[ml] trait RandomForestParams extends TreeEnsembleParams {

   setDefault(numTrees -> 20)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setNumTrees(value: Int): this.type = set(numTrees, value)
+
   /** @group getParam */
   final def getNumTrees: Int = $(numTrees)

@@ -346,6 +430,13 @@ private[ml] trait RandomForestParams extends TreeEnsembleParams {

   setDefault(featureSubsetStrategy -> "auto")

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setFeatureSubsetStrategy(value: String): this.type = set(featureSubsetStrategy, value)
+
   /** @group getParam */
   final def getFeatureSubsetStrategy: String = $(featureSubsetStrategy).toLowerCase(Locale.ROOT)
 }
@@ -380,6 +471,13 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
   // final val validationTol: DoubleParam = new DoubleParam(this, "validationTol", "")
   // validationTol -> 1e-5

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setMaxIter(value: Int): this.type = set(maxIter, value)
+
   /**
    * Param for Step size (a.k.a. learning rate) in interval (0, 1] for shrinking
    * the contribution of each estimator.
@@ -393,6 +491,13 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
   /** @group getParam */
   final def getStepSize: Double = $(stepSize)

+  /**
+   * @deprecated This method is deprecated and will be removed in 2.2.0.
+   * @group setParam
+   */
+  @deprecated("This method is deprecated and will be removed in 2.2.0.", "2.1.0")
+  def setStepSize(value: Double): this.type = set(stepSize, value)
+
   setDefault(maxIter -> 20, stepSize -> 0.1)

   /** (private[ml]) Create a BoostingStrategy instance to use with the old API. */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index f7e570fd5c..a8b80031fa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -42,6 +42,16 @@ import org.apache.spark.util.Utils
 private[util] sealed trait BaseReadWrite {
   private var optionSparkSession: Option[SparkSession] = None

+  /**
+   * Sets the Spark SQLContext to use for saving/loading.
+   */
+  @Since("1.6.0")
+  @deprecated("Use session instead, This method will be removed in 2.2.0.", "2.0.0")
+  def context(sqlContext: SQLContext): this.type = {
+    optionSparkSession = Option(sqlContext.sparkSession)
+    this
+  }
+
   /**
    * Sets the Spark Session to use for saving/loading.
    */
@@ -120,6 +130,9 @@ abstract class MLWriter extends BaseReadWrite with Logging {

   // override for Java compatibility
   override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)
+
+  // override for Java compatibility
+  override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
 }

 /**
@@ -175,6 +188,9 @@ abstract class MLReader[T] extends BaseReadWrite {

   // override for Java compatibility
   override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)
+
+  // override for Java compatibility
+  override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
 }

 /**
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 2dff154967..3cc089dced 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -1008,74 +1008,6 @@ object MimaExcludes {
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setFeatureSubsetStrategy"),
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.numTrees"),
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy")
-    ) ++ Seq(
-      // [SPARK-20606] ML 2.2 QA: Remove deprecated methods for ML
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.DecisionTreeClassificationModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setSubsamplingRate"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMaxIter"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setStepSize"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.GBTClassificationModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setSubsamplingRate"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setFeatureSubsetStrategy"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.DecisionTreeRegressionModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setSubsamplingRate"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMaxIter"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setStepSize"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.GBTRegressionModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setSeed"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setMinInfoGain"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setSubsamplingRate"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setCacheNodeIds"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setCheckpointInterval"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setMaxDepth"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setImpurity"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setMaxMemoryInMB"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setMaxBins"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setMinInstancesPerNode"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLWriter.context"),
-      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.util.MLReader.context")
     )
 }
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 688109ab11..02016f172a 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -76,6 +76,13 @@ class MLWriter(object):
         """Overwrites if the output path already exists."""
         raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self))

+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for saving.
+        .. note:: Deprecated in 2.1 and will be removed in 2.2, use session instead.
+        """
+        raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self))
+
     def session(self, sparkSession):
         """Sets the Spark Session to use for saving."""
         raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self))
@@ -103,6 +110,15 @@ class JavaMLWriter(MLWriter):
         self._jwrite.overwrite()
         return self

+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for saving.
+        .. note:: Deprecated in 2.1 and will be removed in 2.2, use session instead.
+        """
+        warnings.warn("Deprecated in 2.1 and will be removed in 2.2, use session instead.")
+        self._jwrite.context(sqlContext._ssql_ctx)
+        return self
+
     def session(self, sparkSession):
         """Sets the Spark Session to use for saving."""
         self._jwrite.session(sparkSession._jsparkSession)
@@ -149,6 +165,13 @@ class MLReader(object):
         """Load the ML instance from the input path."""
         raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self))

+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for loading.
+        .. note:: Deprecated in 2.1 and will be removed in 2.2, use session instead.
+        """
+        raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self))
+
     def session(self, sparkSession):
         """Sets the Spark Session to use for loading."""
         raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self))
@@ -174,6 +197,15 @@ class JavaMLReader(MLReader):
                 % self._clazz)
         return self._clazz._from_java(java_obj)

+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for loading.
+        .. note:: Deprecated in 2.1 and will be removed in 2.2, use session instead.
+        """
+        warnings.warn("Deprecated in 2.1 and will be removed in 2.2, use session instead.")
+        self._jread.context(sqlContext._ssql_ctx)
+        return self
+
     def session(self, sparkSession):
         """Sets the Spark Session to use for loading."""
         self._jread.session(sparkSession._jsparkSession)
--
GitLab
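
To see what the revert restores in practice, here is a minimal, hypothetical sketch of user code that compiles again once this patch lands. The SparkSession and the DataFrame `training` (with "label"/"features" columns) are assumptions for illustration, not part of the patch:

// A hedged sketch, not part of the diff above. Assumes `training` exists.
import org.apache.spark.ml.classification.DecisionTreeClassifier

// Each restored setter returns this.type, so configuration chains as before.
val dt = new DecisionTreeClassifier()
  .setMaxDepth(5)
  .setMaxBins(32)
  .setMinInstancesPerNode(1)
  .setSeed(42L)
val model = dt.fit(training)

Because the trait-level setters in treeParams.scala come back as deprecated rather than removed, code written against Spark 2.1 that calls a setter through a model type (for example, model.setMaxDepth(6) on a DecisionTreeClassificationModel, exactly the signatures listed in the dropped MimaExcludes entries) keeps compiling in 2.2 with a deprecation warning instead of breaking.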
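Likewise, the restored context(...) overloads on MLWriter/MLReader keep pre-2.0 persistence call sites working alongside their session(...) replacement. Another hedged sketch; `spark` and the fitted `model` from above are again assumed, and the output path is a placeholder:

// Deprecated path restored by this revert; warns but still works in 2.2:
model.write.context(spark.sqlContext).overwrite().save("/tmp/dt-model")

// Preferred replacement since 2.0:
model.write.session(spark).overwrite().save("/tmp/dt-model")

Both calls end up configuring the same underlying SparkSession: as the ReadWrite.scala hunk shows, context simply unwraps sqlContext.sparkSession and delegates to session.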