Commit 372fa61f authored by Zheng RuiFeng, committed by Reynold Xin

[SPARK-15770][ML] Annotation audit for Experimental and DeveloperApi

## What changes were proposed in this pull request?
1. Remove `:: Experimental ::` comments from non-experimental APIs.
2. Add `:: Experimental ::` comments to experimental APIs.
3. Add `:: DeveloperApi ::` comments to `DeveloperApi` APIs (the tag/annotation pairing convention is sketched below).
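
For context, the convention these changes enforce is that the Scaladoc tag mirrors the annotation on the same definition. A minimal sketch of the pairing (illustrative names, not from this patch):

import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
 * :: Experimental ::
 *
 * The Scaladoc tag above must match the annotation below, so the generated
 * docs and the compiled annotation stay in sync.
 */
@Experimental
class SomeExperimentalFeature

/**
 * :: DeveloperApi ::
 */
@DeveloperApi
object SomeExperimentalFeature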

## How was this patch tested?
Manual tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13514 from zhengruifeng/del_experimental.
parent 4e767d0f
Showing 50 additions and 3 deletions
@@ -27,6 +27,9 @@ import org.apache.spark.annotation.DeveloperApi
 @DeveloperApi
 sealed abstract class AttributeType(val name: String)
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object AttributeType {
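A quick look at the now-documented DeveloperApi factory object; a minimal sketch assuming a standard Spark ML dependency:

import org.apache.spark.ml.attribute.AttributeType

// Resolve an attribute type from its string name.
val t = AttributeType.fromName("numeric")
println(t == AttributeType.Numeric) // true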
@@ -675,6 +675,8 @@ class DistributedLDAModel private[ml] (
   private var _checkpointFiles: Array[String] = oldDistributedModel.checkpointFiles
 
   /**
+   * :: DeveloperApi ::
+   *
    * If using checkpointing and [[LDA.keepLastCheckpoint]] is set to true, then there may be
    * saved checkpoint files. This method is provided so that users can manage those files.
    *
@@ -689,6 +691,8 @@ class DistributedLDAModel private[ml] (
   def getCheckpointFiles: Array[String] = _checkpointFiles
 
   /**
+   * :: DeveloperApi ::
+   *
    * Remove any remaining checkpoint files from training.
    *
    * @see [[getCheckpointFiles]]
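A sketch of how a caller might use these two DeveloperApi methods; `dataset` (a DataFrame with a features column) and the parameter values are assumptions:

import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA}

val lda = new LDA()
  .setK(10)
  .setOptimizer("em")          // only EM produces a DistributedLDAModel
  .setKeepLastCheckpoint(true) // keep the final checkpoint after training
val model = lda.fit(dataset).asInstanceOf[DistributedLDAModel]

val files = model.getCheckpointFiles // list any remaining checkpoint files
model.deleteCheckpointFiles()        // then remove them when no longer needed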
@@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
+ * :: Experimental ::
+ *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -396,6 +398,8 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
+ * :: Experimental ::
+ *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
@@ -32,6 +32,8 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
+ * :: Experimental ::
+ *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
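An end-to-end sketch of the experimental API annotated above (assumes an active SparkContext `sc`; the points are toy data):

import org.apache.spark.mllib.clustering.BisectingKMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(0.1, 0.1),
  Vectors.dense(9.0, 9.0), Vectors.dense(9.1, 9.1)))

val model = new BisectingKMeans().setK(2).run(points)
// Prediction walks the clustering tree from the root down to a leaf.
println(model.predict(Vectors.dense(0.05, 0.05)))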
@@ -425,6 +425,13 @@ class LocalLDAModel private[spark] (
 }
 
+/**
+ * :: Experimental ::
+ *
+ * Local (non-distributed) model fitted by [[LDA]].
+ *
+ * This model stores the inferred topics only; it does not store info about the training dataset.
+ */
 @Experimental
 @Since("1.5.0")
 object LocalLDAModel extends Loader[LocalLDAModel] {
@@ -814,7 +821,15 @@ class DistributedLDAModel private[clustering] (
   }
 }
 
+/**
+ * :: Experimental ::
+ *
+ * Distributed model fitted by [[LDA]].
+ * This type of model is currently only produced by Expectation-Maximization (EM).
+ *
+ * This model stores the inferred topics, the full training dataset, and the topic distribution
+ * for each training document.
+ */
 @Experimental
 @Since("1.5.0")
 object DistributedLDAModel extends Loader[DistributedLDAModel] {
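A sketch distinguishing the two model types being documented; `corpus: RDD[(Long, Vector)]` of (docId, termCounts) pairs is an assumption:

import org.apache.spark.mllib.clustering.{DistributedLDAModel, LDA, LocalLDAModel}

// EM (the default optimizer) yields a DistributedLDAModel.
val distModel = new LDA().setK(5).run(corpus).asInstanceOf[DistributedLDAModel]

// Convert to a LocalLDAModel, keeping the inferred topics but dropping
// the training dataset and per-document topic distributions.
val localModel: LocalLDAModel = distModel.toLocal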
@@ -25,7 +25,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.DataFrame
 
 /**
- * ::Experimental::
  * Evaluator for multiclass classification.
  *
  * @param predictionAndLabels an RDD of (prediction, label) pairs.
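Usage of the (no longer experimental) evaluator; `predictionAndLabels: RDD[(Double, Double)]` is an assumption:

import org.apache.spark.mllib.evaluation.MulticlassMetrics

val metrics = new MulticlassMetrics(predictionAndLabels)
println(metrics.accuracy)        // overall accuracy
println(metrics.confusionMatrix) // actual classes in rows, predicted in columns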
@@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 
 /**
- * ::Experimental::
  * Evaluator for ranking algorithms.
  *
  * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
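A sketch with toy data, where each pair is (predicted ranking, ground-truth relevant items); assumes an active `sc`:

import org.apache.spark.mllib.evaluation.RankingMetrics

val predictionAndLabels = sc.parallelize(Seq(
  (Array(1, 2, 3, 4), Array(1, 2)),
  (Array(5, 6, 7), Array(7, 8))))

val metrics = new RankingMetrics(predictionAndLabels)
println(metrics.precisionAt(3))
println(metrics.meanAveragePrecision)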
@@ -97,6 +97,9 @@ class StandardScalerModel @Since("1.3.0") (
   @Since("1.3.0")
   def this(std: Vector) = this(std, null)
 
+  /**
+   * :: DeveloperApi ::
+   */
   @Since("1.3.0")
   @DeveloperApi
   def setWithMean(withMean: Boolean): this.type = {
@@ -105,6 +108,9 @@ class StandardScalerModel @Since("1.3.0") (
     this
   }
 
+  /**
+   * :: DeveloperApi ::
+   */
   @Since("1.3.0")
   @DeveloperApi
   def setWithStd(withStd: Boolean): this.type = {
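The two DeveloperApi setters in use; the std/mean vectors here are made up (a model normally comes from StandardScaler.fit):

import org.apache.spark.mllib.feature.StandardScalerModel
import org.apache.spark.mllib.linalg.Vectors

val model = new StandardScalerModel(Vectors.dense(2.0, 4.0), Vectors.dense(1.0, 3.0))
model.setWithMean(true).setWithStd(true)

// (x - mean) / std is applied element-wise.
println(model.transform(Vectors.dense(3.0, 7.0))) // [1.0, 1.0]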
@@ -418,6 +418,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaRDD]] with the default seed.
    */
   @DeveloperApi
@@ -431,6 +432,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaRDD]] with the default seed & numPartitions
    */
   @DeveloperApi
@@ -854,6 +856,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * Java-friendly version of [[RandomRDDs#randomVectorRDD]].
    */
   @DeveloperApi
@@ -869,6 +872,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaVectorRDD]] with the default seed.
    */
   @DeveloperApi
@@ -883,6 +887,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed.
    */
   @DeveloperApi
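These Java-friendly wrappers all delegate to the Scala `randomRDD`/`randomVectorRDD` entry points; a sketch of the underlying DeveloperApi (assumes an active `sc`):

import org.apache.spark.mllib.random.{RandomRDDs, UniformGenerator}

// Generate 1000 Uniform(0,1) doubles from a pluggable RandomDataGenerator.
val uniform = RandomRDDs.randomRDD(sc, new UniformGenerator(), 1000L, numPartitions = 4, seed = 42L)
println(uniform.mean()) // roughly 0.5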
@@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.util.BoundedPriorityQueue
 
 /**
+ * :: DeveloperApi ::
  * Machine learning specific Pair RDD functions.
  */
 @DeveloperApi
@@ -50,6 +51,9 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
   }
 }
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object MLPairRDDFunctions {
   /** Implicit conversion from a pair RDD to MLPairRDDFunctions. */
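The main function this wrapper adds is `topByKey`; a sketch with toy data (assumes an active `sc`):

import org.apache.spark.mllib.rdd.MLPairRDDFunctions._

val pairs = sc.parallelize(Seq(("a", 1), ("a", 3), ("a", 2), ("b", 5)))
val top2 = pairs.topByKey(2) // RDD[(String, Array[Int])], largest values first
top2.collect().foreach { case (k, vs) => println(s"$k -> ${vs.mkString(",")}") }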
@@ -24,6 +24,7 @@ import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 
 /**
+ * :: DeveloperApi ::
  * Machine learning specific RDD functions.
  */
 @DeveloperApi
@@ -53,6 +54,9 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
 }
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object RDDFunctions {
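Its best-known member is `sliding`, which windows consecutive elements across partition boundaries; a sketch (assumes an active `sc`):

import org.apache.spark.mllib.rdd.RDDFunctions._

val windows = sc.parallelize(1 to 5).sliding(3) // RDD[Array[Int]] of 3-element windows
windows.collect().foreach(w => println(w.mkString("[", ",", "]")))
// [1,2,3] [2,3,4] [3,4,5]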
@@ -216,6 +216,7 @@ class ALS private (
   }
 
   /**
+   * :: DeveloperApi ::
    * Set period (in iterations) between checkpoints (default = 10). Checkpointing helps with
    * recovery (when nodes fail) and StackOverflow exceptions caused by long lineage. It also helps
    * with eliminating temporary shuffle files on disk, which can be important when there are many
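The setter documented above in context; a sketch assuming `sc.setCheckpointDir(...)` has been called and `ratings: RDD[Rating]` exists:

import org.apache.spark.mllib.recommendation.ALS

val model = new ALS()
  .setRank(10)
  .setIterations(20)
  .setCheckpointInterval(5) // DeveloperApi: checkpoint every 5 iterations
  .run(ratings)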
@@ -20,6 +20,7 @@ package org.apache.spark.mllib.tree.model
 import org.apache.spark.annotation.{DeveloperApi, Since}
 
 /**
+ * :: DeveloperApi ::
  * Predicted value for a node
  * @param predict predicted value
  * @param prob probability of the label (classification only)
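For completeness, constructing the class directly with toy values:

import org.apache.spark.mllib.tree.model.Predict

val leaf = new Predict(predict = 1.0, prob = 0.8) // class 1.0 with 80% probability
println(s"${leaf.predict} (prob = ${leaf.prob})")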