From 372fa61f511843f53498b9e843a84cfdd76fa2b2 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Sun, 5 Jun 2016 11:55:25 -0700
Subject: [PATCH] [SPARK-15770][ML] Annotation audit for Experimental and
 DeveloperApi

## What changes were proposed in this pull request?
1. Remove `:: Experimental ::` comments from non-experimental APIs.
2. Add `:: Experimental ::` comments to experimental APIs.
3. Add `:: DeveloperApi ::` comments to developer APIs (see the sketch below).
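
For illustration, a minimal Scala sketch of the documentation convention this audit enforces, where the Scaladoc marker mirrors the annotation on the declaration (the class and object names here are hypothetical, for illustration only):

```scala
import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
 * :: Experimental ::
 *
 * The Scaladoc marker matches the @Experimental annotation below.
 */
@Experimental
class ExampleExperimentalApi  // hypothetical name, not part of this patch

/**
 * :: DeveloperApi ::
 */
@DeveloperApi
object ExampleDeveloperApi  // hypothetical name, not part of this patch
```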

## How was this patch tested?
Manual tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13514 from zhengruifeng/del_experimental.
---
 .../spark/ml/attribute/AttributeType.scala      |  3 +++
 .../org/apache/spark/ml/clustering/LDA.scala    |  4 ++++
 .../mllib/clustering/BisectingKMeans.scala      |  4 ++++
 .../mllib/clustering/BisectingKMeansModel.scala |  2 ++
 .../spark/mllib/clustering/LDAModel.scala       | 17 ++++++++++++++++-
 .../mllib/evaluation/MulticlassMetrics.scala    |  1 -
 .../spark/mllib/evaluation/RankingMetrics.scala |  1 -
 .../spark/mllib/feature/StandardScaler.scala    |  6 ++++++
 .../apache/spark/mllib/random/RandomRDDs.scala  |  5 +++++
 .../spark/mllib/rdd/MLPairRDDFunctions.scala    |  4 ++++
 .../apache/spark/mllib/rdd/RDDFunctions.scala   |  4 ++++
 .../apache/spark/mllib/recommendation/ALS.scala |  1 +
 .../apache/spark/mllib/tree/model/Predict.scala |  1 +
 13 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
index 5c7089b491..078fecf088 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
@@ -27,6 +27,9 @@ import org.apache.spark.annotation.DeveloperApi
 @DeveloperApi
 sealed abstract class AttributeType(val name: String)
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object AttributeType {
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 5aec692c98..609e50eb49 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -675,6 +675,8 @@ class DistributedLDAModel private[ml] (
   private var _checkpointFiles: Array[String] = oldDistributedModel.checkpointFiles
 
   /**
+   * :: DeveloperApi ::
+   *
    * If using checkpointing and [[LDA.keepLastCheckpoint]] is set to true, then there may be
    * saved checkpoint files.  This method is provided so that users can manage those files.
    *
@@ -689,6 +691,8 @@ class DistributedLDAModel private[ml] (
   def getCheckpointFiles: Array[String] = _checkpointFiles
 
   /**
+   * :: DeveloperApi ::
+   *
    * Remove any remaining checkpoint files from training.
    *
    * @see [[getCheckpointFiles]]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index e4bd0dc25e..91edcf2a79 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
+ * :: Experimental ::
+ *
  * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
  * by Steinbach, Karypis, and Kumar, with modification to fit Spark.
  * The algorithm starts from a single cluster that contains all points.
@@ -396,6 +398,8 @@ private object BisectingKMeans extends Serializable {
 }
 
 /**
+ * :: Experimental ::
+ *
  * Represents a node in a clustering tree.
  *
  * @param index node index, negative for internal nodes and non-negative for leaf nodes
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index b3546a1ee3..11fd940b8b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -32,6 +32,8 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 
 /**
+ * :: Experimental ::
+ *
  * Clustering model produced by [[BisectingKMeans]].
  * The prediction is done level-by-level from the root node to a leaf node, and at each node among
  * its children the closest to the input point is selected.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 4f07236225..d295826300 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -425,6 +425,13 @@ class LocalLDAModel private[spark] (
 
 }
 
+/**
+ * :: Experimental ::
+ *
+ * Local (non-distributed) model fitted by [[LDA]].
+ *
+ * This model stores the inferred topics only; it does not store info about the training dataset.
+ */
 @Experimental
 @Since("1.5.0")
 object LocalLDAModel extends Loader[LocalLDAModel] {
@@ -814,7 +821,15 @@ class DistributedLDAModel private[clustering] (
   }
 }
 
-
+/**
+ * :: Experimental ::
+ *
+ * Distributed model fitted by [[LDA]].
+ * This type of model is currently only produced by Expectation-Maximization (EM).
+ *
+ * This model stores the inferred topics, the full training dataset, and the topic distribution
+ * for each training document.
+ */
 @Experimental
 @Since("1.5.0")
 object DistributedLDAModel extends Loader[DistributedLDAModel] {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 719695a338..9a6a8dbdcc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -25,7 +25,6 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.DataFrame
 
 /**
- * ::Experimental::
  * Evaluator for multiclass classification.
  *
  * @param predictionAndLabels an RDD of (prediction, label) pairs.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 4ed4a05894..e29b51c3a1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 
 /**
- * ::Experimental::
  * Evaluator for ranking algorithms.
  *
  * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index ee97045f34..b7d6c60568 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -97,6 +97,9 @@ class StandardScalerModel @Since("1.3.0") (
   @Since("1.3.0")
   def this(std: Vector) = this(std, null)
 
+  /**
+   * :: DeveloperApi ::
+   */
   @Since("1.3.0")
   @DeveloperApi
   def setWithMean(withMean: Boolean): this.type = {
@@ -105,6 +108,9 @@ class StandardScalerModel @Since("1.3.0") (
     this
   }
 
+  /**
+   * :: DeveloperApi ::
+   */
   @Since("1.3.0")
   @DeveloperApi
   def setWithStd(withStd: Boolean): this.type = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index b0a716936a..c2bc1f17cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -418,6 +418,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaRDD]] with the default seed.
    */
   @DeveloperApi
@@ -431,6 +432,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaRDD]] with the default seed & numPartitions
    */
   @DeveloperApi
@@ -854,6 +856,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * Java-friendly version of [[RandomRDDs#randomVectorRDD]].
    */
   @DeveloperApi
@@ -869,6 +872,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaVectorRDD]] with the default seed.
    */
   @DeveloperApi
@@ -883,6 +887,7 @@ object RandomRDDs {
   }
 
   /**
+   * :: DeveloperApi ::
    * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed.
    */
   @DeveloperApi
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 1b93e2d764..af4bc9f290 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.util.BoundedPriorityQueue
 
 /**
+ * :: DeveloperApi ::
  * Machine learning specific Pair RDD functions.
  */
 @DeveloperApi
@@ -50,6 +51,9 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
   }
 }
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object MLPairRDDFunctions {
   /** Implicit conversion from a pair RDD to MLPairRDDFunctions. */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
index e8a937ffcb..0f7857b8d8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
@@ -24,6 +24,7 @@ import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 
 /**
+ * :: DeveloperApi ::
  * Machine learning specific RDD functions.
  */
 @DeveloperApi
@@ -53,6 +54,9 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
 
 }
 
+/**
+ * :: DeveloperApi ::
+ */
 @DeveloperApi
 object RDDFunctions {
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 467cb83cd1..cc9ee15738 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -216,6 +216,7 @@ class ALS private (
   }
 
   /**
+   * :: DeveloperApi ::
    * Set period (in iterations) between checkpoints (default = 10). Checkpointing helps with
    * recovery (when nodes fail) and StackOverflow exceptions caused by long lineage. It also helps
    * with eliminating temporary shuffle files on disk, which can be important when there are many
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
index 06ceff19d8..1dbdd2d860 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
@@ -20,6 +20,7 @@ package org.apache.spark.mllib.tree.model
 import org.apache.spark.annotation.{DeveloperApi, Since}
 
 /**
+ * :: DeveloperApi ::
  * Predicted value for a node
  * @param predict predicted value
  * @param prob probability of the label (classification only)
-- 
GitLab