diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala index 5c7089b491677eebaa2f7b6c7f4ddbf5c3d29a20..078fecf08828285a0c6dd0323523acd741c6a300 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala @@ -27,6 +27,9 @@ import org.apache.spark.annotation.DeveloperApi @DeveloperApi sealed abstract class AttributeType(val name: String) +/** + * :: DeveloperApi :: + */ @DeveloperApi object AttributeType { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 5aec692c98e6e2c2672a3ec33e8d0b0948b1eccb..609e50eb494cec51f276dfa5685bc8281b8d8e1b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -675,6 +675,8 @@ class DistributedLDAModel private[ml] ( private var _checkpointFiles: Array[String] = oldDistributedModel.checkpointFiles /** + * :: DeveloperApi :: + * * If using checkpointing and [[LDA.keepLastCheckpoint]] is set to true, then there may be * saved checkpoint files. This method is provided so that users can manage those files. * @@ -689,6 +691,8 @@ class DistributedLDAModel private[ml] ( def getCheckpointFiles: Array[String] = _checkpointFiles /** + * :: DeveloperApi :: + * * Remove any remaining checkpoint files from training. * * @see [[getCheckpointFiles]] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index e4bd0dc25ee54cbfaadb0de946e769b907e7698a..91edcf2a7925b154c82ae13910d577640b0ee072 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel /** + * :: Experimental :: + * * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" * by Steinbach, Karypis, and Kumar, with modification to fit Spark. * The algorithm starts from a single cluster that contains all points. @@ -396,6 +398,8 @@ private object BisectingKMeans extends Serializable { } /** + * :: Experimental :: + * * Represents a node in a clustering tree. * * @param index node index, negative for internal nodes and non-negative for leaf nodes diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index b3546a1ee36772ed79602d6a27183c42e9faf4b9..11fd940b8b205b6dffd7a6e6533fbd061ab0800f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -32,6 +32,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} /** + * :: Experimental :: + * * Clustering model produced by [[BisectingKMeans]]. * The prediction is done level-by-level from the root node to a leaf node, and at each node among * its children the closest to the input point is selected. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index 4f07236225cd249500e68ce308c182b89b6b6c26..d295826300419afa21ed49542acf78bae4ce6757 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -425,6 +425,13 @@ class LocalLDAModel private[spark] ( } +/** + * :: Experimental :: + * + * Local (non-distributed) model fitted by [[LDA]]. + * + * This model stores the inferred topics only; it does not store info about the training dataset. + */ @Experimental @Since("1.5.0") object LocalLDAModel extends Loader[LocalLDAModel] { @@ -814,7 +821,15 @@ class DistributedLDAModel private[clustering] ( } } - +/** + * :: Experimental :: + * + * Distributed model fitted by [[LDA]]. + * This type of model is currently only produced by Expectation-Maximization (EM). + * + * This model stores the inferred topics, the full training dataset, and the topic distribution + * for each training document. + */ @Experimental @Since("1.5.0") object DistributedLDAModel extends Loader[DistributedLDAModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 719695a3380df82e5899368749d467b1f1fae25f..9a6a8dbdccbf3764f2cdb18797fd80f182aced8d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -25,7 +25,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame /** - * ::Experimental:: * Evaluator for multiclass classification. * * @param predictionAndLabels an RDD of (prediction, label) pairs. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index 4ed4a058945c36ee8d9a4097c5c1efbc688ef322..e29b51c3a19da17c00050c13020e36940ac55184 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD /** - * ::Experimental:: * Evaluator for ranking algorithms. * * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index ee97045f34dc8a16987584241e1d7a23f370bbb8..b7d6c6056803a8bccef7e687810728673e85460a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -97,6 +97,9 @@ class StandardScalerModel @Since("1.3.0") ( @Since("1.3.0") def this(std: Vector) = this(std, null) + /** + * :: DeveloperApi :: + */ @Since("1.3.0") @DeveloperApi def setWithMean(withMean: Boolean): this.type = { @@ -105,6 +108,9 @@ class StandardScalerModel @Since("1.3.0") ( this } + /** + * :: DeveloperApi :: + */ @Since("1.3.0") @DeveloperApi def setWithStd(withStd: Boolean): this.type = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala index b0a716936ae6f2c0f7230e8ffff883c1bc501533..c2bc1f17ccd58ce5749653dc648aa182bbe1ea6d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala @@ -418,6 +418,7 @@ object RandomRDDs { } /** + * :: DeveloperApi :: * [[RandomRDDs#randomJavaRDD]] with the default seed. */ @DeveloperApi @@ -431,6 +432,7 @@ object RandomRDDs { } /** + * :: DeveloperApi :: * [[RandomRDDs#randomJavaRDD]] with the default seed & numPartitions */ @DeveloperApi @@ -854,6 +856,7 @@ object RandomRDDs { } /** + * :: DeveloperApi :: * Java-friendly version of [[RandomRDDs#randomVectorRDD]]. */ @DeveloperApi @@ -869,6 +872,7 @@ object RandomRDDs { } /** + * :: DeveloperApi :: * [[RandomRDDs#randomJavaVectorRDD]] with the default seed. */ @DeveloperApi @@ -883,6 +887,7 @@ object RandomRDDs { } /** + * :: DeveloperApi :: * [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed. */ @DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala index 1b93e2d764c690880f8e5ce8267e053fcd9179f6..af4bc9f290c3e850b8e13d295631db0c9ab1961e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.util.BoundedPriorityQueue /** + * :: DeveloperApi :: * Machine learning specific Pair RDD functions. */ @DeveloperApi @@ -50,6 +51,9 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se } } +/** + * :: DeveloperApi :: + */ @DeveloperApi object MLPairRDDFunctions { /** Implicit conversion from a pair RDD to MLPairRDDFunctions. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala index e8a937ffcb96ffaa3d3927b278768a6789e7e8e8..0f7857b8d862727e4908bfd8373fd9a6ead41f52 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala @@ -24,6 +24,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD /** + * :: DeveloperApi :: * Machine learning specific RDD functions. */ @DeveloperApi @@ -53,6 +54,9 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable { } +/** + * :: DeveloperApi :: + */ @DeveloperApi object RDDFunctions { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 467cb83cd166213cdacf1b5c279e3cfd53063aad..cc9ee15738ad6509fe210277de54461fc89fc8bc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -216,6 +216,7 @@ class ALS private ( } /** + * :: DeveloperApi :: * Set period (in iterations) between checkpoints (default = 10). Checkpointing helps with * recovery (when nodes fail) and StackOverflow exceptions caused by long lineage. It also helps * with eliminating temporary shuffle files on disk, which can be important when there are many diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index 06ceff19d863367fc181591f749540fb63a7103c..1dbdd2d860efdb2c4c60695e6cafcd905518194d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.tree.model import org.apache.spark.annotation.{DeveloperApi, Since} /** + * :: DeveloperApi :: * Predicted value for a node * @param predict predicted value * @param prob probability of the label (classification only)