diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index 26350ce1069c7a8ffa6cd567d09664e93c4fd6c6..89ac64a086ea01b39db16069f3a2511b91f9eaad 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -476,8 +476,8 @@ import org.apache.spark.mllib.linalg.MatrixEntry // Load and parse the data file val data = sc.textFile("mllib/data/als/test.data").map { line => - val parts = line.split(',') - MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) + val parts = line.split(',') + MatrixEntry(parts(0).toInt, parts(1).toInt, parts(2).toDouble) } val m = 4 val n = 4 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala index c7f2abab9750ce9d79c4fd11f6299a5314ec5f61..416996fcbe7606ce8a05d3bf9af8eddc32e5fff5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixEntry.scala @@ -20,8 +20,8 @@ package org.apache.spark.mllib.linalg /** * Class that represents an entry in a sparse matrix of doubles. * - * @param i row index (1 indexing used) - * @param j column index (1 indexing used) + * @param i row index (0 indexing used) + * @param j column index (0 indexing used) * @param mval value of entry in matrix */ case class MatrixEntry(val i: Int, val j: Int, val mval: Double) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala similarity index 100% rename from mllib/src/main/scala/org/apache/spark/mllib/linalg/SVDecomposedMatrix.scala rename to mllib/src/main/scala/org/apache/spark/mllib/linalg/MatrixSVD.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala index 6590e8f357d70e042e5fa149eecc69713bcc153a..ba7a0fde77d1c70b34a6ff925b54283e9d89b6a9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala @@ -49,7 +49,7 @@ class SVD { /** * Top-level methods for calling Singular Value Decomposition - * NOTE: All matrices are in 1-indexed sparse format RDD[((int, int), value)] + * NOTE: All matrices are in 0-indexed sparse format RDD[((int, int), value)] */ object SVD { /** @@ -73,7 +73,7 @@ object SVD { * U is m x k and satisfies U'U = eye(k) * V is n x k and satisfies V'V = eye(k) * - * All input and output is expected in sparse matrix format, 1-indexed + * All input and output is expected in sparse matrix format, 0-indexed * as tuples of the form ((i,j),value) all in RDDs using the * SparseMatrix class * @@ -110,7 +110,7 @@ object SVD { // Construct jblas A^T A locally val ata = DoubleMatrix.zeros(n, n) for (entry <- emits.toArray) { - ata.put(entry._1._1 - 1, entry._1._2 - 1, entry._2) + ata.put(entry._1._1, entry._1._2, entry._2) } // Since A^T A is small, we can compute its SVD directly @@ -129,18 +129,18 @@ object SVD { // prepare V for returning val retVdata = sc.makeRDD( Array.tabulate(V.rows, sigma.length){ (i,j) => - MatrixEntry(i + 1, j + 1, V.get(i,j)) }.flatten) + MatrixEntry(i, j, V.get(i,j)) }.flatten) val retV = SparseMatrix(retVdata, V.rows, sigma.length) val retSdata = sc.makeRDD(Array.tabulate(sigma.length){ - x => MatrixEntry(x + 1, x + 1, sigma(x))}) + x => MatrixEntry(x, x, sigma(x))}) val retS = SparseMatrix(retSdata, sigma.length, sigma.length) // Compute U as U = A V S^-1 // turn V S^-1 into an RDD as a sparse matrix val vsirdd = sc.makeRDD(Array.tabulate(V.rows, sigma.length) - { (i,j) => ((i + 1, j + 1), V.get(i,j) / sigma(j)) }.flatten) + { (i,j) => ((i, j), V.get(i,j) / sigma(j)) }.flatten) // Multiply A by VS^-1 val aCols = data.map(entry => (entry.j, (entry.i, entry.mval))) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala index f239e8505ff1a53960e20203e2de421650687099..12b3801722e93629fec873a50ec68a0bf1b2dd2c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala @@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val m = matrix.m val n = matrix.n val ret = DoubleMatrix.zeros(m, n) - matrix.data.toArray.map(x => ret.put(x.i - 1, x.j - 1, x.mval)) + matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval)) ret } @@ -68,7 +68,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val m = 10 val n = 3 val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) => - MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten ) + MatrixEntry(a, b, (a + 2).toDouble * (b + 1) / (1 + a + b)) }.flatten ) val a = SparseMatrix(data, m, n) @@ -97,7 +97,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val m = 10 val n = 3 val data = sc.makeRDD(Array.tabulate(m, n){ (a,b) => - MatrixEntry(a + 1, b + 1, 1.0) }.flatten ) + MatrixEntry(a, b, 1.0) }.flatten ) val k = 1 val a = SparseMatrix(data, m, n) @@ -130,7 +130,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val m = 10 val n = 3 val data = sc.makeRDD(Array.tabulate(m,n){ (a, b) => - MatrixEntry(a + 1, b + 1, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten ) + MatrixEntry(a, b, (a + 2).toDouble * (b + 1)/(1 + a + b)) }.flatten ) val a = SparseMatrix(data, m, n) val k = 1 // only one svalue above this