Commit f1d8871c authored by Andrew Ash

Uniform whitespace across scala examples

parent adba773f
@@ -10,73 +10,73 @@ import scala.collection.mutable.HashSet

/**
 * K-means clustering.
 */
object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData = {
    def generatePoint(i: Int) = {
      Vector(D, _ => rand.nextDouble * R)
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector, centers: HashMap[Int, Vector]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = p.squaredDist(vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def main(args: Array[String]) {
    val data = generateData
    var points = new HashSet[Vector]
    var kPoints = new HashMap[Int, Vector]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      var closest = data.map(p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int](x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft[(Int, (Vector, Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      var newPoints = pointStats.map { mapping => (mapping._1, mapping._2._1 / mapping._2._2) }

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += kPoints.get(mapping._1).get.squaredDist(mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
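
For readers without the example's Vector class at hand, here is a minimal self-contained sketch of the closest-center search above, using plain Array[Double] points; squaredDistance and the sample data are stand-ins for Vector.squaredDist and are not part of this commit:

    // Minimal sketch of the closest-center step from LocalKMeans above,
    // assuming plain Array[Double] points instead of the example's Vector.
    object ClosestCenterSketch {
      // Squared Euclidean distance; stands in for Vector.squaredDist.
      def squaredDistance(a: Array[Double], b: Array[Double]): Double =
        a.zip(b).map { case (x, y) => (x - y) * (x - y) }.sum

      // Index of the nearest center, mirroring closestPoint above.
      def closestPoint(p: Array[Double], centers: Map[Int, Array[Double]]): Int =
        centers.minBy { case (_, c) => squaredDistance(p, c) }._1

      def main(args: Array[String]): Unit = {
        val centers = Map(1 -> Array(0.0, 0.0), 2 -> Array(5.0, 5.0))
        println(closestPoint(Array(4.0, 4.5), centers))  // prints 2
      }
    }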
@@ -8,7 +8,7 @@ object MultiBroadcastTest {
      System.err.println("Usage: MultiBroadcastTest <master> [<slices>] [numElem]")
      System.exit(1)
    }

    val sc = new SparkContext(args(0), "Broadcast Test",
      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
@@ -19,7 +19,7 @@ object MultiBroadcastTest {
    for (i <- 0 until arr1.length) {
      arr1(i) = i
    }

    var arr2 = new Array[Int](num)
    for (i <- 0 until arr2.length) {
      arr2(i) = i
@@ -30,7 +30,7 @@ object MultiBroadcastTest {
    sc.parallelize(1 to 10, slices).foreach {
      i => println(barr1.value.size + barr2.value.size)
    }

    System.exit(0)
  }
}
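
The last hunk reads barr1 and barr2 without showing where they come from; that setup falls outside the diff context. Presumably each array is shipped to the workers once with SparkContext.broadcast, roughly as in this sketch (the val names are inferred from the usage above, not shown in the commit):

    // Assumed reconstruction of the elided setup: broadcast each array once
    // so every task reads a shared read-only copy instead of a per-task copy.
    val barr1 = sc.broadcast(arr1)
    val barr2 = sc.broadcast(arr2)
    // Tasks then dereference the broadcasts with .value, as in the hunk above.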
@@ -11,7 +11,7 @@ object SimpleSkewedGroupByTest {
        "[numMappers] [numKVPairs] [valSize] [numReducers] [ratio]")
      System.exit(1)
    }

    var numMappers = if (args.length > 1) args(1).toInt else 2
    var numKVPairs = if (args.length > 2) args(2).toInt else 1000
    var valSize = if (args.length > 3) args(3).toInt else 1000
@@ -20,7 +20,7 @@ object SimpleSkewedGroupByTest {
    val sc = new SparkContext(args(0), "GroupBy Test",
      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))

    val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
      val ranGen = new Random
      var result = new Array[(Int, Array[Byte])](numKVPairs)
...
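
The hunk is cut off before result is filled. Purely as an illustration (the file's actual fill loop is not shown in this diff, numReducers and ratio are assumed to be parsed like the arguments above, and ratio is treated here as a percentage), a skewed fill could look like:

    // Illustrative sketch only, not the elided source: roughly `ratio`
    // percent of the pairs land on key 0, so one reducer receives a
    // disproportionate share of the shuffle data.
    for (i <- 0 until numKVPairs) {
      val byteArr = new Array[Byte](valSize)
      ranGen.nextBytes(byteArr)
      val key = if (ranGen.nextInt(100) < ratio) 0 else ranGen.nextInt(numReducers)
      result(i) = (key, byteArr)
    }
    result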
@@ -10,7 +10,7 @@ object SkewedGroupByTest {
      System.err.println("Usage: SkewedGroupByTest <master> [numMappers] [numKVPairs] [valSize] [numReducers]")
      System.exit(1)
    }

    var numMappers = if (args.length > 1) args(1).toInt else 2
    var numKVPairs = if (args.length > 2) args(2).toInt else 1000
    var valSize = if (args.length > 3) args(3).toInt else 1000
@@ -18,7 +18,7 @@ object SkewedGroupByTest {
    val sc = new SparkContext(args(0), "GroupBy Test",
      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))

    val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
      val ranGen = new Random
...
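
Both group-by tests presumably end the same way beyond the truncated hunks: the generated pairs are shuffled with groupByKey and counted to force the job. A hedged sketch of that tail (numReducers is named in the usage string but its parsing is elided from this diff):

    // Assumed continuation, not shown in the commit: group the pairs by key,
    // which triggers the shuffle whose skew these tests are measuring.
    println(pairs1.groupByKey(numReducers).count())
    System.exit(0)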