author    Patrick Wendell <pwendell@gmail.com>  2014-02-09 10:09:19 -0800
committer Patrick Wendell <pwendell@gmail.com>  2014-02-09 10:09:19 -0800
commit b69f8b2a01669851c656739b6886efe4cddef31a
tree   9ed0b8a6883f0ae64ce1d3f5c206306731a5f93f /mllib
parent b6dba10ae59215b5c4e40f7632563f592f138c87
Merge pull request #557 from ScrapCodes/style. Closes #557.

SPARK-1058, Fix Style Errors and Add Scala Style to Spark Build.

Author: Patrick Wendell <pwendell@gmail.com>
Author: Prashant Sharma <scrapcodes@gmail.com>

== Merge branch commits ==

commit 1a8bd1c059b842cb95cc246aaea74a79fec684f4
Author: Prashant Sharma <scrapcodes@gmail.com>
Date:   Sun Feb 9 17:39:07 2014 +0530

    scala style fixes

commit f91709887a8e0b608c5c2b282db19b8a44d53a43
Author: Patrick Wendell <pwendell@gmail.com>
Date:   Fri Jan 24 11:22:53 2014 -0800

    Adding scalastyle snapshot
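The style fixes in the diff below boil down to two Scalastyle rules: keep lines under the 100-character limit and put spaces around binary operators. A toy before/after sketch of both rules (illustrative names, not Spark code):

object StyleSketch {
  // Before: a signature like this would exceed the 100-character line limit,
  // and unspaced operators like `_+_` would trip the whitespace check:
  //   def weightedSum(values: Seq[Double], weights: Seq[Double], offset: Double, scale: Double): Double = ...
  //   val total = counts.reduceLeft(_+_)

  // After: wrap each parameter onto its own line and space the operators.
  def weightedSum(
      values: Seq[Double],
      weights: Seq[Double],
      offset: Double,
      scale: Double): Double = {
    values.zip(weights).map { case (v, w) => v * w }.sum * scale + offset
  }

  def main(args: Array[String]): Unit = {
    val counts = Seq(1, 2, 3)
    val total = counts.reduceLeft(_ + _)
    println(total + " " + weightedSum(Seq(1.0, 2.0), Seq(0.5, 0.25), offset = 1.0, scale = 2.0))
  }
}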
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala     |  3
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala                    |  8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala            | 20
4 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index efc0eb9353..efe99a31be 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -106,7 +106,8 @@ class PythonMLLibAPI extends Serializable {
bytes
}
- private def trainRegressionModel(trainFunc: (RDD[LabeledPoint], Array[Double]) => GeneralizedLinearModel,
+ private def trainRegressionModel(
+ trainFunc: (RDD[LabeledPoint], Array[Double]) => GeneralizedLinearModel,
dataBytesJRDD: JavaRDD[Array[Byte]], initialWeightsBA: Array[Byte]):
java.util.LinkedList[java.lang.Object] = {
val data = dataBytesJRDD.rdd.map(xBytes => {
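The wrapped signature above takes the concrete training algorithm as a function parameter. A self-contained sketch of that higher-order pattern with toy types (no Spark dependency; names are illustrative):

object TrainerSketch {
  final case class Model(weights: Array[Double])

  // Generic driver in the shape of trainRegressionModel: the concrete
  // algorithm arrives as trainFunc, so one wrapper serves many models.
  private def trainModel(
      trainFunc: (Seq[Array[Double]], Array[Double]) => Model,
      data: Seq[Array[Double]],
      initialWeights: Array[Double]): Model =
    trainFunc(data, initialWeights)

  def main(args: Array[String]): Unit = {
    // A dummy "trainer" that just shifts the initial weights.
    val model = trainModel(
      (data, w0) => Model(w0.map(_ + data.length)),
      data = Seq(Array(1.0), Array(2.0)),
      initialWeights = Array(0.0, 0.0))
    println(model.weights.mkString(","))
  }
}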
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
index e476b53450..8803c4c1a0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -105,7 +105,7 @@ object SVD {
cols.flatMap{ case (colind1, mval1) =>
cols.map{ case (colind2, mval2) =>
((colind1, colind2), mval1*mval2) } }
- }.reduceByKey(_+_)
+ }.reduceByKey(_ + _)
// Construct jblas A^T A locally
val ata = DoubleMatrix.zeros(n, n)
@@ -145,10 +145,10 @@ object SVD {
// Multiply A by VS^-1
val aCols = data.map(entry => (entry.j, (entry.i, entry.mval)))
val bRows = vsirdd.map(entry => (entry._1._1, (entry._1._2, entry._2)))
- val retUdata = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)) )
- => ((rowInd, colInd), rowVal*colVal)}).reduceByKey(_+_)
+ val retUdata = aCols.join(bRows).map( {case (key, ( (rowInd, rowVal), (colInd, colVal)))
+ => ((rowInd, colInd), rowVal*colVal)}).reduceByKey(_ + _)
.map{ case ((row, col), mval) => MatrixEntry(row, col, mval)}
- val retU = SparseMatrix(retUdata, m, sigma.length)
+ val retU = SparseMatrix(retUdata, m, sigma.length)
MatrixSVD(retU, retS, retV)
}
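The first hunk above builds A^T A by pairing the entries within each row and summing the per-(column, column) products with reduceByKey(_ + _). A self-contained local-mode sketch of that pattern on a toy sparse matrix (illustrative entries, not the Spark source):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._

object GramianSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("gramian").setMaster("local[2]"))
    // Sparse entries of A as (row, (col, value)).
    val entries = sc.parallelize(Seq((0, (0, 1.0)), (0, (1, 2.0)), (1, (1, 3.0))))
    // Pair up the entries within each row and sum the products per
    // (col, col) key: the result is exactly the entries of A^T A.
    val ata = entries.groupByKey().flatMap { case (_, cols) =>
      for ((c1, v1) <- cols; (c2, v2) <- cols) yield ((c1, c2), v1 * v2)
    }.reduceByKey(_ + _)
    ata.collect().sortBy(_._1).foreach(println)
    sc.stop()
  }
}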
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index b77364e08d..cd80134737 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -142,7 +142,7 @@ object GradientDescent extends Logging {
var regVal = 0.0
for (i <- 1 to numIterations) {
- val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map {
+ val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42 + i).map {
case (y, features) =>
val featuresCol = new DoubleMatrix(features.length, 1, features:_*)
val (grad, loss) = gradient.compute(featuresCol, y, weights)
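The only change in this hunk is spacing in the sampling seed 42 + i, but the seed itself is worth a note: fixing it per iteration makes each mini-batch reproducible run to run. A local-mode sketch of that effect (toy data and fraction, not GradientDescent's actual loop):

import org.apache.spark.{SparkConf, SparkContext}

object MiniBatchSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("minibatch").setMaster("local[2]"))
    val data = sc.parallelize(1 to 1000)
    val miniBatchFraction = 0.1
    for (i <- 1 to 3) {
      // Same seed per iteration => the same mini-batch on every run.
      val batch = data.sample(withReplacement = false, miniBatchFraction, seed = 42 + i)
      println(s"iteration $i sampled ${batch.count()} points")
    }
    sc.stop()
  }
}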
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index c5f64b1350..a990e0fb01 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -84,8 +84,9 @@ case class Rating(val user: Int, val product: Int, val rating: Double)
* [[http://research.yahoo.com/pub/2433]], adapted for the blocked approach used here.
*
* Essentially instead of finding the low-rank approximations to the rating matrix `R`,
- * this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if r > 0
- * and 0 if r = 0. The ratings then act as 'confidence' values related to strength of indicated user
+ * this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if
+ * r > 0 and 0 if r = 0. The ratings then act as 'confidence' values related to strength of
+ * indicated user
* preferences rather than explicit ratings given to items.
*/
class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var lambda: Double,
@@ -152,8 +153,8 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
val (userInLinks, userOutLinks) = makeLinkRDDs(numBlocks, ratingsByUserBlock)
val (productInLinks, productOutLinks) = makeLinkRDDs(numBlocks, ratingsByProductBlock)
- // Initialize user and product factors randomly, but use a deterministic seed for each partition
- // so that fault recovery works
+ // Initialize user and product factors randomly, but use a deterministic seed for each
+ // partition so that fault recovery works
val seedGen = new Random()
val seed1 = seedGen.nextInt()
val seed2 = seedGen.nextInt()
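The comment rewrapped above explains that factors are initialized from a deterministic seed per partition, so a recomputed partition regenerates identical factors after a failure. A toy sketch of that idea (hypothetical helper, not ALS's actual code):

import scala.util.Random

object SeededInitSketch {
  // Derive a per-partition RNG from one global seed: recomputing
  // partition `index` always reproduces the same factor rows.
  def factorsFor(globalSeed: Int, index: Int, rows: Int, rank: Int): Array[Array[Double]] = {
    val rng = new Random(globalSeed ^ (index << 16))
    Array.fill(rows)(Array.fill(rank)(rng.nextDouble()))
  }

  def main(args: Array[String]): Unit = {
    val a = factorsFor(42, index = 3, rows = 2, rank = 2)
    val b = factorsFor(42, index = 3, rows = 2, rank = 2)
    println(a.map(_.toSeq).toSeq == b.map(_.toSeq).toSeq)  // true: deterministic
  }
}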
@@ -268,7 +269,8 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
val groupedRatings = blockRatings(productBlock).groupBy(_.product).toArray
// Sort them by product ID
val ordering = new Ordering[(Int, ArrayBuffer[Rating])] {
- def compare(a: (Int, ArrayBuffer[Rating]), b: (Int, ArrayBuffer[Rating])): Int = a._1 - b._1
+ def compare(a: (Int, ArrayBuffer[Rating]), b: (Int, ArrayBuffer[Rating])): Int =
+ a._1 - b._1
}
Sorting.quickSort(groupedRatings)(ordering)
// Translate the user IDs to indices based on userIdToPos
@@ -369,7 +371,8 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
val tempXtX = DoubleMatrix.zeros(triangleSize)
val fullXtX = DoubleMatrix.zeros(rank, rank)
- // Compute the XtX and Xy values for each user by adding products it rated in each product block
+ // Compute the XtX and Xy values for each user by adding products it rated in each product
+ // block
for (productBlock <- 0 until numBlocks) {
for (p <- 0 until blockFactors(productBlock).length) {
val x = new DoubleMatrix(blockFactors(productBlock)(p))
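The comment rewrapped in this hunk describes building each user's normal equations by accumulating the outer products x x^T of the factor vectors of products the user rated. A dense, dependency-free sketch of that accumulation (plain arrays instead of jblas, illustrative only):

object XtXSketch {
  // Accumulate X^T X by summing the outer product x x^T of each factor
  // vector, as the ALS comment describes (dense, rank x rank).
  def addOuterProducts(vectors: Seq[Array[Double]], rank: Int): Array[Array[Double]] = {
    val xtx = Array.ofDim[Double](rank, rank)
    for (x <- vectors; i <- 0 until rank; j <- 0 until rank)
      xtx(i)(j) += x(i) * x(j)
    xtx
  }

  def main(args: Array[String]): Unit = {
    val xtx = addOuterProducts(Seq(Array(1.0, 2.0), Array(3.0, 4.0)), rank = 2)
    xtx.foreach(row => println(row.mkString(" ")))
  }
}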
@@ -544,9 +547,8 @@ object ALS {
* @param iterations number of iterations of ALS (recommended: 10-20)
* @param lambda regularization factor (recommended: 0.01)
*/
- def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, alpha: Double)
- : MatrixFactorizationModel =
- {
+ def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double,
+ alpha: Double): MatrixFactorizationModel = {
trainImplicit(ratings, rank, iterations, lambda, -1, alpha)
}
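One closing note on the comparator rewrapped in the @@ -268 hunk: a._1 - b._1 is fine for the non-negative IDs used there, but subtraction-based compare can overflow when operands sit near opposite Int extremes. A self-contained sketch of the overflow-safe spelling (illustrative, not a change this commit makes):

import scala.util.Sorting

object OrderingSketch {
  def main(args: Array[String]): Unit = {
    val grouped: Array[(Int, String)] = Array((3, "c"), (1, "a"), (2, "b"))
    // Ordering.by delegates to Int comparison and cannot overflow, unlike
    // `a._1 - b._1` with operands near Int.MinValue and Int.MaxValue.
    Sorting.quickSort(grouped)(Ordering.by(_._1))
    println(grouped.mkString(", "))
  }
}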