fixing formatting

author: Ginger Smith <vsmith@berkeley.edu> 2013-08-05 11:22:18 -0700
committer: Ginger Smith <vsmith@berkeley.edu> 2013-08-05 11:22:18 -0700
commit: 8c8947e2b66169dddb828b801ffaa43cc400b8a5 (patch)
tree: a44e0e06e801e4e9fe421e28ec46730825cbca85 /mllib
parent: 4ab4df5edbc1bded810a8a3e1dfc7f8ae40a7c30 (diff)
download: spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.tar.gz
spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.tar.bz2
spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.zip
1 files changed, 23 insertions, 16 deletions
diff --git a/mllib/src/main/scala/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/spark/mllib/util/MFDataGenerator.scala
index 8637d27cd0..1d2b5c89f0 100644
--- a/mllib/src/main/scala/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/spark/mllib/util/MFDataGenerator.scala
@@ -24,25 +24,32 @@ import org.jblas.DoubleMatrix
 import spark.{RDD, SparkContext}
 import spark.mllib.util.MLUtils
 
+/**
+* Generate RDD(s) containing data for Matrix Factorization.
+*
+* This method samples training entries according to the oversampling factor
+* 'tr_samp_fact', which is a multiplicative factor of the number of
+* degrees of freedom of the matrix: rank*(m+n-rank).
+* 
+* It optionally samples entries for a testing matrix using 
+* 'te_samp_fact', the percentage of the number of training entries 
+* to use for testing.
+*
+* This method takes the following inputs:
+*   sparkMaster    (String) The master URL.
+*   outputPath     (String) Directory to save output.
+*   m              (Int) Number of rows in data matrix.
+*   n              (Int) Number of columns in data matrix.
+*   rank           (Int) Underlying rank of data matrix.
+*   tr_samp_fact   (Double) Oversampling factor.
+*   noise          (Boolean) Whether to add Gaussian noise to training data.
+*   sigma          (Double) Standard deviation of added Gaussian noise.
+*   test           (Boolean) Whether to create testing RDD.
+*   te_samp_fact   (Double) Percentage of training data to use as test data.
+*/
 
 object MFDataGenerator{
 
-   /**
-   * Generate RDD(s) containing data for Matrix Factorization. This function chooses
-   * positive labels with probability `probOne` and scales positive examples by `eps`.
-   *
-   * @param sc SparkContext to use for creating the RDD.
-   * @param outputPath Directory to save output.
-   * @param m Number of rows in data matrix.
-   * @param n Number of columns in data matrix.
-   * @param rank Underlying rank of data matrix.
-   * @param tr_samp_fact Oversampling factor.
-   * @param noise Boolean value - whether to add gaussian noise to training data.
-   * @param sigma Standard deviation of added gaussian noise.
-   * @param test Boolean value - whether to create testing RDD.
-   * @param te_samp_fact Percentage of training data to use as test data.
-   */
-
   def main(args: Array[String]) {
     if (args.length != 10) {
       println("Usage: MFGenerator " +
author	Ginger Smith <vsmith@berkeley.edu>	2013-08-05 11:22:18 -0700
committer	Ginger Smith <vsmith@berkeley.edu>	2013-08-05 11:22:18 -0700
commit	8c8947e2b66169dddb828b801ffaa43cc400b8a5 (patch)
tree	a44e0e06e801e4e9fe421e28ec46730825cbca85 /mllib
parent	4ab4df5edbc1bded810a8a3e1dfc7f8ae40a7c30 (diff)
download	spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.tar.gz spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.tar.bz2 spark-8c8947e2b66169dddb828b801ffaa43cc400b8a5.zip