11 files changed, 141 insertions, 1 deletion
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 1a7c4c51f4..c862650b0a 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -16,6 +16,9 @@
 #
 
 """
+This is an example implementation of ALS for learning how to use Spark. Please refer to
+ALS in pyspark.mllib.recommendation for more conventional use.
+
 This example requires numpy (http://www.numpy.org/)
 """
 from os.path import realpath
@@ -49,9 +52,15 @@ def update(i, vec, mat, ratings):
 
 
 if __name__ == "__main__":
+
     """
     Usage: als [M] [U] [F] [iterations] [slices]"
     """
+
+    print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
+      example. Please use the ALS method found in pyspark.mllib.recommendation for more
+      conventional use."""
+
     sc = SparkContext(appName="PythonALS")
     M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
     U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py
index 988fc45baf..036bdf4c4f 100755
--- a/examples/src/main/python/kmeans.py
+++ b/examples/src/main/python/kmeans.py
@@ -45,9 +45,15 @@ def closestPoint(p, centers):
 
 
 if __name__ == "__main__":
+
     if len(sys.argv) != 4:
         print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
         exit(-1)
+
+    print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
+       as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
+       how to use MLlib's KMeans implementation."""
+
     sc = SparkContext(appName="PythonKMeans")
     lines = sc.textFile(sys.argv[1])
     data = lines.map(parseVector).cache()
diff --git a/examples/src/main/python/logistic_regression.py b/examples/src/main/python/logistic_regression.py
index 6c33deabfd..8456b272f9 100755
--- a/examples/src/main/python/logistic_regression.py
+++ b/examples/src/main/python/logistic_regression.py
@@ -47,9 +47,15 @@ def readPointBatch(iterator):
     return [matrix]
 
 if __name__ == "__main__":
+
     if len(sys.argv) != 3:
         print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
         exit(-1)
+
+    print >> sys.stderr, """WARN: This is a naive implementation of Logistic Regression and is
+      given as an example! Please refer to examples/src/main/python/mllib/logistic_regression.py
+      to see how MLlib's implementation is used."""
+
     sc = SparkContext(appName="PythonLR")
     points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
     iterations = int(sys.argv[2])
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
index 658f73d96a..1f576319b3 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -25,6 +25,9 @@ import cern.jet.math._
 
 /**
  * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
  */
 object LocalALS {
   // Parameters set through command line arguments
@@ -107,7 +110,16 @@ object LocalALS {
     solved2D.viewColumn(0)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use the ALS method found in org.apache.spark.mllib.recommendation
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     args match {
       case Array(m, u, f, iters) => {
         M = m.toInt
@@ -120,6 +132,9 @@ object LocalALS {
         System.exit(1)
       }
     }
+
+    showWarning()
+
     printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
 
     val R = generateR()
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
index 0ef3001ca4..931faac546 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -21,6 +21,12 @@ import java.util.Random
 
 import breeze.linalg.{Vector, DenseVector}
 
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
+ */
 object LocalFileLR {
   val D = 10   // Numer of dimensions
   val rand = new Random(42)
@@ -32,7 +38,18 @@ object LocalFileLR {
     DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
+    showWarning()
+
     val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
     val points = lines.map(parsePoint _)
     val ITERATIONS = args(1).toInt
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
index e33a1b336d..17624c20cf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -28,6 +28,9 @@ import org.apache.spark.SparkContext._
 
 /**
  * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
  */
 object LocalKMeans {
   val N = 1000
@@ -61,7 +64,18 @@ object LocalKMeans {
     bestIndex
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use the KMeans method found in org.apache.spark.mllib.clustering
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
+    showWarning()
+
     val data = generateData
     var points = new HashSet[Vector[Double]]
     var kPoints = new HashMap[Int, Vector[Double]]
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
index 385b48089d..2d75b9d259 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -23,6 +23,9 @@ import breeze.linalg.{Vector, DenseVector}
 
 /**
  * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
  */
 object LocalLR {
   val N = 10000  // Number of data points
@@ -42,9 +45,19 @@ object LocalLR {
     Array.tabulate(N)(generatePoint)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
-    val data = generateData
 
+    showWarning()
+
+    val data = generateData
     // Initialize w to a random value
     var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
     println("Initial w: " + w)
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index 5cbc966bf0..fde8ffeedf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -27,6 +27,9 @@ import org.apache.spark._
 
 /**
  * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
  */
 object SparkALS {
   // Parameters set through command line arguments
@@ -87,7 +90,16 @@ object SparkALS {
     solved2D.viewColumn(0)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use the ALS method found in org.apache.spark.mllib.recommendation
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     var slices = 0
 
     val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
@@ -103,7 +115,11 @@ object SparkALS {
         System.err.println("Usage: SparkALS [M] [U] [F] [iters] [slices]")
         System.exit(1)
       }
     }
+
+    showWarning()
+
     printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
+
     val sparkConf = new SparkConf().setAppName("SparkALS")
     val sc = new SparkContext(sparkConf)
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
index 4906a696e9..d583cf421e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -30,6 +30,9 @@ import org.apache.spark.scheduler.InputFormatInfo
 
 /**
  * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
  */
 object SparkHdfsLR {
   val D = 10 // Numer of dimensions
@@ -48,12 +51,23 @@ object SparkHdfsLR {
     DataPoint(new DenseVector(x), y)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     if (args.length < 2) {
       System.err.println("Usage: SparkHdfsLR <file> <iters>")
       System.exit(1)
     }
 
+    showWarning()
+
     val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
     val inputPath = args(0)
     val conf = SparkHadoopUtil.get.newConfiguration()
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
index 79cfedf332..48e8d11cdf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -24,6 +24,9 @@ import org.apache.spark.SparkContext._
 
 /**
  * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
  */
 object SparkKMeans {
 
@@ -46,11 +49,23 @@ object SparkKMeans {
     bestIndex
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use the KMeans method found in org.apache.spark.mllib.clustering
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     if (args.length < 3) {
       System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
       System.exit(1)
     }
+
+    showWarning()
+
     val sparkConf = new SparkConf().setAppName("SparkKMeans")
     val sc = new SparkContext(sparkConf)
     val lines = sc.textFile(args(0))
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
index 99ceb3089e..fc23308fc4 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -28,6 +28,9 @@ import org.apache.spark._
 /**
  * Logistic regression based classification.
  * Usage: SparkLR [slices]
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
  */
 object SparkLR {
   val N = 10000  // Number of data points
@@ -47,7 +50,18 @@ object SparkLR {
     Array.tabulate(N)(generatePoint)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
+    showWarning()
+
     val sparkConf = new SparkConf().setAppName("SparkLR")
     val sc = new SparkContext(sparkConf)
     val numSlices = if (args.length > 0) args(0).toInt else 2
@@ -66,6 +80,7 @@ object SparkLR {
     }
 
     println("Final w: " + w)
+
     sc.stop()
   }
 }