-rwxr-xr-x  examples/src/main/python/als.py                                        9
-rwxr-xr-x  examples/src/main/python/kmeans.py                                     6
-rwxr-xr-x  examples/src/main/python/logistic_regression.py                        6
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/LocalALS.scala      15
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala   17
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala   14
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/LocalLR.scala       15
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SparkALS.scala      16
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala   14
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala   15
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SparkLR.scala       15
11 files changed, 141 insertions(+), 1 deletion(-)
diff --git a/examples/src/main/python/als.py b/examples/src/main/python/als.py
index 1a7c4c51f4..c862650b0a 100755
--- a/examples/src/main/python/als.py
+++ b/examples/src/main/python/als.py
@@ -16,6 +16,9 @@
#
"""
+This is an example implementation of ALS for learning how to use Spark. Please refer to
+ALS in pyspark.mllib.recommendation for more conventional use.
+
This example requires numpy (http://www.numpy.org/)
"""
from os.path import realpath
@@ -49,9 +52,15 @@ def update(i, vec, mat, ratings):
if __name__ == "__main__":
+
"""
Usage: als [M] [U] [F] [iterations] [slices]
"""
+
+ print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
+ example. Please use the ALS method found in pyspark.mllib.recommendation for more
+ conventional use."""
+
sc = SparkContext(appName="PythonALS")
M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
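For reference, the conventional alternative that this new warning points at looks roughly like the sketch below: a minimal PySpark program against pyspark.mllib.recommendation.ALS, with made-up toy ratings and a placeholder app name, not part of this patch. The Scala files later in the patch reference the equivalent org.apache.spark.mllib.recommendation.ALS, which follows the same train/predict pattern.

    from pyspark import SparkContext
    from pyspark.mllib.recommendation import ALS

    sc = SparkContext(appName="ConventionalALS")

    # Toy (user, product, rating) triples; real data would come from a file.
    ratings = sc.parallelize([
        (1, 1, 5.0), (1, 2, 1.0),
        (2, 1, 4.0), (2, 2, 2.0),
    ])

    # Factorize the rating matrix with rank 10 and 5 ALS iterations.
    model = ALS.train(ratings, rank=10, iterations=5)

    # Predicted rating of product 2 by user 1.
    print model.predict(1, 2)

    sc.stop()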
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py
index 988fc45baf..036bdf4c4f 100755
--- a/examples/src/main/python/kmeans.py
+++ b/examples/src/main/python/kmeans.py
@@ -45,9 +45,15 @@ def closestPoint(p, centers):
if __name__ == "__main__":
+
if len(sys.argv) != 4:
print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
exit(-1)
+
+ print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
+ as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
+ how to use MLlib's KMeans implementation."""
+
sc = SparkContext(appName="PythonKMeans")
lines = sc.textFile(sys.argv[1])
data = lines.map(parseVector).cache()
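Similarly, MLlib's KMeans, which this warning recommends over the naive version, is used roughly as follows. This is a minimal sketch with made-up 2-D points and a placeholder app name; the Scala examples reference the equivalent org.apache.spark.mllib.clustering.KMeans.

    from numpy import array

    from pyspark import SparkContext
    from pyspark.mllib.clustering import KMeans

    sc = SparkContext(appName="ConventionalKMeans")

    # Toy 2-D points forming two obvious clusters; real data would be
    # parsed from a text file as in examples/src/main/python/mllib/kmeans.py.
    data = sc.parallelize([
        array([0.0, 0.0]), array([1.0, 1.0]),
        array([9.0, 8.0]), array([8.0, 9.0]),
    ])

    # Cluster the points into k=2 groups.
    model = KMeans.train(data, 2, maxIterations=10)

    # Cluster index assigned to each point.
    print data.map(lambda p: model.predict(p)).collect()

    sc.stop()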
diff --git a/examples/src/main/python/logistic_regression.py b/examples/src/main/python/logistic_regression.py
index 6c33deabfd..8456b272f9 100755
--- a/examples/src/main/python/logistic_regression.py
+++ b/examples/src/main/python/logistic_regression.py
@@ -47,9 +47,15 @@ def readPointBatch(iterator):
return [matrix]
if __name__ == "__main__":
+
if len(sys.argv) != 3:
print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
exit(-1)
+
+ print >> sys.stderr, """WARN: This is a naive implementation of Logistic Regression and is
+ given as an example! Please refer to examples/src/main/python/mllib/logistic_regression.py
+ to see how MLlib's implementation is used."""
+
sc = SparkContext(appName="PythonLR")
points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
iterations = int(sys.argv[2])
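And the MLlib logistic regression referenced here is used roughly like this: a minimal sketch assuming the LogisticRegressionWithSGD API of this era, with two made-up training points. The Scala files below reference org.apache.spark.mllib.classification, where the same train/predict pattern applies.

    from numpy import array

    from pyspark import SparkContext
    from pyspark.mllib.classification import LogisticRegressionWithSGD
    from pyspark.mllib.regression import LabeledPoint

    sc = SparkContext(appName="ConventionalLR")

    # Toy labeled points; real data would be parsed from a file.
    data = sc.parallelize([
        LabeledPoint(0.0, array([0.0, 1.0])),
        LabeledPoint(1.0, array([1.0, 0.0])),
    ])

    # Train with 100 iterations of SGD.
    model = LogisticRegressionWithSGD.train(data, iterations=100)

    # Predicted class (0 or 1) for a new point.
    print model.predict(array([1.0, 0.0]))

    sc.stop()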
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
index 658f73d96a..1f576319b3 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -25,6 +25,9 @@ import cern.jet.math._
/**
* Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
*/
object LocalALS {
// Parameters set through command line arguments
@@ -107,7 +110,16 @@ object LocalALS {
solved2D.viewColumn(0)
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of ALS and is given as an example!
+ |Please use the ALS method found in org.apache.spark.mllib.recommendation
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
args match {
case Array(m, u, f, iters) => {
M = m.toInt
@@ -120,6 +132,9 @@ object LocalALS {
System.exit(1)
}
}
+
+ showWarning()
+
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
val R = generateR()
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
index 0ef3001ca4..931faac546 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -21,6 +21,12 @@ import java.util.Random
import breeze.linalg.{Vector, DenseVector}
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
+ */
object LocalFileLR {
val D = 10 // Number of dimensions
val rand = new Random(42)
@@ -32,7 +38,18 @@ object LocalFileLR {
DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+ |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
+ showWarning()
+
val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
val points = lines.map(parsePoint _)
val ITERATIONS = args(1).toInt
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
index e33a1b336d..17624c20cf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -28,6 +28,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
*/
object LocalKMeans {
val N = 1000
@@ -61,7 +64,18 @@ object LocalKMeans {
bestIndex
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+ |Please use the KMeans method found in org.apache.spark.mllib.clustering
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
+ showWarning()
+
val data = generateData
var points = new HashSet[Vector[Double]]
var kPoints = new HashMap[Int, Vector[Double]]
diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
index 385b48089d..2d75b9d259 100644
--- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -23,6 +23,9 @@ import breeze.linalg.{Vector, DenseVector}
/**
* Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object LocalLR {
val N = 10000 // Number of data points
@@ -42,9 +45,19 @@ object LocalLR {
Array.tabulate(N)(generatePoint)
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+ |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
- val data = generateData
+ showWarning()
+
+ val data = generateData
// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
println("Initial w: " + w)
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index 5cbc966bf0..fde8ffeedf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -27,6 +27,9 @@ import org.apache.spark._
/**
* Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
*/
object SparkALS {
// Parameters set through command line arguments
@@ -87,7 +90,16 @@ object SparkALS {
solved2D.viewColumn(0)
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of ALS and is given as an example!
+ |Please use the ALS method found in org.apache.spark.mllib.recommendation
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
var slices = 0
val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
@@ -103,7 +115,11 @@ object SparkALS {
System.err.println("Usage: SparkALS [M] [U] [F] [iters] [slices]")
System.exit(1)
}
+
+ showWarning()
+
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
+
val sparkConf = new SparkConf().setAppName("SparkALS")
val sc = new SparkContext(sparkConf)
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
index 4906a696e9..d583cf421e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -30,6 +30,9 @@ import org.apache.spark.scheduler.InputFormatInfo
/**
* Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkHdfsLR {
val D = 10 // Number of dimensions
@@ -48,12 +51,23 @@ object SparkHdfsLR {
DataPoint(new DenseVector(x), y)
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+ |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
if (args.length < 2) {
System.err.println("Usage: SparkHdfsLR <file> <iters>")
System.exit(1)
}
+ showWarning()
+
val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
val inputPath = args(0)
val conf = SparkHadoopUtil.get.newConfiguration()
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
index 79cfedf332..48e8d11cdf 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -24,6 +24,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
*/
object SparkKMeans {
@@ -46,11 +49,23 @@ object SparkKMeans {
bestIndex
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+ |Please use the KMeans method found in org.apache.spark.mllib.clustering
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
if (args.length < 3) {
System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
System.exit(1)
}
+
+ showWarning()
+
val sparkConf = new SparkConf().setAppName("SparkKMeans")
val sc = new SparkContext(sparkConf)
val lines = sc.textFile(args(0))
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
index 99ceb3089e..fc23308fc4 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -28,6 +28,9 @@ import org.apache.spark._
/**
* Logistic regression based classification.
* Usage: SparkLR [slices]
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkLR {
val N = 10000 // Number of data points
@@ -47,7 +50,18 @@ object SparkLR {
Array.tabulate(N)(generatePoint)
}
+ def showWarning() {
+ System.err.println(
+ """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+ |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+ |for more conventional use.
+ """.stripMargin)
+ }
+
def main(args: Array[String]) {
+
+ showWarning()
+
val sparkConf = new SparkConf().setAppName("SparkLR")
val sc = new SparkContext(sparkConf)
val numSlices = if (args.length > 0) args(0).toInt else 2
@@ -66,6 +80,7 @@ object SparkLR {
}
println("Final w: " + w)
+
sc.stop()
}
}