author | Devaraj K &lt;devaraj@apache.org&gt; | 2016-02-22 17:16:56 -0800
---|---|---
committer | Xiangrui Meng &lt;meng@databricks.com&gt; | 2016-02-22 17:16:56 -0800
commit | 9f410871ca03f4c04bd965b2e4f80167ce543139 |
tree | 8c04aa65938c5dbcea96de42463b625ccc0ef313 /docs |
parent | 2063781840831469b394313694bfd25cbde2bb1e |
[SPARK-13016][DOCUMENTATION] Replace example code in mllib-dimensionality-reduction.md using include_example
Replaced example code in mllib-dimensionality-reduction.md using include_example.
Author: Devaraj K <devaraj@apache.org>
Closes #11132 from devaraj-kavali/SPARK-13016.
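For context, the `{% include_example %}` Liquid tag pulls the rendered snippet out of a standalone, runnable program under `examples/src/main/`, so the documentation can no longer drift away from code that actually compiles. Below is a rough sketch of what the referenced `SVDExample.scala` plausibly looks like, reconstructed from the inline snippet this patch removes and assuming the `// $example on$` / `// $example off$` marker convention used by Spark's docs build to delimit the rendered region; the real example file is not part of this docs-only diff, so treat the code as an approximation.

```scala
// Approximate sketch of examples/src/main/scala/org/apache/spark/examples/mllib/SVDExample.scala;
// reconstructed for illustration, not taken from this commit.
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.linalg.{Matrix, SingularValueDecomposition, Vector, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
// $example off$

object SVDExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SVDExample")
    val sc = new SparkContext(conf)

    // $example on$
    // A small dense dataset; each Vector becomes one row of the distributed matrix.
    val data = Array(
      Vectors.dense(1.0, 0.0, 7.0),
      Vectors.dense(2.0, 3.0, 4.0),
      Vectors.dense(4.0, 5.0, 6.0))
    val rows = sc.parallelize(data)
    val mat: RowMatrix = new RowMatrix(rows)

    // Compute the top 2 singular values and corresponding singular vectors.
    val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(2, computeU = true)
    val U: RowMatrix = svd.U // The U factor is a RowMatrix.
    val s: Vector = svd.s    // The singular values are stored in a local dense vector.
    val V: Matrix = svd.V    // The V factor is a local dense matrix.
    // $example off$

    println(s"Singular values: $s")
    sc.stop()
  }
}
```

At build time the tag would strip everything outside the markers, so the page shows only the matrix construction and the `computeSVD` call while the checked-in file remains a complete program.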
Diffstat (limited to 'docs')
-rw-r--r-- | docs/mllib-dimensionality-reduction.md | 113
1 file changed, 5 insertions(+), 108 deletions(-)
diff --git a/docs/mllib-dimensionality-reduction.md b/docs/mllib-dimensionality-reduction.md
index 11d8e0bd1d..cceddce9f7 100644
--- a/docs/mllib-dimensionality-reduction.md
+++ b/docs/mllib-dimensionality-reduction.md
@@ -64,19 +64,7 @@ passes, $O(n)$ storage on each executor, and $O(n k)$ storage on the driver.
 <div data-lang="scala" markdown="1">
 Refer to the [`SingularValueDecomposition` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.SingularValueDecomposition) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Matrix
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-import org.apache.spark.mllib.linalg.SingularValueDecomposition
-
-val mat: RowMatrix = ...
-
-// Compute the top 20 singular values and corresponding singular vectors.
-val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(20, computeU = true)
-val U: RowMatrix = svd.U // The U factor is a RowMatrix.
-val s: Vector = svd.s // The singular values are stored in a local dense vector.
-val V: Matrix = svd.V // The V factor is a local dense matrix.
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/SVDExample.scala %}

 The same code applies to `IndexedRowMatrix` if `U` is defined as an
 `IndexedRowMatrix`.
@@ -84,43 +72,7 @@ The same code applies to `IndexedRowMatrix` if `U` is defined as an
 <div data-lang="java" markdown="1">
 Refer to the [`SingularValueDecomposition` Java docs](api/java/org/apache/spark/mllib/linalg/SingularValueDecomposition.html) for details on the API.

-{% highlight java %}
-import java.util.LinkedList;
-
-import org.apache.spark.api.java.*;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.SingularValueDecomposition;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-
-public class SVD {
-  public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("SVD Example");
-    SparkContext sc = new SparkContext(conf);
-
-    double[][] array = ...
-    LinkedList<Vector> rowsList = new LinkedList<Vector>();
-    for (int i = 0; i < array.length; i++) {
-      Vector currentRow = Vectors.dense(array[i]);
-      rowsList.add(currentRow);
-    }
-    JavaRDD<Vector> rows = JavaSparkContext.fromSparkContext(sc).parallelize(rowsList);
-
-    // Create a RowMatrix from JavaRDD<Vector>.
-    RowMatrix mat = new RowMatrix(rows.rdd());
-
-    // Compute the top 4 singular values and corresponding singular vectors.
-    SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(4, true, 1.0E-9d);
-    RowMatrix U = svd.U();
-    Vector s = svd.s();
-    Matrix V = svd.V();
-  }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/mllib/JavaSVDExample.java %}

 The same code applies to `IndexedRowMatrix` if `U` is defined as an
 `IndexedRowMatrix`.
@@ -151,36 +103,14 @@ and use them to project the vectors into a low-dimensional space.

 Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Matrix
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-
-val mat: RowMatrix = ...
-
-// Compute the top 10 principal components.
-val pc: Matrix = mat.computePrincipalComponents(10) // Principal components are stored in a local dense matrix.
-
-// Project the rows to the linear space spanned by the top 10 principal components.
-val projected: RowMatrix = mat.multiply(pc)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/PCAOnRowMatrixExample.scala %}

 The following code demonstrates how to compute principal components on source vectors
 and use them to project the vectors into a low-dimensional space while keeping associated labels:

 Refer to the [`PCA` Scala docs](api/scala/index.html#org.apache.spark.mllib.feature.PCA) for details on the API.

-{% highlight scala %}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.feature.PCA
-
-val data: RDD[LabeledPoint] = ...
-
-// Compute the top 10 principal components.
-val pca = new PCA(10).fit(data.map(_.features))
-
-// Project vectors to the linear space spanned by the top 10 principal components, keeping the label
-val projected = data.map(p => p.copy(features = pca.transform(p.features)))
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/PCAOnSourceVectorExample.scala %}

 </div>

@@ -192,40 +122,7 @@ The number of columns should be small, e.g, less than 1000.

 Refer to the [`RowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/RowMatrix.html) for details on the API.

-{% highlight java %}
-import java.util.LinkedList;
-
-import org.apache.spark.api.java.*;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-
-public class PCA {
-  public static void main(String[] args) {
-    SparkConf conf = new SparkConf().setAppName("PCA Example");
-    SparkContext sc = new SparkContext(conf);
-
-    double[][] array = ...
-    LinkedList<Vector> rowsList = new LinkedList<Vector>();
-    for (int i = 0; i < array.length; i++) {
-      Vector currentRow = Vectors.dense(array[i]);
-      rowsList.add(currentRow);
-    }
-    JavaRDD<Vector> rows = JavaSparkContext.fromSparkContext(sc).parallelize(rowsList);
-
-    // Create a RowMatrix from JavaRDD<Vector>.
-    RowMatrix mat = new RowMatrix(rows.rdd());
-
-    // Compute the top 3 principal components.
-    Matrix pc = mat.computePrincipalComponents(3);
-    RowMatrix projected = mat.multiply(pc);
-  }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/mllib/JavaPCAExample.java %}

 </div>
 </div>
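The PCA snippets removed above move into standalone examples in the same way (`PCAOnRowMatrixExample.scala`, `PCAOnSourceVectorExample.scala`, and `JavaPCAExample.java`), which were presumably added alongside this change but are filtered out of this docs-only view. As a minimal sketch, the two Scala variants from the removed inline code can be combined as follows; the object name and toy data are illustrative, and the real example files may differ:

```scala
// Illustrative sketch only; the real example files added alongside this change may differ.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.PCA
import org.apache.spark.mllib.linalg.{Matrix, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

object PCASketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("PCASketch"))

    // Variant 1: principal components of a RowMatrix.
    val rows = sc.parallelize(Seq(
      Vectors.dense(1.0, 0.0, 7.0),
      Vectors.dense(2.0, 3.0, 4.0),
      Vectors.dense(4.0, 5.0, 6.0)))
    val mat = new RowMatrix(rows)

    // The top 2 principal components come back as a local dense matrix;
    // multiplying projects every row onto the space they span.
    val pc: Matrix = mat.computePrincipalComponents(2)
    val projectedRows: RowMatrix = mat.multiply(pc)

    // Variant 2: PCA on labeled points, keeping each point's label.
    val data: RDD[LabeledPoint] = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(1.0, 0.0, 7.0)),
      LabeledPoint(1.0, Vectors.dense(2.0, 3.0, 4.0)),
      LabeledPoint(1.0, Vectors.dense(4.0, 5.0, 6.0))))

    // Fit the PCA model on the features only, then transform each point's
    // features while preserving its label via the case-class copy.
    val pca = new PCA(2).fit(data.map(_.features))
    val projectedData = data.map(p => p.copy(features = pca.transform(p.features)))

    projectedData.collect().foreach(println)
    sc.stop()
  }
}
```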