path: root/docs/mllib-dimensionality-reduction.md
author    Devaraj K <devaraj@apache.org>    2016-02-22 17:16:56 -0800
committer Xiangrui Meng <meng@databricks.com>    2016-02-22 17:16:56 -0800
commit    9f410871ca03f4c04bd965b2e4f80167ce543139 (patch)
tree      8c04aa65938c5dbcea96de42463b625ccc0ef313 /docs/mllib-dimensionality-reduction.md
parent    2063781840831469b394313694bfd25cbde2bb1e (diff)
[SPARK-13016][DOCUMENTATION] Replace example code in mllib-dimensionality-reduction.md using include_example
Replaced example code in mllib-dimensionality-reduction.md using include_example.

Author: Devaraj K <devaraj@apache.org>

Closes #11132 from devaraj-kavali/SPARK-13016.
Diffstat (limited to 'docs/mllib-dimensionality-reduction.md')
-rw-r--r--  docs/mllib-dimensionality-reduction.md  |  113
1 file changed, 5 insertions(+), 108 deletions(-)
diff --git a/docs/mllib-dimensionality-reduction.md b/docs/mllib-dimensionality-reduction.md
index 11d8e0bd1d..cceddce9f7 100644
--- a/docs/mllib-dimensionality-reduction.md
+++ b/docs/mllib-dimensionality-reduction.md
@@ -64,19 +64,7 @@ passes, $O(n)$ storage on each executor, and $O(n k)$ storage on the driver.
<div data-lang="scala" markdown="1">
Refer to the [`SingularValueDecomposition` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.SingularValueDecomposition) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Matrix
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-import org.apache.spark.mllib.linalg.SingularValueDecomposition
-
-val mat: RowMatrix = ...
-
-// Compute the top 20 singular values and corresponding singular vectors.
-val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(20, computeU = true)
-val U: RowMatrix = svd.U // The U factor is a RowMatrix.
-val s: Vector = svd.s // The singular values are stored in a local dense vector.
-val V: Matrix = svd.V // The V factor is a local dense matrix.
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/SVDExample.scala %}
The same code applies to `IndexedRowMatrix` if `U` is defined as an
`IndexedRowMatrix`.
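
For reference, a self-contained Scala sketch of the kind of program the new `SVDExample.scala` include stands in for; the object name and the three-row toy matrix are illustrative assumptions, not the contents of the actual example file:

{% highlight scala %}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{Matrix, SingularValueDecomposition, Vector, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix

object SVDSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SVDSketch")
    val sc = new SparkContext(conf)

    // Assumed toy data: three dense rows of a 3 x 3 matrix.
    val rows = sc.parallelize(Seq(
      Vectors.dense(1.0, 0.0, 7.0),
      Vectors.dense(2.0, 3.0, 4.0),
      Vectors.dense(4.0, 5.0, 6.0)))
    val mat = new RowMatrix(rows)

    // Compute the top 2 singular values and corresponding singular vectors.
    val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(2, computeU = true)
    val U: RowMatrix = svd.U // distributed left singular vectors
    val s: Vector = svd.s    // singular values, in a local dense vector
    val V: Matrix = svd.V    // right singular vectors, a local dense matrix

    println(s"Singular values: $s")
    sc.stop()
  }
}
{% endhighlight %}

With `computeU = false` (the default), `computeSVD` skips materializing the distributed `U` factor, which is cheaper when only `s` and `V` are needed.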
@@ -84,43 +72,7 @@ The same code applies to `IndexedRowMatrix` if `U` is defined as an
<div data-lang="java" markdown="1">
Refer to the [`SingularValueDecomposition` Java docs](api/java/org/apache/spark/mllib/linalg/SingularValueDecomposition.html) for details on the API.
-{% highlight java %}
-import java.util.LinkedList;
-
-import org.apache.spark.api.java.*;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.SingularValueDecomposition;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-
-public class SVD {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("SVD Example");
- SparkContext sc = new SparkContext(conf);
-
- double[][] array = ...
- LinkedList<Vector> rowsList = new LinkedList<Vector>();
- for (int i = 0; i < array.length; i++) {
- Vector currentRow = Vectors.dense(array[i]);
- rowsList.add(currentRow);
- }
- JavaRDD<Vector> rows = JavaSparkContext.fromSparkContext(sc).parallelize(rowsList);
-
- // Create a RowMatrix from JavaRDD<Vector>.
- RowMatrix mat = new RowMatrix(rows.rdd());
-
- // Compute the top 4 singular values and corresponding singular vectors.
- SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(4, true, 1.0E-9d);
- RowMatrix U = svd.U();
- Vector s = svd.s();
- Matrix V = svd.V();
- }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/mllib/JavaSVDExample.java %}
The same code applies to `IndexedRowMatrix` if `U` is defined as an
`IndexedRowMatrix`.
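
Since both language sections note that the same call applies to `IndexedRowMatrix`, here is a brief Scala sketch of that variant, with the same assumed toy data as above:

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.{IndexedRow, IndexedRowMatrix}

// Assumes an existing SparkContext `sc`, as in the sketch above.
val indexedRows = sc.parallelize(Seq(
  IndexedRow(0L, Vectors.dense(1.0, 0.0, 7.0)),
  IndexedRow(1L, Vectors.dense(2.0, 3.0, 4.0)),
  IndexedRow(3L, Vectors.dense(4.0, 5.0, 6.0)))) // indices need not be contiguous
val mat = new IndexedRowMatrix(indexedRows)

// Identical call; U now comes back as an IndexedRowMatrix, so each
// row of U keeps the index of the input row it corresponds to.
val svd = mat.computeSVD(2, computeU = true)
val U: IndexedRowMatrix = svd.U
{% endhighlight %}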
@@ -151,36 +103,14 @@ and use them to project the vectors into a low-dimensional space.
Refer to the [`RowMatrix` Scala docs](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.RowMatrix) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.linalg.Matrix
-import org.apache.spark.mllib.linalg.distributed.RowMatrix
-
-val mat: RowMatrix = ...
-
-// Compute the top 10 principal components.
-val pc: Matrix = mat.computePrincipalComponents(10) // Principal components are stored in a local dense matrix.
-
-// Project the rows to the linear space spanned by the top 10 principal components.
-val projected: RowMatrix = mat.multiply(pc)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/PCAOnRowMatrixExample.scala %}
The following code demonstrates how to compute principal components on source vectors
and use them to project the vectors into a low-dimensional space while keeping associated labels:
Refer to the [`PCA` Scala docs](api/scala/index.html#org.apache.spark.mllib.feature.PCA) for details on the API.
-{% highlight scala %}
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.feature.PCA
-
-val data: RDD[LabeledPoint] = ...
-
-// Compute the top 10 principal components.
-val pca = new PCA(10).fit(data.map(_.features))
-
-// Project vectors to the linear space spanned by the top 10 principal components, keeping the label
-val projected = data.map(p => p.copy(features = pca.transform(p.features)))
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/mllib/PCAOnSourceVectorExample.scala %}
</div>
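
A compact Scala sketch of the first PCA pattern in this section — principal components of a `RowMatrix` followed by projection; the toy dataset is an assumption, not taken from the shipped `PCAOnRowMatrixExample.scala`:

{% highlight scala %}
import org.apache.spark.mllib.linalg.{Matrix, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix

// Assumes an existing SparkContext `sc`.
val rows = sc.parallelize(Seq(
  Vectors.dense(1.0, 0.0, 7.0),
  Vectors.dense(2.0, 3.0, 4.0),
  Vectors.dense(4.0, 5.0, 6.0)))
val mat = new RowMatrix(rows)

// Top 2 principal components, returned as a local dense matrix
// whose columns are the component directions.
val pc: Matrix = mat.computePrincipalComponents(2)

// Multiplying by pc projects each row into the 2-dimensional subspace.
val projected: RowMatrix = mat.multiply(pc)
{% endhighlight %}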
@@ -192,40 +122,7 @@ The number of columns should be small, e.g., less than 1000.
Refer to the [`RowMatrix` Java docs](api/java/org/apache/spark/mllib/linalg/distributed/RowMatrix.html) for details on the API.
-{% highlight java %}
-import java.util.LinkedList;
-
-import org.apache.spark.api.java.*;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-
-public class PCA {
- public static void main(String[] args) {
- SparkConf conf = new SparkConf().setAppName("PCA Example");
- SparkContext sc = new SparkContext(conf);
-
- double[][] array = ...
- LinkedList<Vector> rowsList = new LinkedList<Vector>();
- for (int i = 0; i < array.length; i++) {
- Vector currentRow = Vectors.dense(array[i]);
- rowsList.add(currentRow);
- }
- JavaRDD<Vector> rows = JavaSparkContext.fromSparkContext(sc).parallelize(rowsList);
-
- // Create a RowMatrix from JavaRDD<Vector>.
- RowMatrix mat = new RowMatrix(rows.rdd());
-
- // Compute the top 3 principal components.
- Matrix pc = mat.computePrincipalComponents(3);
- RowMatrix projected = mat.multiply(pc);
- }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/mllib/JavaPCAExample.java %}
</div>
</div>
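
Finally, a hedged Scala sketch of the labeled-vector PCA pattern referenced above — fitting `mllib.feature.PCA` on features only, then projecting while keeping labels; the dataset is again an illustrative assumption:

{% highlight scala %}
import org.apache.spark.mllib.feature.PCA
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

// Assumes an existing SparkContext `sc` and a toy labeled dataset.
val data: RDD[LabeledPoint] = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0, 0.0, 7.0)),
  LabeledPoint(1.0, Vectors.dense(2.0, 3.0, 4.0)),
  LabeledPoint(1.0, Vectors.dense(4.0, 5.0, 6.0))))

// Fit PCA on the feature vectors only; labels never enter the fit.
val pca = new PCA(2).fit(data.map(_.features))

// Replace each point's features with its projection, keeping the label.
val projected = data.map(p => p.copy(features = pca.transform(p.features)))
{% endhighlight %}

Because `fit` sees only `data.map(_.features)`, the labels cannot influence the learned components; `copy(features = ...)` then swaps in the reduced vectors with each label intact.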