aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Xusen Yin <yinxusen@gmail.com> 2016-07-07 11:28:04 +0100
committer Sean Owen <sowen@cloudera.com> 2016-07-07 11:28:04 +0100
commit 4c6f00d09c016dfc1d2de6e694dff219c9027fa0 (patch)
tree 9107e69c3f038158e4bcb4406915751dd0fddbfd /mllib
parent 986b2514013ed9ebab526f2cf3dc714cc9e480bf (diff)
download spark-4c6f00d09c016dfc1d2de6e694dff219c9027fa0.tar.gz
spark-4c6f00d09c016dfc1d2de6e694dff219c9027fa0.tar.bz2
spark-4c6f00d09c016dfc1d2de6e694dff219c9027fa0.zip
[SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix
## What changes were proposed in this pull request? The following Java code fails because of type erasure: ```Java JavaRDD<Vector> rows = jsc.parallelize(...); RowMatrix mat = new RowMatrix(rows.rdd()); QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true); ``` We should use retag to restore the type to prevent the following exception: ```Java java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector; ``` ## How was this patch tested? Java unit test Author: Xusen Yin <yinxusen@gmail.com> Closes #14051 from yinxusen/SPARK-16372.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala 2
-rw-r--r-- mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java 44
3 files changed, 46 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index f4819f77eb..a80cca70f4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1127,7 +1127,7 @@ private[python] class PythonMLLibAPI extends Serializable {
* Wrapper around RowMatrix constructor.
*/
def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = {
- new RowMatrix(rows.rdd.retag(classOf[Vector]), numRows, numCols)
+ new RowMatrix(rows.rdd, numRows, numCols)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index cd5209d0eb..1c94479ef0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") (
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them
- val blockQRs = rows.glom().map { partRows =>
+ val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows =>
val bdm = BDM.zeros[Double](partRows.length, col)
var i = 0
partRows.foreach { row =>
diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java
new file mode 100644
index 0000000000..c01af40549
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.linalg.distributed;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import org.apache.spark.SharedSparkSession;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.QRDecomposition;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+
+public class JavaRowMatrixSuite extends SharedSparkSession {
+
+ @Test
+ public void rowMatrixQRDecomposition() {
+ Vector v1 = Vectors.dense(1.0, 10.0, 100.0);
+ Vector v2 = Vectors.dense(2.0, 20.0, 200.0);
+ Vector v3 = Vectors.dense(3.0, 30.0, 300.0);
+
+ JavaRDD<Vector> rows = jsc.parallelize(Arrays.asList(v1, v2, v3), 1);
+ RowMatrix mat = new RowMatrix(rows.rdd());
+
+ QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true);
+ }
+}