author     Wenchen Fan <cloud0fan@outlook.com>    2015-08-01 00:17:15 -0700
committer  Reynold Xin <rxin@databricks.com>      2015-08-01 00:17:15 -0700
commit     1d59a4162bf5142af270ed7f4b3eab42870c87b7 (patch)
tree       98e1c51aafb41c2c64042b30d3ddcf2205da1414 /mllib
parent     d90f2cf7a2a1d1e69f9ab385f35f62d4091b5302 (diff)
[SPARK-9480][SQL] add MapData and cleanup internal row stuff
This PR adds `MapData` as the internal representation of the map type in Spark SQL, and provides a default implementation backed by two `ArrayData` instances. With that in place, every internal type has a specialized getter, so the generic getter in `ArrayData` is removed and specialized `toArray` variants are added. The PR also does some refactoring and cleanup of `InternalRow` and its subclasses.

Author: Wenchen Fan <cloud0fan@outlook.com>

Closes #7799 from cloud-fan/map-data and squashes the following commits:

77d482f [Wenchen Fan] fix python
e8f6682 [Wenchen Fan] skip MapData equality check in HiveInspectorSuite
40cc9db [Wenchen Fan] add toString
6e06ec9 [Wenchen Fan] some more cleanup
a90aca1 [Wenchen Fan] add MapData
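As a quick illustration of the design described above (a map stored as two parallel arrays), here is a minimal sketch. The names SimpleArrayData and SimpleMapData are hypothetical simplifications, not Spark's actual internal API.

// Minimal sketch: a map backed by two parallel key/value arrays, mirroring
// the "MapData implemented with two ArrayData" idea from the commit message.
// SimpleArrayData and SimpleMapData are illustrative names only.
class SimpleArrayData(private val elements: Array[Any]) {
  def numElements(): Int = elements.length
  def get(i: Int): Any = elements(i)
}

class SimpleMapData(val keyArray: SimpleArrayData, val valueArray: SimpleArrayData) {
  require(keyArray.numElements() == valueArray.numElements(),
    "key and value arrays must be the same length")
  def numElements(): Int = keyArray.numElements()

  // Rebuild a Scala Map, e.g. for debugging or printing.
  def toScalaMap: Map[Any, Any] =
    (0 until numElements()).map(i => keyArray.get(i) -> valueArray.get(i)).toMap
}

object SimpleMapDataExample extends App {
  val keys = new SimpleArrayData(Array[Any]("a", "b"))
  val values = new SimpleArrayData(Array[Any](1, 2))
  println(new SimpleMapData(keys, values).toScalaMap) // Map(a -> 1, b -> 2)
}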
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala  6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala   6
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 88914fa875..1c858348bf 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -179,12 +179,12 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
val tpe = row.getByte(0)
val numRows = row.getInt(1)
val numCols = row.getInt(2)
- val values = row.getArray(5).toArray.map(_.asInstanceOf[Double])
+ val values = row.getArray(5).toDoubleArray()
val isTransposed = row.getBoolean(6)
tpe match {
case 0 =>
- val colPtrs = row.getArray(3).toArray.map(_.asInstanceOf[Int])
- val rowIndices = row.getArray(4).toArray.map(_.asInstanceOf[Int])
+ val colPtrs = row.getArray(3).toIntArray()
+ val rowIndices = row.getArray(4).toIntArray()
new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed)
case 1 =>
new DenseMatrix(numRows, numCols, values, isTransposed)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 89a1818db0..96d1f48ba2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -209,11 +209,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] {
tpe match {
case 0 =>
val size = row.getInt(1)
- val indices = row.getArray(2).toArray().map(_.asInstanceOf[Int])
- val values = row.getArray(3).toArray().map(_.asInstanceOf[Double])
+ val indices = row.getArray(2).toIntArray()
+ val values = row.getArray(3).toDoubleArray()
new SparseVector(size, indices, values)
case 1 =>
- val values = row.getArray(3).toArray().map(_.asInstanceOf[Double])
+ val values = row.getArray(3).toDoubleArray()
new DenseVector(values)
}
}
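For reference, a simplified sketch of why the specialized accessors used in the hunks above are preferable to the old generic toArray plus per-element cast. FakeArrayData is a hypothetical stand-in, not Spark's real ArrayData; only the toDoubleArray()/toIntArray() method names mirror the calls introduced in this diff.

object SpecializedAccessorExample extends App {
  class FakeArrayData(elems: Array[Any]) {
    // Old-style generic getter: returns boxed elements.
    def toArray(): Array[Any] = elems
    // New-style specialized accessors: return primitive arrays directly.
    def toDoubleArray(): Array[Double] = elems.map(_.asInstanceOf[Double])
    def toIntArray(): Array[Int] = elems.map(_.asInstanceOf[Int])
  }

  val arr = new FakeArrayData(Array[Any](1.0, 2.0, 3.0))

  // Old pattern, as removed in the diff: generic getter plus per-element cast.
  val oldStyle: Array[Double] = arr.toArray().map(_.asInstanceOf[Double])

  // New pattern, as added in the diff: a single specialized call.
  val newStyle: Array[Double] = arr.toDoubleArray()

  println(oldStyle.sameElements(newStyle)) // true
}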