diff options
author | Wenchen Fan <cloud0fan@outlook.com> | 2015-08-01 00:17:15 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-08-01 00:17:15 -0700 |
commit | 1d59a4162bf5142af270ed7f4b3eab42870c87b7 (patch) | |
tree | 98e1c51aafb41c2c64042b30d3ddcf2205da1414 /mllib | |
parent | d90f2cf7a2a1d1e69f9ab385f35f62d4091b5302 (diff) | |
download | spark-1d59a4162bf5142af270ed7f4b3eab42870c87b7.tar.gz spark-1d59a4162bf5142af270ed7f4b3eab42870c87b7.tar.bz2 spark-1d59a4162bf5142af270ed7f4b3eab42870c87b7.zip |
[SPARK-9480][SQL] add MapData and cleanup internal row stuff
This PR adds `MapData` as the internal representation of the map type in Spark SQL, and provides a default implementation with just two `ArrayData` instances.
After that, we have specialized getters for all internal types, so I removed the generic getter in `ArrayData` and added specialized `toArray` methods for it.
Also did some refactoring and cleanup of `InternalRow` and its subclasses.
Author: Wenchen Fan <cloud0fan@outlook.com>
Closes #7799 from cloud-fan/map-data and squashes the following commits:
77d482f [Wenchen Fan] fix python
e8f6682 [Wenchen Fan] skip MapData equality check in HiveInspectorSuite
40cc9db [Wenchen Fan] add toString
6e06ec9 [Wenchen Fan] some more cleanup
a90aca1 [Wenchen Fan] add MapData
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 6 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 6 |
2 files changed, 6 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 88914fa875..1c858348bf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -179,12 +179,12 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { val tpe = row.getByte(0) val numRows = row.getInt(1) val numCols = row.getInt(2) - val values = row.getArray(5).toArray.map(_.asInstanceOf[Double]) + val values = row.getArray(5).toDoubleArray() val isTransposed = row.getBoolean(6) tpe match { case 0 => - val colPtrs = row.getArray(3).toArray.map(_.asInstanceOf[Int]) - val rowIndices = row.getArray(4).toArray.map(_.asInstanceOf[Int]) + val colPtrs = row.getArray(3).toIntArray() + val rowIndices = row.getArray(4).toIntArray() new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed) case 1 => new DenseMatrix(numRows, numCols, values, isTransposed) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 89a1818db0..96d1f48ba2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -209,11 +209,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { tpe match { case 0 => val size = row.getInt(1) - val indices = row.getArray(2).toArray().map(_.asInstanceOf[Int]) - val values = row.getArray(3).toArray().map(_.asInstanceOf[Double]) + val indices = row.getArray(2).toIntArray() + val values = row.getArray(3).toDoubleArray() new SparseVector(size, indices, values) case 1 => - val values = row.getArray(3).toArray().map(_.asInstanceOf[Double]) + val values = row.getArray(3).toDoubleArray() new DenseVector(values) } } |