From 1d59a4162bf5142af270ed7f4b3eab42870c87b7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 1 Aug 2015 00:17:15 -0700 Subject: [SPARK-9480][SQL] add MapData and cleanup internal row stuff This PR adds a `MapData` as the internal representation of the map type in Spark SQL, and provides a default implementation with just 2 `ArrayData`. After that, we have specialized getters for all internal types, so I removed the generic getter in `ArrayData` and added specialized `toArray` variants (e.g. `toIntArray`, `toDoubleArray`) for it. Also did some refactoring and cleanup for `InternalRow` and its subclasses. Author: Wenchen Fan Closes #7799 from cloud-fan/map-data and squashes the following commits: 77d482f [Wenchen Fan] fix python e8f6682 [Wenchen Fan] skip MapData equality check in HiveInspectorSuite 40cc9db [Wenchen Fan] add toString 6e06ec9 [Wenchen Fan] some more cleanup a90aca1 [Wenchen Fan] add MapData --- mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 6 +++--- mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'mllib') diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 88914fa875..1c858348bf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -179,12 +179,12 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { val tpe = row.getByte(0) val numRows = row.getInt(1) val numCols = row.getInt(2) - val values = row.getArray(5).toArray.map(_.asInstanceOf[Double]) + val values = row.getArray(5).toDoubleArray() val isTransposed = row.getBoolean(6) tpe match { case 0 => - val colPtrs = row.getArray(3).toArray.map(_.asInstanceOf[Int]) - val rowIndices = row.getArray(4).toArray.map(_.asInstanceOf[Int]) + val colPtrs = row.getArray(3).toIntArray() + val rowIndices = row.getArray(4).toIntArray() new
SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed) case 1 => new DenseMatrix(numRows, numCols, values, isTransposed) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 89a1818db0..96d1f48ba2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -209,11 +209,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { tpe match { case 0 => val size = row.getInt(1) - val indices = row.getArray(2).toArray().map(_.asInstanceOf[Int]) - val values = row.getArray(3).toArray().map(_.asInstanceOf[Double]) + val indices = row.getArray(2).toIntArray() + val values = row.getArray(3).toDoubleArray() new SparseVector(size, indices, values) case 1 => - val values = row.getArray(3).toArray().map(_.asInstanceOf[Double]) + val values = row.getArray(3).toDoubleArray() new DenseVector(values) } } -- cgit v1.2.3