author     Reynold Xin <rxin@databricks.com>   2015-01-14 00:38:55 -0800
committer  Reynold Xin <rxin@databricks.com>   2015-01-14 00:38:55 -0800
commit     d5eeb35167e1ab72fab7778757163ff0aacaef2c (patch)
tree       c71d81126126e8810519a384c214601e3edf7ca7 /sql/catalyst
parent     f9969098c8cb15e36c718b80c6cf5b534a6cf7c3 (diff)
download   spark-d5eeb35167e1ab72fab7778757163ff0aacaef2c.tar.gz
           spark-d5eeb35167e1ab72fab7778757163ff0aacaef2c.tar.bz2
           spark-d5eeb35167e1ab72fab7778757163ff0aacaef2c.zip
[SPARK-5167][SQL] Move Row into sql package and make it usable for Java.
Mostly just moving stuff around. This should still be source compatible, since Row was previously type aliased at org.apache.spark.sql.Row. Added the following APIs to Row:

```scala
def getMap[K, V](i: Int): scala.collection.Map[K, V]
def getJavaMap[K, V](i: Int): java.util.Map[K, V]
def getSeq[T](i: Int): Seq[T]
def getList[T](i: Int): java.util.List[T]
def getStruct(i: Int): Row
```

Author: Reynold Xin <rxin@databricks.com>

Closes #4030 from rxin/sql-row and squashes the following commits:

6c85c29 [Reynold Xin] Fixed style violation by adding a new line to Row.scala.
82b064a [Reynold Xin] [SPARK-5167][SQL] Move Row into sql package and make it usable for Java.
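To make the new collection getters concrete, here is a hedged usage sketch (not part of the commit; the row layout is invented for illustration):

```scala
import org.apache.spark.sql._

// Hypothetical row: column 0 is an array, column 1 a map, column 2 a struct.
val row = Row(Seq(1, 2, 3), Map("a" -> 1), Row("nested", 42))

val xs: Seq[Int] = row.getSeq[Int](0)                      // Seq(1, 2, 3)
val m: scala.collection.Map[String, Int] = row.getMap(1)   // Map(a -> 1)
val jl: java.util.List[Int] = row.getList[Int](0)          // Java view of column 0
val nested: Row = row.getStruct(2)                         // [nested,42]
```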
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java   34
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala   240
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala   4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala (renamed from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala)   113
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala   2
5 files changed, 303 insertions, 90 deletions
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
new file mode 100644
index 0000000000..62fcec824d
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql;
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow;
+
+/**
+ * A factory class used to construct {@link Row} objects.
+ */
+public class RowFactory {
+
+ /**
+ * Create a {@link Row} from an array of values. Position i in the array becomes position i
+ * in the created {@link Row} object.
+ */
+ public static Row create(Object[] values) {
+ return new GenericRow(values);
+ }
+}
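Since `RowFactory.create` takes an `Object[]`, a Java caller passes a plain array; from Scala the equivalent is an `Array[AnyRef]` with explicitly boxed primitives. A minimal sketch (the values are invented):

```scala
import org.apache.spark.sql.{Row, RowFactory}

// Primitives must be boxed when building the Object[] from Scala.
val row: Row = RowFactory.create(Array[AnyRef]("a", Int.box(1), Boolean.box(true)))
```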
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
new file mode 100644
index 0000000000..d7a4e014ce
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+
+
+object Row {
+ /**
+ * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val pairs = sql("SELECT key, value FROM src").rdd.map {
+ * case Row(key: Int, value: String) =>
+ * key -> value
+ * }
+ * }}}
+ */
+ def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
+
+ /**
+ * This method can be used to construct a [[Row]] with the given values.
+ */
+ def apply(values: Any*): Row = new GenericRow(values.toArray)
+
+ /**
+ * This method can be used to construct a [[Row]] from a [[Seq]] of values.
+ */
+ def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
+}
+
+
+/**
+ * Represents one row of output from a relational operator. Allows both generic access by ordinal,
+ * which will incur boxing overhead for primitives, and native primitive access.
+ *
+ * It is invalid to use the native primitive interface to retrieve a value that is null; instead, a
+ * user must check `isNullAt` before attempting to retrieve a value that might be null.
+ *
+ * To create a new Row, use [[RowFactory.create()]] in Java or [[Row.apply()]] in Scala.
+ *
+ * A [[Row]] object can be constructed by providing field values. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * // Create a Row from values.
+ * Row(value1, value2, value3, ...)
+ * // Create a Row from a Seq of values.
+ * Row.fromSeq(Seq(value1, value2, ...))
+ * }}}
+ *
+ * A value of a row can be accessed through both generic access by ordinal,
+ * which will incur boxing overhead for primitives, and native primitive access.
+ * An example of generic access by ordinal:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val row = Row(1, true, "a string", null)
+ * // row: Row = [1,true,a string,null]
+ * val firstValue = row(0)
+ * // firstValue: Any = 1
+ * val fourthValue = row(3)
+ * // fourthValue: Any = null
+ * }}}
+ *
+ * For native primitive access, it is invalid to retrieve a value that is null; instead, a user
+ * must check `isNullAt` before attempting to retrieve a value that might be null.
+ * An example of native primitive access:
+ * {{{
+ * // using the row from the previous example.
+ * val firstValue = row.getInt(0)
+ * // firstValue: Int = 1
+ * val isNull = row.isNullAt(3)
+ * // isNull: Boolean = true
+ * }}}
+ *
+ * Interfaces related to native primitive access are:
+ *
+ * `isNullAt(i: Int): Boolean`
+ *
+ * `getInt(i: Int): Int`
+ *
+ * `getLong(i: Int): Long`
+ *
+ * `getDouble(i: Int): Double`
+ *
+ * `getFloat(i: Int): Float`
+ *
+ * `getBoolean(i: Int): Boolean`
+ *
+ * `getShort(i: Int): Short`
+ *
+ * `getByte(i: Int): Byte`
+ *
+ * `getString(i: Int): String`
+ *
+ * In Scala, fields in a [[Row]] object can be extracted in a pattern match. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val pairs = sql("SELECT key, value FROM src").rdd.map {
+ * case Row(key: Int, value: String) =>
+ * key -> value
+ * }
+ * }}}
+ *
+ * @group row
+ */
+trait Row extends Seq[Any] with Serializable {
+ def apply(i: Int): Any
+
+ /** Returns the value at position i. If the value is null, null is returned. */
+ def get(i: Int): Any = apply(i)
+
+ /** Checks whether the value at position i is null. */
+ def isNullAt(i: Int): Boolean
+
+ /**
+ * Returns the value at position i as a primitive int.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getInt(i: Int): Int
+
+ /**
+ * Returns the value at position i as a primitive long.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getLong(i: Int): Long
+
+ /**
+ * Returns the value at position i as a primitive double.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getDouble(i: Int): Double
+
+ /**
+ * Returns the value at position i as a primitive float.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getFloat(i: Int): Float
+
+ /**
+ * Returns the value at position i as a primitive boolean.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getBoolean(i: Int): Boolean
+
+ /**
+ * Returns the value at position i as a primitive short.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getShort(i: Int): Short
+
+ /**
+ * Returns the value at position i as a primitive byte.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getByte(i: Int): Byte
+
+ /**
+ * Returns the value at position i as a String object.
+ * Throws an exception if the type mismatches or if the value is null.
+ */
+ def getString(i: Int): String
+
+ /**
+ * Returns the value at position i of array type as a Scala Seq.
+ * Throws an exception if the type mismatches.
+ */
+ def getSeq[T](i: Int): Seq[T] = apply(i).asInstanceOf[Seq[T]]
+
+ /**
+ * Returns the value at position i of array type as a [[java.util.List]].
+ * Throws an exception if the type mismatches.
+ */
+ def getList[T](i: Int): java.util.List[T] = {
+ scala.collection.JavaConversions.seqAsJavaList(getSeq[T](i))
+ }
+
+ /**
+ * Returns the value at position i of map type as a Scala Map.
+ * Throws an exception if the type mismatches.
+ */
+ def getMap[K, V](i: Int): scala.collection.Map[K, V] = apply(i).asInstanceOf[Map[K, V]]
+
+ /**
+ * Returns the value at position i of map type as a [[java.util.Map]].
+ * Throws an exception if the type mismatches.
+ */
+ def getJavaMap[K, V](i: Int): java.util.Map[K, V] = {
+ scala.collection.JavaConversions.mapAsJavaMap(getMap[K, V](i))
+ }
+
+ /**
+ * Returns the value at position i of struct type as a [[Row]] object.
+ * Throws an exception if the type mismatches.
+ */
+ def getStruct(i: Int): Row = getAs[Row](i)
+
+ /**
+ * Returns the value at position i.
+ * Throws an exception if the type mismatches.
+ */
+ def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
+
+ override def toString(): String = s"[${this.mkString(",")}]"
+
+ /**
+ * Make a copy of the current [[Row]] object.
+ */
+ def copy(): Row
+
+ /** Returns true if there are any NULL values in this row. */
+ def anyNull: Boolean = {
+ val l = length
+ var i = 0
+ while (i < l) {
+ if (isNullAt(i)) { return true }
+ i += 1
+ }
+ false
+ }
+}
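The null-safety contract spelled out in the scaladoc above is easy to trip over: the primitive getters fail on null rather than returning a default. A brief sketch (illustrative only):

```scala
import org.apache.spark.sql._

val row = Row(1, null)

row.getInt(0)     // 1
row.isNullAt(1)   // true
row.anyNull       // true
// row.getInt(1) would throw: guard with isNullAt first.
val second: Option[Int] = if (row.isNullAt(1)) None else Some(row.getInt(1))
```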
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 55d95991c5..fbc97b2e75 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -49,6 +49,10 @@ package org.apache.spark.sql.catalyst
*/
package object expressions {
+ type Row = org.apache.spark.sql.Row
+
+ val Row = org.apache.spark.sql.Row
+
/**
 * Converts a [[Row]] to another Row given a sequence of expressions that define each column of the
 * new row. If the schema of the input row is specified, then the given expressions will be bound
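The alias above keeps catalyst-internal code compiling unchanged after the move; roughly (a sketch, `AliasDemo` is hypothetical):

```scala
import org.apache.spark.sql.catalyst.expressions.Row  // resolves to org.apache.spark.sql.Row

object AliasDemo {
  // Both the Row type and the Row value come from the alias, so pattern
  // matching and construction behave exactly as before the move.
  def swap(r: Row): Row = r match {
    case Row(a, b) => Row(b, a)
  }
}
```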
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index dcda53bb71..c22b842684 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -19,68 +19,6 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.types.NativeType
-object Row {
- /**
- * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
- * {{{
- * import org.apache.spark.sql._
- *
- * val pairs = sql("SELECT key, value FROM src").rdd.map {
- * case Row(key: Int, value: String) =>
- * key -> value
- * }
- * }}}
- */
- def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
-
- /**
- * This method can be used to construct a [[Row]] with the given values.
- */
- def apply(values: Any*): Row = new GenericRow(values.toArray)
-
- /**
- * This method can be used to construct a [[Row]] from a [[Seq]] of values.
- */
- def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
-}
-
-/**
- * Represents one row of output from a relational operator. Allows both generic access by ordinal,
- * which will incur boxing overhead for primitives, as well as native primitive access.
- *
- * It is invalid to use the native primitive interface to retrieve a value that is null, instead a
- * user must check [[isNullAt]] before attempting to retrieve a value that might be null.
- */
-trait Row extends Seq[Any] with Serializable {
- def apply(i: Int): Any
-
- def isNullAt(i: Int): Boolean
-
- def getInt(i: Int): Int
- def getLong(i: Int): Long
- def getDouble(i: Int): Double
- def getFloat(i: Int): Float
- def getBoolean(i: Int): Boolean
- def getShort(i: Int): Short
- def getByte(i: Int): Byte
- def getString(i: Int): String
- def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
-
- override def toString() =
- s"[${this.mkString(",")}]"
-
- def copy(): Row
-
- /** Returns true if there are any NULL values in this row. */
- def anyNull: Boolean = {
- var i = 0
- while (i < length) {
- if (isNullAt(i)) { return true }
- i += 1
- }
- false
- }
-}
/**
* An extended interface to [[Row]] that allows the values for each column to be updated. Setting
@@ -105,22 +43,19 @@ trait MutableRow extends Row {
* A row with no data. Calling any methods will result in an error. Can be used as a placeholder.
*/
object EmptyRow extends Row {
- def apply(i: Int): Any = throw new UnsupportedOperationException
-
- def iterator = Iterator.empty
- def length = 0
- def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
-
- def getInt(i: Int): Int = throw new UnsupportedOperationException
- def getLong(i: Int): Long = throw new UnsupportedOperationException
- def getDouble(i: Int): Double = throw new UnsupportedOperationException
- def getFloat(i: Int): Float = throw new UnsupportedOperationException
- def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
- def getShort(i: Int): Short = throw new UnsupportedOperationException
- def getByte(i: Int): Byte = throw new UnsupportedOperationException
- def getString(i: Int): String = throw new UnsupportedOperationException
+ override def apply(i: Int): Any = throw new UnsupportedOperationException
+ override def iterator = Iterator.empty
+ override def length = 0
+ override def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
+ override def getInt(i: Int): Int = throw new UnsupportedOperationException
+ override def getLong(i: Int): Long = throw new UnsupportedOperationException
+ override def getDouble(i: Int): Double = throw new UnsupportedOperationException
+ override def getFloat(i: Int): Float = throw new UnsupportedOperationException
+ override def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
+ override def getShort(i: Int): Short = throw new UnsupportedOperationException
+ override def getByte(i: Int): Byte = throw new UnsupportedOperationException
+ override def getString(i: Int): String = throw new UnsupportedOperationException
override def getAs[T](i: Int): T = throw new UnsupportedOperationException
-
def copy() = this
}
@@ -135,50 +70,50 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
def this(size: Int) = this(new Array[Any](size))
- def iterator = values.iterator
+ override def iterator = values.iterator
- def length = values.length
+ override def length = values.length
- def apply(i: Int) = values(i)
+ override def apply(i: Int) = values(i)
- def isNullAt(i: Int) = values(i) == null
+ override def isNullAt(i: Int) = values(i) == null
- def getInt(i: Int): Int = {
+ override def getInt(i: Int): Int = {
if (values(i) == null) sys.error("Failed to check null bit for primitive int value.")
values(i).asInstanceOf[Int]
}
- def getLong(i: Int): Long = {
+ override def getLong(i: Int): Long = {
if (values(i) == null) sys.error("Failed to check null bit for primitive long value.")
values(i).asInstanceOf[Long]
}
- def getDouble(i: Int): Double = {
+ override def getDouble(i: Int): Double = {
if (values(i) == null) sys.error("Failed to check null bit for primitive double value.")
values(i).asInstanceOf[Double]
}
- def getFloat(i: Int): Float = {
+ override def getFloat(i: Int): Float = {
if (values(i) == null) sys.error("Failed to check null bit for primitive float value.")
values(i).asInstanceOf[Float]
}
- def getBoolean(i: Int): Boolean = {
+ override def getBoolean(i: Int): Boolean = {
if (values(i) == null) sys.error("Failed to check null bit for primitive boolean value.")
values(i).asInstanceOf[Boolean]
}
- def getShort(i: Int): Short = {
+ override def getShort(i: Int): Short = {
if (values(i) == null) sys.error("Failed to check null bit for primitive short value.")
values(i).asInstanceOf[Short]
}
- def getByte(i: Int): Byte = {
+ override def getByte(i: Int): Byte = {
if (values(i) == null) sys.error("Failed to check null bit for primitive byte value.")
values(i).asInstanceOf[Byte]
}
- def getString(i: Int): String = {
+ override def getString(i: Int): String = {
if (values(i) == null) sys.error("Failed to check null bit for primitive String value.")
values(i).asInstanceOf[String]
}
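GenericRow is a thin wrapper over an `Array[Any]`, which is what makes the fail-fast null checks above cheap. A brief sketch of the observable behavior (internal API, illustrative only):

```scala
import org.apache.spark.sql.catalyst.expressions.GenericRow

val row = new GenericRow(Array[Any]("x", 7, null))

row.getString(0)   // "x"
row.getInt(1)      // 7
row.isNullAt(2)    // true
// row.getInt(2) would hit sys.error("Failed to check null bit for primitive int value."),
// since the primitive getters never unbox a null into a default value.
```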
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
index fa0a355ebc..e38ad63f2e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
@@ -727,7 +727,7 @@ object StructType {
* // StructType(List(StructField(b,LongType,false), StructField(c,BooleanType,false)))
* }}}
*
- * A [[org.apache.spark.sql.catalyst.expressions.Row]] object is used as a value of the StructType.
+ * A [[org.apache.spark.sql.Row]] object is used as a value of the StructType.
* Example:
* {{{
* import org.apache.spark.sql._