author     Reynold Xin <rxin@databricks.com>   2015-05-13 16:15:31 -0700
committer  Michael Armbrust <michael@databricks.com>   2015-05-13 16:15:31 -0700
commit     e683182c3e6347afdac0e5658487f80e5e054ef4 (patch)
tree       31f3a8403afc574c041e7674f6e80cf6cbe58034 /sql/catalyst
parent     59250fe51486908f9e3f3d9ef10aadbcb9b4d62d (diff)
download   spark-e683182c3e6347afdac0e5658487f80e5e054ef4.tar.gz
           spark-e683182c3e6347afdac0e5658487f80e5e054ef4.tar.bz2
           spark-e683182c3e6347afdac0e5658487f80e5e054ef4.zip
[SQL] Move some classes into packages that are more appropriate.
JavaTypeInference into catalyst
types.DateUtils into catalyst
CacheManager into execution
DefaultParserDialect into catalyst

Author: Reynold Xin <rxin@databricks.com>

Closes #6108 from rxin/sql-rename and squashes the following commits:

3fc9613 [Reynold Xin] Fixed import ordering.
83d9ff4 [Reynold Xin] Fixed codegen tests.
e271e86 [Reynold Xin] mima
f4e24a6 [Reynold Xin] [SQL] Move some classes into packages that are more appropriate.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala | 1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala | 109
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala | 36
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala | 1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala (renamed from sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala) | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala | 17
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala | 1
9 files changed, 163 insertions(+), 7 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index a13e2f36a1..75a493b248 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -23,6 +23,7 @@ import java.util.{Map => JavaMap}
import scala.collection.mutable.HashMap
import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
new file mode 100644
index 0000000000..625c8d3a62
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import java.beans.Introspector
+import java.lang.{Iterable => JIterable}
+import java.util.{Iterator => JIterator, Map => JMap}
+
+import scala.language.existentials
+
+import com.google.common.reflect.TypeToken
+import org.apache.spark.sql.types._
+
+/**
+ * Type-inference utilities for POJOs and Java collections.
+ */
+private [sql] object JavaTypeInference {
+
+ private val iterableType = TypeToken.of(classOf[JIterable[_]])
+ private val mapType = TypeToken.of(classOf[JMap[_, _]])
+ private val iteratorReturnType = classOf[JIterable[_]].getMethod("iterator").getGenericReturnType
+ private val nextReturnType = classOf[JIterator[_]].getMethod("next").getGenericReturnType
+ private val keySetReturnType = classOf[JMap[_, _]].getMethod("keySet").getGenericReturnType
+ private val valuesReturnType = classOf[JMap[_, _]].getMethod("values").getGenericReturnType
+
+ /**
+ * Infers the corresponding SQL data type of a Java type.
+ * @param typeToken Java type
+ * @return (SQL data type, nullable)
+ */
+ private [sql] def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = {
+ // TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific.
+ typeToken.getRawType match {
+ case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
+ (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)
+
+ case c: Class[_] if c == classOf[java.lang.String] => (StringType, true)
+ case c: Class[_] if c == java.lang.Short.TYPE => (ShortType, false)
+ case c: Class[_] if c == java.lang.Integer.TYPE => (IntegerType, false)
+ case c: Class[_] if c == java.lang.Long.TYPE => (LongType, false)
+ case c: Class[_] if c == java.lang.Double.TYPE => (DoubleType, false)
+ case c: Class[_] if c == java.lang.Byte.TYPE => (ByteType, false)
+ case c: Class[_] if c == java.lang.Float.TYPE => (FloatType, false)
+ case c: Class[_] if c == java.lang.Boolean.TYPE => (BooleanType, false)
+
+ case c: Class[_] if c == classOf[java.lang.Short] => (ShortType, true)
+ case c: Class[_] if c == classOf[java.lang.Integer] => (IntegerType, true)
+ case c: Class[_] if c == classOf[java.lang.Long] => (LongType, true)
+ case c: Class[_] if c == classOf[java.lang.Double] => (DoubleType, true)
+ case c: Class[_] if c == classOf[java.lang.Byte] => (ByteType, true)
+ case c: Class[_] if c == classOf[java.lang.Float] => (FloatType, true)
+ case c: Class[_] if c == classOf[java.lang.Boolean] => (BooleanType, true)
+
+ case c: Class[_] if c == classOf[java.math.BigDecimal] => (DecimalType(), true)
+ case c: Class[_] if c == classOf[java.sql.Date] => (DateType, true)
+ case c: Class[_] if c == classOf[java.sql.Timestamp] => (TimestampType, true)
+
+ case _ if typeToken.isArray =>
+ val (dataType, nullable) = inferDataType(typeToken.getComponentType)
+ (ArrayType(dataType, nullable), true)
+
+ case _ if iterableType.isAssignableFrom(typeToken) =>
+ val (dataType, nullable) = inferDataType(elementType(typeToken))
+ (ArrayType(dataType, nullable), true)
+
+ case _ if mapType.isAssignableFrom(typeToken) =>
+ val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]]
+ val mapSupertype = typeToken2.getSupertype(classOf[JMap[_, _]])
+ val keyType = elementType(mapSupertype.resolveType(keySetReturnType))
+ val valueType = elementType(mapSupertype.resolveType(valuesReturnType))
+ val (keyDataType, _) = inferDataType(keyType)
+ val (valueDataType, nullable) = inferDataType(valueType)
+ (MapType(keyDataType, valueDataType, nullable), true)
+
+ case _ =>
+ val beanInfo = Introspector.getBeanInfo(typeToken.getRawType)
+ val properties = beanInfo.getPropertyDescriptors.filterNot(_.getName == "class")
+ val fields = properties.map { property =>
+ val returnType = typeToken.method(property.getReadMethod).getReturnType
+ val (dataType, nullable) = inferDataType(returnType)
+ new StructField(property.getName, dataType, nullable)
+ }
+ (new StructType(fields), true)
+ }
+ }
+
+ private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
+ val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]]
+ val iterableSupertype = typeToken2.getSupertype(classOf[JIterable[_]])
+ val iteratorType = iterableSupertype.resolveType(iteratorReturnType)
+ val itemType = iteratorType.resolveType(nextReturnType)
+ itemType
+ }
+}
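
For reference, inferDataType maps a Java type to a Catalyst DataType plus a nullability flag: primitives come back non-nullable; boxed types, beans, arrays, and collections nullable. A minimal sketch of exercising it on a bean (the Person POJO is hypothetical, and since the method is private [sql] the caller is assumed to live in a package under org.apache.spark.sql):
{{{
package org.apache.spark.sql.example  // assumed location, for private [sql] access

import com.google.common.reflect.TypeToken
import org.apache.spark.sql.catalyst.JavaTypeInference

// Hypothetical POJO, used only to illustrate the bean branch above.
class Person {
  private var name: String = _
  private var age: Int = _
  def getName: String = name
  def setName(v: String): Unit = { name = v }
  def getAge: Int = age
  def setAge(v: Int): Unit = { age = v }
}

object InferenceSketch {
  def main(args: Array[String]): Unit = {
    val (dataType, nullable) = JavaTypeInference.inferDataType(TypeToken.of(classOf[Person]))
    // Bean properties become StructFields: the primitive getter infers as
    // IntegerType (not nullable), the String getter as StringType (nullable).
    println(dataType)
    println(nullable)  // true: the bean itself is nullable
  }
}
}}}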
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
index 05a92b06f9..554fb4eb25 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala
@@ -31,3 +31,39 @@ abstract class ParserDialect {
// this is the main function that will be implemented by sql parser.
def parse(sqlText: String): LogicalPlan
}
+
+/**
+ * Currently we support the default dialect named "sql", associated with the class
+ * [[DefaultParserDialect]].
+ *
+ * A custom SQL dialect can also be provided, for example in the Spark SQL CLI:
+ * {{{
+ * -- switch to the "hiveql" dialect
+ * spark-sql> SET spark.sql.dialect=hiveql;
+ * spark-sql> SELECT * FROM src LIMIT 1;
+ *
+ * -- switch to the "sql" dialect
+ * spark-sql> SET spark.sql.dialect=sql;
+ * spark-sql> SELECT * FROM src LIMIT 1;
+ *
+ * -- register a new SQL dialect
+ * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect;
+ * spark-sql> SELECT * FROM src LIMIT 1;
+ *
+ * -- register a non-existent SQL dialect
+ * spark-sql> SET spark.sql.dialect=NotExistedClass;
+ * spark-sql> SELECT * FROM src LIMIT 1;
+ *
+ * -- an exception will be thrown and the dialect will fall back to
+ * -- "sql" (for SQLContext) or
+ * -- "hiveql" (for HiveContext)
+ * }}}
+ */
+private[spark] class DefaultParserDialect extends ParserDialect {
+ @transient
+ protected val sqlParser = new SqlParser
+
+ override def parse(sqlText: String): LogicalPlan = {
+ sqlParser.parse(sqlText)
+ }
+}
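
The scaladoc above implies the contract for third-party dialects: a concrete ParserDialect with a zero-argument constructor, registered by fully qualified class name through spark.sql.dialect. A minimal sketch of such a dialect (the class name, the keyword rewrite, and the parsePlan placeholder are hypothetical, not part of this patch):
{{{
import org.apache.spark.sql.catalyst.ParserDialect
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Hypothetical user-supplied dialect; register it with:
//   spark-sql> SET spark.sql.dialect=com.example.SQL99Dialect;
class SQL99Dialect extends ParserDialect {
  override def parse(sqlText: String): LogicalPlan = {
    // Normalize a vendor-specific keyword before handing off to whatever
    // underlying parser the dialect author wires in below.
    val normalized = sqlText.replaceAll("(?i)\\bSTRAIGHT_JOIN\\b", "JOIN")
    parsePlan(normalized)
  }

  // Placeholder: a real dialect would delegate to an actual SQL parser here.
  private def parsePlan(sql: String): LogicalPlan =
    throw new UnsupportedOperationException("wire in a real parser")
}
}}}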
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index adf941ab2a..d8cf2b2e32 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}
import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._
/** Cast the child expression to the target data type. */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d17af0e7ff..ecb4c4b68f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -250,7 +250,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
case Cast(child @ DateType(), StringType) =>
child.castOrNull(c =>
q"""org.apache.spark.sql.types.UTF8String(
- org.apache.spark.sql.types.DateUtils.toString($c))""",
+ org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""",
StringType)
case Cast(child @ NumericType(), IntegerType) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 18cba4cc46..5f8c7354ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._
object Literal {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala
index d36a49159b..3f92be4a55 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.spark.sql.types
+package org.apache.spark.sql.catalyst.util
import java.sql.Date
import java.text.SimpleDateFormat
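
The rename above only changes the package; DateType values remain an Int count of days since the Unix epoch. A small sketch of the round trip under the new import, assuming the fromJavaDate/toJavaDate companions to the toString helper that the codegen change now calls:
{{{
import java.sql.Date
import org.apache.spark.sql.catalyst.util.DateUtils

object DateUtilsSketch {
  def main(args: Array[String]): Unit = {
    // Days since 1970-01-01: the internal representation of DateType.
    val days: Int = DateUtils.fromJavaDate(Date.valueOf("2015-05-13"))
    // toString(days) is what codegen now emits for Cast(DateType, StringType).
    println(DateUtils.toString(days))   // 2015-05-13
    println(DateUtils.toJavaDate(days)) // 2015-05-13
  }
}
}}}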
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
index fc02ba6c9c..bc9c37bf2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
@@ -19,15 +19,18 @@ package org.apache.spark.sql.types
import java.util.Arrays
+import org.apache.spark.annotation.DeveloperApi
+
/**
- * A UTF-8 String, as internal representation of StringType in SparkSQL
+ * :: DeveloperApi ::
+ * A UTF-8 String, as internal representation of StringType in SparkSQL
*
- * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison,
- * search, see http://en.wikipedia.org/wiki/UTF-8 for details.
+ * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison,
+ * search, see http://en.wikipedia.org/wiki/UTF-8 for details.
*
- * Note: This is not designed for general use cases, should not be used outside SQL.
+ * Note: This is not designed for general use cases, should not be used outside SQL.
*/
-
+@DeveloperApi
final class UTF8String extends Ordered[UTF8String] with Serializable {
private[this] var bytes: Array[Byte] = _
@@ -180,6 +183,10 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
}
}
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
object UTF8String {
// number of tailing bytes in a UTF8 sequence for a code point
// see http://en.wikipedia.org/wiki/UTF-8, 192-256 of Byte 1
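
The hunk above surfaces UTF8String as a developer API; the companion's apply(String) factory is the same entry point the codegen snippet calls. A quick sketch of the round trip, relying only on the apply, toString, and Ordered behavior defined in this file:
{{{
import org.apache.spark.sql.types.UTF8String

object UTF8StringSketch {
  def main(args: Array[String]): Unit = {
    // Construct from a JVM String; the bytes are stored UTF-8 encoded.
    val s = UTF8String("café")
    // Ordered[UTF8String]: comparison runs over the byte representation.
    assert(s.compare(UTF8String("cafe")) != 0)
    println(s.toString)  // café
  }
}
}}}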
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index 04fd261d16..5c4a1527c2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.mathfuncs._
+import org.apache.spark.sql.catalyst.util.DateUtils
import org.apache.spark.sql.types._