author     Michael Armbrust <michael@databricks.com>   2014-04-03 15:45:34 -0700
committer  Matei Zaharia <matei@databricks.com>        2014-04-03 15:45:34 -0700
commit     b8f534196f9a8c99f75728a06e62282d139dee28 (patch)
tree       426e03d88695719bc58ff6f0aaa4794bd2fb40a2 /sql
parent     c1ea3afb516c204925259f0928dfb17d0fa89621 (diff)
[SQL] SPARK-1333 First draft of java API
WIP: Some work remains...
* [x] Hive support
* [x] Tests
* [x] Update docs
Feedback welcome!
Author: Michael Armbrust <michael@databricks.com>
Closes #248 from marmbrus/javaSchemaRDD and squashes the following commits:
b393913 [Michael Armbrust] @srowen 's java style suggestions.
f531eb1 [Michael Armbrust] Address matei's comments.
33a1b1a [Michael Armbrust] Ignore JavaHiveSuite.
822f626 [Michael Armbrust] improve docs.
ab91750 [Michael Armbrust] Improve Java SQL API:
 * Change JavaRow => Row
 * Add support for querying RDDs of JavaBeans
 * Docs
 * Tests
 * Hive support
0b859c8 [Michael Armbrust] First draft of java API.
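For orientation before the diffs: a minimal sketch of the workflow this patch enables, adapted from the JavaSQLSuite test added below (the local-mode SparkContext setup and the object name are illustrative):

    import scala.beans.BeanProperty
    import scala.collection.JavaConversions._

    import org.apache.spark.api.java.JavaSparkContext
    import org.apache.spark.sql.api.java.JavaSQLContext

    // A JavaBean: the getters define the column names and types (name: String, age: Int).
    class Person extends Serializable {
      @BeanProperty var name: String = _
      @BeanProperty var age: Int = _
    }

    object JavaApiSketch {
      def main(args: Array[String]): Unit = {
        val javaCtx = new JavaSparkContext("local", "JavaApiSketch")
        val javaSqlCtx = new JavaSQLContext(javaCtx)

        val person = new Person
        person.setName("Michael")
        person.setAge(29)

        // applySchema introspects the bean class to build the schema.
        val schemaRDD =
          javaSqlCtx.applySchema(javaCtx.parallelize(person :: Nil), classOf[Person])

        // Register as a temporary table, then query it with SQL.
        schemaRDD.registerAsTable("people")
        val rows = javaSqlCtx.sql("SELECT name, age FROM people").collect()
      }
    }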
Diffstat (limited to 'sql')
9 files changed, 460 insertions, 37 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 770cabcb31..a62cb8aa13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -17,13 +17,13 @@
 package org.apache.spark.sql
 
+import org.apache.spark.{Dependency, OneToOneDependency, Partition, TaskContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.types.BooleanType
-import org.apache.spark.{Dependency, OneToOneDependency, Partition, TaskContext}
 
 /**
  * <span class="badge" style="float: right; background-color: darkblue;">ALPHA COMPONENT</span>
@@ -92,23 +92,10 @@ import org.apache.spark.{Dependency, OneToOneDependency, Partition, TaskContext}
  */
 class SchemaRDD(
     @transient val sqlContext: SQLContext,
-    @transient val logicalPlan: LogicalPlan)
-  extends RDD[Row](sqlContext.sparkContext, Nil) {
+    @transient protected[spark] val logicalPlan: LogicalPlan)
+  extends RDD[Row](sqlContext.sparkContext, Nil) with SchemaRDDLike {
 
-  /**
-   * A lazily computed query execution workflow.  All other RDD operations are passed
-   * through to the RDD that is produced by this workflow.
-   *
-   * We want this to be lazy because invoking the whole query optimization pipeline can be
-   * expensive.
-   */
-  @transient
-  protected[spark] lazy val queryExecution = sqlContext.executePlan(logicalPlan)
-
-  override def toString =
-    s"""${super.toString}
-       |== Query Plan ==
-       |${queryExecution.executedPlan}""".stripMargin.trim
+  def baseSchemaRDD = this
 
   // =========================================================================================
   // RDD functions: Copy the internal row representation so we present immutable data to users.
@@ -313,30 +300,11 @@ class SchemaRDD(
       InsertIntoTable(UnresolvedRelation(None, tableName), Map.empty, logicalPlan, overwrite))
 
   /**
-   * Saves the contents of this `SchemaRDD` as a parquet file, preserving the schema.  Files that
-   * are written out using this method can be read back in as a SchemaRDD using the `parquetFile`
-   * function.
-   *
-   * @group schema
-   */
-  def saveAsParquetFile(path: String): Unit = {
-    sqlContext.executePlan(WriteToFile(path, logicalPlan)).toRdd
-  }
-
-  /**
-   * Registers this RDD as a temporary table using the given name.  The lifetime of this temporary
-   * table is tied to the [[SQLContext]] that was used to create this SchemaRDD.
-   *
-   * @group schema
-   */
-  def registerAsTable(tableName: String): Unit = {
-    sqlContext.registerRDDAsTable(this, tableName)
-  }
-
-  /**
    * Returns this RDD as a SchemaRDD.
    * @group schema
    */
   def toSchemaRDD = this
 
+  /** FOR INTERNAL USE ONLY */
   def analyze = sqlContext.analyzer(logicalPlan)
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
new file mode 100644
index 0000000000..840803a52c
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.plans.logical._
+
+/**
+ * Contains functions that are shared between all SchemaRDD types (i.e., Scala, Java)
+ */
+trait SchemaRDDLike {
+  @transient val sqlContext: SQLContext
+  @transient protected[spark] val logicalPlan: LogicalPlan
+
+  private[sql] def baseSchemaRDD: SchemaRDD
+
+  /**
+   * A lazily computed query execution workflow.  All other RDD operations are passed
+   * through to the RDD that is produced by this workflow.
+   *
+   * We want this to be lazy because invoking the whole query optimization pipeline can be
+   * expensive.
+   */
+  @transient
+  protected[spark] lazy val queryExecution = sqlContext.executePlan(logicalPlan)
+
+  override def toString =
+    s"""${super.toString}
+       |== Query Plan ==
+       |${queryExecution.executedPlan}""".stripMargin.trim
+
+  /**
+   * Saves the contents of this `SchemaRDD` as a parquet file, preserving the schema.  Files that
+   * are written out using this method can be read back in as a SchemaRDD using the `parquetFile`
+   * function.
+   *
+   * @group schema
+   */
+  def saveAsParquetFile(path: String): Unit = {
+    sqlContext.executePlan(WriteToFile(path, logicalPlan)).toRdd
+  }
+
+  /**
+   * Registers this RDD as a temporary table using the given name.  The lifetime of this temporary
+   * table is tied to the [[SQLContext]] that was used to create this SchemaRDD.
+   *
+   * @group schema
+   */
+  def registerAsTable(tableName: String): Unit = {
+    sqlContext.registerRDDAsTable(baseSchemaRDD, tableName)
+  }
+}
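Since both the Scala and Java RDD types now mix in SchemaRDDLike, they share the persistence helpers above. A minimal sketch of the Parquet round trip that the saveAsParquetFile scaladoc describes, from the Scala side (the path and table names are illustrative, and a registered "people" table is assumed):

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext

    object ParquetRoundTrip {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext("local", "ParquetRoundTrip")
        val sqlContext = new SQLContext(sc)

        // Assumes a table named "people" has already been registered against sqlContext.
        val people = sqlContext.sql("SELECT * FROM people")

        // Write the rows out along with their schema...
        people.saveAsParquetFile("people.parquet")

        // ...then read them back, schema intact, and expose them to SQL again.
        val restored = sqlContext.parquetFile("people.parquet")
        restored.registerAsTable("restoredPeople")
      }
    }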
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
new file mode 100644
index 0000000000..7b41aa1f1b
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.java
+
+import java.beans.{Introspector, PropertyDescriptor}
+
+import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GenericRow, Row => ScalaRow}
+import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.parquet.ParquetRelation
+import org.apache.spark.sql.execution.{ExistingRdd, SparkLogicalPlan}
+
+/**
+ * The entry point for executing Spark SQL queries from a Java program.
+ */
+class JavaSQLContext(sparkContext: JavaSparkContext) {
+
+  val sqlContext = new SQLContext(sparkContext.sc)
+
+  /**
+   * Executes a query expressed in SQL, returning the result as a JavaSchemaRDD.
+   */
+  def sql(sqlQuery: String): JavaSchemaRDD = {
+    val result = new JavaSchemaRDD(sqlContext, sqlContext.parseSql(sqlQuery))
+    // We force query optimization to happen right away instead of letting it happen lazily like
+    // when using the query DSL.  This is so DDL commands behave as expected.  This only
+    // generates the RDD lineage for DML queries, but does not perform any execution.
+    result.queryExecution.toRdd
+    result
+  }
+
+  /**
+   * Applies a schema to an RDD of Java Beans.
+   */
+  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): JavaSchemaRDD = {
+    // TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific.
+    val beanInfo = Introspector.getBeanInfo(beanClass)
+
+    val fields = beanInfo.getPropertyDescriptors.filterNot(_.getName == "class")
+    val schema = fields.map { property =>
+      val dataType = property.getPropertyType match {
+        case c: Class[_] if c == classOf[java.lang.String] => StringType
+        case c: Class[_] if c == java.lang.Short.TYPE => ShortType
+        case c: Class[_] if c == java.lang.Integer.TYPE => IntegerType
+        case c: Class[_] if c == java.lang.Long.TYPE => LongType
+        case c: Class[_] if c == java.lang.Double.TYPE => DoubleType
+        case c: Class[_] if c == java.lang.Byte.TYPE => ByteType
+        case c: Class[_] if c == java.lang.Float.TYPE => FloatType
+        case c: Class[_] if c == java.lang.Boolean.TYPE => BooleanType
+      }
+
+      AttributeReference(property.getName, dataType, true)()
+    }
+
+    val className = beanClass.getCanonicalName
+    val rowRdd = rdd.rdd.mapPartitions { iter =>
+      // BeanInfo is not serializable so we must rediscover it remotely for each partition.
+      val localBeanInfo = Introspector.getBeanInfo(Class.forName(className))
+      val extractors =
+        localBeanInfo.getPropertyDescriptors.filterNot(_.getName == "class").map(_.getReadMethod)
+
+      iter.map { row =>
+        new GenericRow(extractors.map(e => e.invoke(row)).toArray[Any]): ScalaRow
+      }
+    }
+    new JavaSchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(schema, rowRdd)))
+  }
+
+  /**
+   * Loads a parquet file, returning the result as a [[JavaSchemaRDD]].
+   */
+  def parquetFile(path: String): JavaSchemaRDD =
+    new JavaSchemaRDD(sqlContext, ParquetRelation("ParquetFile", path))
+
+  /**
+   * Registers the given RDD as a temporary table in the catalog.  Temporary tables exist only
+   * during the lifetime of this instance of SQLContext.
+   */
+  def registerRDDAsTable(rdd: JavaSchemaRDD, tableName: String): Unit = {
+    sqlContext.registerRDDAsTable(rdd.baseSchemaRDD, tableName)
+  }
+}
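The applySchema mapping above leans entirely on java.beans.Introspector. The following standalone sketch shows the same property-discovery step in isolation (the Point bean is hypothetical, purely for illustration):

    import java.beans.Introspector

    object BeanIntrospection {
      // Hypothetical bean; a no-arg getX method is discovered as the bean property "x".
      class Point {
        private var x: Int = 0
        def getX: Int = x
        def setX(v: Int): Unit = { x = v }
      }

      def main(args: Array[String]): Unit = {
        val beanInfo = Introspector.getBeanInfo(classOf[Point])
        // Every object exposes a synthetic "class" property via getClass;
        // applySchema filters it out the same way.
        val fields = beanInfo.getPropertyDescriptors.filterNot(_.getName == "class")
        fields.foreach { p =>
          println(s"${p.getName}: ${p.getPropertyType.getName}")  // prints "x: int"
        }
      }
    }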
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala
new file mode 100644
index 0000000000..d43d672938
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.java
+
+import org.apache.spark.api.java.{JavaRDDLike, JavaRDD}
+import org.apache.spark.sql.{SQLContext, SchemaRDD, SchemaRDDLike}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.rdd.RDD
+
+/**
+ * An RDD of [[Row]] objects that is returned as the result of a Spark SQL query.  In addition
+ * to standard RDD operations, a JavaSchemaRDD can also be registered as a table in the
+ * JavaSQLContext that was used to create it.  Registering a JavaSchemaRDD allows its contents
+ * to be queried in future SQL statements.
+ *
+ * @groupname schema SchemaRDD Functions
+ * @groupprio schema -1
+ * @groupname Ungrouped Base RDD Functions
+ */
+class JavaSchemaRDD(
+    @transient val sqlContext: SQLContext,
+    @transient protected[spark] val logicalPlan: LogicalPlan)
+  extends JavaRDDLike[Row, JavaRDD[Row]]
+  with SchemaRDDLike {
+
+  private[sql] val baseSchemaRDD = new SchemaRDD(sqlContext, logicalPlan)
+
+  override val classTag = scala.reflect.classTag[Row]
+
+  override def wrapRDD(rdd: RDD[Row]): JavaRDD[Row] = JavaRDD.fromRDD(rdd)
+
+  val rdd = baseSchemaRDD.map(new Row(_))
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
new file mode 100644
index 0000000000..362fe76958
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.java
+
+import org.apache.spark.sql.catalyst.expressions.{Row => ScalaRow}
+
+/**
+ * A result row from a Spark SQL query.
+ */
+class Row(row: ScalaRow) extends Serializable {
+
+  /** Returns the number of columns present in this Row. */
+  def length: Int = row.length
+
+  /** Returns the value of column `i`. */
+  def get(i: Int): Any =
+    row(i)
+
+  /** Returns true if value at column `i` is NULL. */
+  def isNullAt(i: Int) = get(i) == null
+
+  /**
+   * Returns the value of column `i` as an int.  This function will throw an exception if the
+   * value at `i` is not an integer, or if it is null.
+   */
+  def getInt(i: Int): Int =
+    row.getInt(i)
+
+  /**
+   * Returns the value of column `i` as a long.  This function will throw an exception if the
+   * value at `i` is not a long, or if it is null.
+   */
+  def getLong(i: Int): Long =
+    row.getLong(i)
+
+  /**
+   * Returns the value of column `i` as a double.  This function will throw an exception if the
+   * value at `i` is not a double, or if it is null.
+   */
+  def getDouble(i: Int): Double =
+    row.getDouble(i)
+
+  /**
+   * Returns the value of column `i` as a bool.  This function will throw an exception if the
+   * value at `i` is not a boolean, or if it is null.
+   */
+  def getBoolean(i: Int): Boolean =
+    row.getBoolean(i)
+
+  /**
+   * Returns the value of column `i` as a short.  This function will throw an exception if the
+   * value at `i` is not a short, or if it is null.
+   */
+  def getShort(i: Int): Short =
+    row.getShort(i)
+
+  /**
+   * Returns the value of column `i` as a byte.  This function will throw an exception if the
+   * value at `i` is not a byte, or if it is null.
+   */
+  def getByte(i: Int): Byte =
+    row.getByte(i)
+
+  /**
+   * Returns the value of column `i` as a float.  This function will throw an exception if the
+   * value at `i` is not a float, or if it is null.
+   */
+  def getFloat(i: Int): Float =
+    row.getFloat(i)
+
+  /**
+   * Returns the value of column `i` as a String.  This function will throw an exception if the
+   * value at `i` is not a String.
+   */
+  def getString(i: Int): String =
+    row.getString(i)
+}
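A short sketch of consuming these wrappers on the caller's side; `results` stands in for the java.util.List[Row] that JavaSchemaRDD.collect() returns for a hypothetical "SELECT name, age FROM people" query:

    import scala.collection.JavaConversions._

    import org.apache.spark.sql.api.java.Row

    object RowAccess {
      def describe(results: java.util.List[Row]): Unit = {
        for (row <- results) {
          // The primitive getters throw on NULL, so guard with isNullAt first.
          val age = if (row.isNullAt(1)) "unknown" else row.getInt(1).toString
          println(s"${row.getString(0)} is $age")
        }
      }
    }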
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala
new file mode 100644
index 0000000000..def0e046a3
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.java
+
+import scala.beans.BeanProperty
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.api.java.JavaSparkContext
+import org.apache.spark.sql.test.TestSQLContext
+
+// Implicits
+import scala.collection.JavaConversions._
+
+class PersonBean extends Serializable {
+  @BeanProperty
+  var name: String = _
+
+  @BeanProperty
+  var age: Int = _
+}
+
+class JavaSQLSuite extends FunSuite {
+  val javaCtx = new JavaSparkContext(TestSQLContext.sparkContext)
+  val javaSqlCtx = new JavaSQLContext(javaCtx)
+
+  test("schema from JavaBeans") {
+    val person = new PersonBean
+    person.setName("Michael")
+    person.setAge(29)
+
+    val rdd = javaCtx.parallelize(person :: Nil)
+    val schemaRDD = javaSqlCtx.applySchema(rdd, classOf[PersonBean])
+
+    schemaRDD.registerAsTable("people")
+    javaSqlCtx.sql("SELECT * FROM people").collect()
+  }
+}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 46febbfad0..ff8eaacded 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -71,6 +71,18 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   override def executePlan(plan: LogicalPlan): this.QueryExecution =
     new this.QueryExecution { val logical = plan }
 
+  /**
+   * Executes a query expressed in HiveQL using Spark, returning the result as a SchemaRDD.
+   */
+  def hql(hqlQuery: String): SchemaRDD = {
+    val result = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
+    // We force query optimization to happen right away instead of letting it happen lazily like
+    // when using the query DSL.  This is so DDL commands behave as expected.  This only
+    // generates the RDD lineage for DML queries, but does not perform any execution.
+    result.queryExecution.toRdd
+    result
+  }
+
   // Circular buffer to hold what hive prints to STDOUT and ERR.  Only printed when failures occur.
   @transient
   protected val outputBuffer = new java.io.OutputStream {
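A minimal sketch of the new hql entry point from Scala; the "src" table name follows the Hive test fixtures used in the suite below, and the local-mode setup is illustrative:

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext

    object HqlSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext("local", "HqlSketch")
        val hiveContext = new HiveContext(sc)

        // hql parses with HiveQl and eagerly builds the lineage, so DDL takes effect immediately.
        hiveContext.hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
        val rows = hiveContext.hql("SELECT key, value FROM src").collect()
      }
    }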
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala
new file mode 100644
index 0000000000..6df76fa825
--- /dev/null
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.api.java
+
+import org.apache.spark.api.java.JavaSparkContext
+import org.apache.spark.sql.api.java.{JavaSQLContext, JavaSchemaRDD}
+import org.apache.spark.sql.hive.{HiveContext, HiveQl}
+
+/**
+ * The entry point for executing Spark SQL queries from a Java program.
+ */
+class JavaHiveContext(sparkContext: JavaSparkContext) extends JavaSQLContext(sparkContext) {
+
+  override val sqlContext = new HiveContext(sparkContext)
+
+  /**
+   * Executes a query expressed in HiveQL, returning the result as a JavaSchemaRDD.
+   */
+  def hql(hqlQuery: String): JavaSchemaRDD = {
+    val result = new JavaSchemaRDD(sqlContext, HiveQl.parseSql(hqlQuery))
+    // We force query optimization to happen right away instead of letting it happen lazily like
+    // when using the query DSL.  This is so DDL commands behave as expected.  This only
+    // generates the RDD lineage for DML queries, but does not perform any execution.
+    result.queryExecution.toRdd
+    result
+  }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala
new file mode 100644
index 0000000000..8137f99b22
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/api/java/JavaHiveSuite.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.api.java
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.api.java.JavaSparkContext
+import org.apache.spark.sql.test.TestSQLContext
+import org.apache.spark.sql.hive.TestHive
+
+// Implicits
+import scala.collection.JavaConversions._
+
+class JavaHiveSQLSuite extends FunSuite {
+  ignore("SELECT * FROM src") {
+    val javaCtx = new JavaSparkContext(TestSQLContext.sparkContext)
+    // There is a little trickery here to avoid instantiating two HiveContexts in the same JVM
+    val javaSqlCtx = new JavaHiveContext(javaCtx) {
+      override val sqlContext = TestHive
+    }
+
+    assert(
+      javaSqlCtx.hql("SELECT * FROM src").collect().map(_.getInt(0)) ===
+        TestHive.sql("SELECT * FROM src").collect().map(_.getInt(0)).toSeq)
+  }
+}