author     hyukjinkwon <gurwls223@gmail.com>       2017-02-27 14:33:02 -0800
committer  Wenchen Fan <wenchen@databricks.com>    2017-02-27 14:33:02 -0800
commit     8a5a58506c35f35f41cd1366ee693abec2916153 (patch)
tree       e683da358468c1948b7d7b0415ab5962c78c3bf9 /sql/hive
parent     4ba9c6c453606f5e5a1e324d5f933d2c9307a604 (diff)
[SPARK-15615][SQL][BUILD][FOLLOW-UP] Replace deprecated usage of json(RDD[String]) API
## What changes were proposed in this pull request?

This PR proposes to replace the deprecated `json(RDD[String])` usage with `json(Dataset[String])`. The deprecated overload currently produces a large number of warnings.

## How was this patch tested?

Fixed tests.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #17071 from HyukjinKwon/SPARK-15615-followup.
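For context, a minimal sketch of the migration this commit applies across the test suites (not part of the commit; the app name and variable names are illustrative):

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}

object JsonMigrationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("json-migration").getOrCreate()
    import spark.implicits._ // provides toDS(), like the testImplicits import added in HiveExplainSuite below

    val jsonStrings = (1 to 10).map(i => s"""{"a": $i, "b": "str$i"}""")

    // Deprecated overload this commit removes from the tests:
    // val df: DataFrame = spark.read.json(spark.sparkContext.parallelize(jsonStrings))

    // Replacement: feed a Dataset[String] to the JSON reader.
    val df: DataFrame = spark.read.json(jsonStrings.toDS())
    df.show()

    spark.stop()
  }
}
```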
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java              |  6
-rw-r--r--  sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java   |  6
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala     |  4
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala    |  5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala |  8
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala       | 27
6 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
index f664d5a4cd..aefc9cc77d 100644
--- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
+++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
@@ -26,7 +26,6 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Window;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
@@ -35,7 +34,6 @@ import org.apache.spark.sql.hive.test.TestHive$;
import org.apache.spark.sql.hive.aggregate.MyDoubleSum;
public class JavaDataFrameSuite {
- private transient JavaSparkContext sc;
private transient SQLContext hc;
Dataset<Row> df;
@@ -50,13 +48,11 @@ public class JavaDataFrameSuite {
@Before
public void setUp() throws IOException {
hc = TestHive$.MODULE$;
- sc = new JavaSparkContext(hc.sparkContext());
-
List<String> jsonObjects = new ArrayList<>(10);
for (int i = 0; i < 10; i++) {
jsonObjects.add("{\"key\":" + i + ", \"value\":\"str" + i + "\"}");
}
- df = hc.read().json(sc.parallelize(jsonObjects));
+ df = hc.read().json(hc.createDataset(jsonObjects, Encoders.STRING()));
df.createOrReplaceTempView("window_table");
}
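Java has no `toDS()` implicit, so the Java suites go through `createDataset` with an explicit encoder instead. A minimal Scala sketch of the same call shape, pasteable into spark-shell (which already provides `spark`; variable names are illustrative):

```scala
import org.apache.spark.sql.{Dataset, Encoders}

val jsonObjects = (0 until 10).map(i => s"""{"key": $i, "value": "str$i"}""")

// Explicit encoder, mirroring hc.createDataset(jsonObjects, Encoders.STRING()) above
val ds: Dataset[String] = spark.createDataset(jsonObjects)(Encoders.STRING)
val df = spark.read.json(ds)
df.createOrReplaceTempView("window_table")
```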
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java
index 061c7431a6..0b157a45e6 100644
--- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java
+++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java
@@ -31,9 +31,9 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.QueryTest$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
@@ -81,8 +81,8 @@ public class JavaMetastoreDataSourcesSuite {
for (int i = 0; i < 10; i++) {
jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}");
}
- JavaRDD<String> rdd = sc.parallelize(jsonObjects);
- df = sqlContext.read().json(rdd);
+ Dataset<String> ds = sqlContext.createDataset(jsonObjects, Encoders.STRING());
+ df = sqlContext.read().json(ds);
df.createOrReplaceTempView("jsonTable");
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index e951bbe1dc..03ea0c8c77 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -511,9 +511,9 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
test("create external table") {
withTempPath { tempPath =>
withTable("savedJsonTable", "createdJsonTable") {
- val df = read.json(sparkContext.parallelize((1 to 10).map { i =>
+ val df = read.json((1 to 10).map { i =>
s"""{ "a": $i, "b": "str$i" }"""
- }))
+ }.toDS())
withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "not a source name") {
df.write
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index cfca1d7983..8a37bc3665 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.test.SQLTestUtils
* A set of tests that validates support for Hive Explain command.
*/
class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
+ import testImplicits._
test("show cost in explain command") {
// Only has sizeInBytes before ANALYZE command
@@ -92,8 +93,8 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("SPARK-17409: The EXPLAIN output of CTAS only shows the analyzed plan") {
withTempView("jt") {
- val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str$i"}"""))
- spark.read.json(rdd).createOrReplaceTempView("jt")
+ val ds = (1 to 10).map(i => s"""{"a":$i, "b":"str$i"}""").toDS()
+ spark.read.json(ds).createOrReplaceTempView("jt")
val outputs = sql(
s"""
|EXPLAIN EXTENDED
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index b2f19d7753..ce92fbf349 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -31,15 +31,15 @@ case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested])
class HiveResolutionSuite extends HiveComparisonTest {
test("SPARK-3698: case insensitive test for nested data") {
- read.json(sparkContext.makeRDD(
- """{"a": [{"a": {"a": 1}}]}""" :: Nil)).createOrReplaceTempView("nested")
+ read.json(Seq("""{"a": [{"a": {"a": 1}}]}""").toDS())
+ .createOrReplaceTempView("nested")
// This should be successfully analyzed
sql("SELECT a[0].A.A from nested").queryExecution.analyzed
}
test("SPARK-5278: check ambiguous reference to fields") {
- read.json(sparkContext.makeRDD(
- """{"a": [{"b": 1, "B": 2}]}""" :: Nil)).createOrReplaceTempView("nested")
+ read.json(Seq("""{"a": [{"b": 1, "B": 2}]}""").toDS())
+ .createOrReplaceTempView("nested")
// there are 2 fields matching field name "b"; we should report an ambiguous reference error
val exception = intercept[AnalysisException] {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index faed8b5046..9f6176339e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -973,30 +973,30 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
test("SPARK-4296 Grouping field with Hive UDF as sub expression") {
- val rdd = sparkContext.makeRDD( """{"a": "str", "b":"1", "c":"1970-01-01 00:00:00"}""" :: Nil)
- read.json(rdd).createOrReplaceTempView("data")
+ val ds = Seq("""{"a": "str", "b":"1", "c":"1970-01-01 00:00:00"}""").toDS()
+ read.json(ds).createOrReplaceTempView("data")
checkAnswer(
sql("SELECT concat(a, '-', b), year(c) FROM data GROUP BY concat(a, '-', b), year(c)"),
Row("str-1", 1970))
dropTempTable("data")
- read.json(rdd).createOrReplaceTempView("data")
+ read.json(ds).createOrReplaceTempView("data")
checkAnswer(sql("SELECT year(c) + 1 FROM data GROUP BY year(c) + 1"), Row(1971))
dropTempTable("data")
}
test("resolve udtf in projection #1") {
- val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}"""))
- read.json(rdd).createOrReplaceTempView("data")
+ val ds = (1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""").toDS()
+ read.json(ds).createOrReplaceTempView("data")
val df = sql("SELECT explode(a) AS val FROM data")
val col = df("val")
}
test("resolve udtf in projection #2") {
- val rdd = sparkContext.makeRDD((1 to 2).map(i => s"""{"a":[$i, ${i + 1}]}"""))
- read.json(rdd).createOrReplaceTempView("data")
+ val ds = (1 to 2).map(i => s"""{"a":[$i, ${i + 1}]}""").toDS()
+ read.json(ds).createOrReplaceTempView("data")
checkAnswer(sql("SELECT explode(map(1, 1)) FROM data LIMIT 1"), Row(1, 1) :: Nil)
checkAnswer(sql("SELECT explode(map(1, 1)) as (k1, k2) FROM data LIMIT 1"), Row(1, 1) :: Nil)
intercept[AnalysisException] {
@@ -1010,8 +1010,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
// TGF with non-TGF in project is allowed in Spark SQL, but not in Hive
test("TGF with non-TGF in projection") {
- val rdd = sparkContext.makeRDD( """{"a": "1", "b":"1"}""" :: Nil)
- read.json(rdd).createOrReplaceTempView("data")
+ val ds = Seq("""{"a": "1", "b":"1"}""").toDS()
+ read.json(ds).createOrReplaceTempView("data")
checkAnswer(
sql("SELECT explode(map(a, b)) as (k1, k2), a, b FROM data"),
Row("1", "1", "1", "1") :: Nil)
@@ -1024,8 +1024,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
// is not in a valid state (cannot be executed). Because of this bug, the analysis rule of
// PreInsertionCasts will actually start to work before ImplicitGenerate and then
// generates an invalid query plan.
- val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}"""))
- read.json(rdd).createOrReplaceTempView("data")
+ val ds = (1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""").toDS()
+ read.json(ds).createOrReplaceTempView("data")
withSQLConf(SQLConf.CONVERT_CTAS.key -> "false") {
sql("CREATE TABLE explodeTest (key bigInt)")
@@ -1262,9 +1262,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
test("SPARK-9371: fix the support for special chars in column names for hive context") {
- read.json(sparkContext.makeRDD(
- """{"a": {"c.b": 1}, "b.$q": [{"a@!.q": 1}], "q.w": {"w.i&": [1]}}""" :: Nil))
- .createOrReplaceTempView("t")
+ val ds = Seq("""{"a": {"c.b": 1}, "b.$q": [{"a@!.q": 1}], "q.w": {"w.i&": [1]}}""").toDS()
+ read.json(ds).createOrReplaceTempView("t")
checkAnswer(sql("SELECT a.`c.b`, `b.$q`[0].`a@!.q`, `q.w`.`w.i&`[0] FROM t"), Row(1, 1, 1))
}