aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src/test
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-10-23 13:04:06 -0700
committerYin Huai <yhuai@databricks.com>2015-10-23 13:04:06 -0700
commite1a897b657eb62e837026f7b3efafb9a6424ec4f (patch)
tree6c9553d281c65342db5ff692cbe3f4814a539eed /sql/core/src/test
parent4e38defae13b2b13e196b4d172722ef5e6266c66 (diff)
downloadspark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.tar.gz
spark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.tar.bz2
spark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.zip
[SPARK-11274] [SQL] Text data source support for Spark SQL.
This adds API for reading and writing text files, similar to SparkContext.textFile and RDD.saveAsTextFile. ``` SQLContext.read.text("/path/to/something.txt") DataFrame.write.text("/path/to/write.txt") ``` Using the new Dataset API, this also supports ``` val ds: Dataset[String] = SQLContext.read.text("/path/to/something.txt").as[String] ``` Author: Reynold Xin <rxin@databricks.com> Closes #9240 from rxin/SPARK-11274.
Diffstat (limited to 'sql/core/src/test')
-rw-r--r--sql/core/src/test/resources/text-suite.txt4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala81
2 files changed, 85 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/text-suite.txt b/sql/core/src/test/resources/text-suite.txt
new file mode 100644
index 0000000000..e8fd967197
--- /dev/null
+++ b/sql/core/src/test/resources/text-suite.txt
@@ -0,0 +1,4 @@
+This is a test file for the text data source
+1+1
+数据砖头
+"doh"
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
new file mode 100644
index 0000000000..0a2306c066
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.text
+
+import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.{StringType, StructType}
+import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row}
+import org.apache.spark.util.Utils
+
+
+class TextSuite extends QueryTest with SharedSQLContext {
+
+ test("reading text file") {
+ verifyFrame(sqlContext.read.format("text").load(testFile))
+ }
+
+ test("SQLContext.read.text() API") {
+ verifyFrame(sqlContext.read.text(testFile))
+ }
+
+ test("writing") {
+ val df = sqlContext.read.text(testFile)
+
+ val tempFile = Utils.createTempDir()
+ tempFile.delete()
+ df.write.text(tempFile.getCanonicalPath)
+ verifyFrame(sqlContext.read.text(tempFile.getCanonicalPath))
+
+ Utils.deleteRecursively(tempFile)
+ }
+
+ test("error handling for invalid schema") {
+ val tempFile = Utils.createTempDir()
+ tempFile.delete()
+
+ val df = sqlContext.range(2)
+ intercept[AnalysisException] {
+ df.write.text(tempFile.getCanonicalPath)
+ }
+
+ intercept[AnalysisException] {
+ sqlContext.range(2).select(df("id"), df("id") + 1).write.text(tempFile.getCanonicalPath)
+ }
+ }
+
+ private def testFile: String = {
+ Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString
+ }
+
+ /** Verifies data and schema. */
+ private def verifyFrame(df: DataFrame): Unit = {
+ // schema
+ assert(df.schema == new StructType().add("text", StringType))
+
+ // verify content
+ val data = df.collect()
+ assert(data(0) == Row("This is a test file for the text data source"))
+ assert(data(1) == Row("1+1"))
+ // non ascii characters are not allowed in the code, so we disable the scalastyle here.
+ // scalastyle:off
+ assert(data(2) == Row("数据砖头"))
+ // scalastyle:on
+ assert(data(3) == Row("\"doh\""))
+ assert(data.length == 4)
+ }
+}