diff options
author | Reynold Xin <rxin@databricks.com> | 2015-10-23 13:04:06 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-10-23 13:04:06 -0700 |
commit | e1a897b657eb62e837026f7b3efafb9a6424ec4f (patch) | |
tree | 6c9553d281c65342db5ff692cbe3f4814a539eed /sql/core/src/test | |
parent | 4e38defae13b2b13e196b4d172722ef5e6266c66 (diff) | |
download | spark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.tar.gz spark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.tar.bz2 spark-e1a897b657eb62e837026f7b3efafb9a6424ec4f.zip |
[SPARK-11274] [SQL] Text data source support for Spark SQL.
This adds API for reading and writing text files, similar to SparkContext.textFile and RDD.saveAsTextFile.
```
SQLContext.read.text("/path/to/something.txt")
DataFrame.write.text("/path/to/write.txt")
```
Using the new Dataset API, this also supports
```
val ds: Dataset[String] = SQLContext.read.text("/path/to/something.txt").as[String]
```
Author: Reynold Xin <rxin@databricks.com>
Closes #9240 from rxin/SPARK-11274.
Diffstat (limited to 'sql/core/src/test')
-rw-r--r-- | sql/core/src/test/resources/text-suite.txt | 4 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala | 81 |
2 files changed, 85 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/text-suite.txt b/sql/core/src/test/resources/text-suite.txt new file mode 100644 index 0000000000..e8fd967197 --- /dev/null +++ b/sql/core/src/test/resources/text-suite.txt @@ -0,0 +1,4 @@ +This is a test file for the text data source +1+1 +数据砖头 +"doh" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala new file mode 100644 index 0000000000..0a2306c066 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.text + +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.util.Utils + + +class TextSuite extends QueryTest with SharedSQLContext { + + test("reading text file") { + verifyFrame(sqlContext.read.format("text").load(testFile)) + } + + test("SQLContext.read.text() API") { + verifyFrame(sqlContext.read.text(testFile)) + } + + test("writing") { + val df = sqlContext.read.text(testFile) + + val tempFile = Utils.createTempDir() + tempFile.delete() + df.write.text(tempFile.getCanonicalPath) + verifyFrame(sqlContext.read.text(tempFile.getCanonicalPath)) + + Utils.deleteRecursively(tempFile) + } + + test("error handling for invalid schema") { + val tempFile = Utils.createTempDir() + tempFile.delete() + + val df = sqlContext.range(2) + intercept[AnalysisException] { + df.write.text(tempFile.getCanonicalPath) + } + + intercept[AnalysisException] { + sqlContext.range(2).select(df("id"), df("id") + 1).write.text(tempFile.getCanonicalPath) + } + } + + private def testFile: String = { + Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString + } + + /** Verifies data and schema. */ + private def verifyFrame(df: DataFrame): Unit = { + // schema + assert(df.schema == new StructType().add("text", StringType)) + + // verify content + val data = df.collect() + assert(data(0) == Row("This is a test file for the text data source")) + assert(data(1) == Row("1+1")) + // non ascii characters are not allowed in the code, so we disable the scalastyle here. + // scalastyle:off + assert(data(2) == Row("数据砖头")) + // scalastyle:on + assert(data(3) == Row("\"doh\"")) + assert(data.length == 4) + } +} |