diff options
author | Davies Liu <davies@databricks.com> | 2016-03-08 17:42:52 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-03-08 17:42:52 -0800 |
commit | 982ef2b87e3a9e0f3f252a1a0f30970cafe58c52 (patch) | |
tree | b8c2f4129cd3eb5bf5434b124f6adaaaba8fd6d0 /sql/core/src | |
parent | d8813fa043e8b8f7cbf6921d4c7ec889634f7abd (diff) | |
download | spark-982ef2b87e3a9e0f3f252a1a0f30970cafe58c52.tar.gz spark-982ef2b87e3a9e0f3f252a1a0f30970cafe58c52.tar.bz2 spark-982ef2b87e3a9e0f3f252a1a0f30970cafe58c52.zip |
[SPARK-13750][SQL] fix sizeInBytes of HadoopFsRelation
## What changes were proposed in this pull request?
This PR fixes the sizeInBytes of HadoopFsRelation.
## How was this patch tested?
Added a regression test for it.
Author: Davies Liu <davies@databricks.com>
Closes #11590 from davies/fix_sizeInBytes.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala | 2 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala | 42 |
2 files changed, 44 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 60b0c64c7f..e251b52f6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -419,6 +419,8 @@ case class HadoopFsRelation( /** Returns the list of files that will be read when scanning this relation. */ override def inputFiles: Array[String] = location.allFiles().map(_.getPath.toUri.toString).toArray + + override def sizeInBytes: Long = location.allFiles().map(_.getLen).sum } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala new file mode 100644 index 0000000000..297731c70c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import java.io.{File, FilenameFilter} + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSQLContext + +class HadoopFsRelationSuite extends QueryTest with SharedSQLContext { + + test("sizeInBytes should be the total size of all files") { + withTempDir{ dir => + dir.delete() + sqlContext.range(1000).write.parquet(dir.toString) + // ignore hidden files + val allFiles = dir.listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = { + !name.startsWith(".") + } + }) + val totalSize = allFiles.map(_.length()).sum + val df = sqlContext.read.parquet(dir.toString) + assert(df.queryExecution.logical.statistics.sizeInBytes === BigInt(totalSize)) + } + } +} |