diff options
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala | 2 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala | 42 |
2 files changed, 44 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 60b0c64c7f..e251b52f6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -419,6 +419,8 @@ case class HadoopFsRelation( /** Returns the list of files that will be read when scanning this relation. */ override def inputFiles: Array[String] = location.allFiles().map(_.getPath.toUri.toString).toArray + + override def sizeInBytes: Long = location.allFiles().map(_.getLen).sum } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala new file mode 100644 index 0000000000..297731c70c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import java.io.{File, FilenameFilter} + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSQLContext + +class HadoopFsRelationSuite extends QueryTest with SharedSQLContext { + + test("sizeInBytes should be the total size of all files") { + withTempDir{ dir => + dir.delete() + sqlContext.range(1000).write.parquet(dir.toString) + // ignore hidden files + val allFiles = dir.listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = { + !name.startsWith(".") + } + }) + val totalSize = allFiles.map(_.length()).sum + val df = sqlContext.read.parquet(dir.toString) + assert(df.queryExecution.logical.statistics.sizeInBytes === BigInt(totalSize)) + } + } +} |