author    Davies Liu <davies@databricks.com>        2016-03-08 17:42:52 -0800
committer Michael Armbrust <michael@databricks.com> 2016-03-08 17:42:52 -0800
commit    982ef2b87e3a9e0f3f252a1a0f30970cafe58c52 (patch)
tree      b8c2f4129cd3eb5bf5434b124f6adaaaba8fd6d0
parent    d8813fa043e8b8f7cbf6921d4c7ec889634f7abd (diff)
[SPARK-13750][SQL] fix sizeInBytes of HadoopFsRelation
## What changes were proposed in this pull request?

This PR fixes the sizeInBytes of HadoopFsRelation.

## How was this patch tested?

Added a regression test for it.

Author: Davies Liu <davies@databricks.com>

Closes #11590 from davies/fix_sizeInBytes.
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala                      |  2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala | 42
2 files changed, 44 insertions(+), 0 deletions(-)
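Context note (not part of the commit): `sizeInBytes` feeds a relation's logical-plan statistics, which the planner compares against `spark.sql.autoBroadcastJoinThreshold` (10 MB by default) when deciding whether a table is small enough to broadcast in a join. A minimal sketch of that kind of check, using only the public `getConf` API; `canBroadcast` is a hypothetical helper, not Spark code:

```scala
import org.apache.spark.sql.SQLContext

// Sketch only: how a size estimate is typically compared against the
// broadcast threshold. A non-positive threshold disables auto-broadcast.
def canBroadcast(sqlContext: SQLContext, sizeInBytes: BigInt): Boolean = {
  val threshold =
    sqlContext.getConf("spark.sql.autoBroadcastJoinThreshold", "10485760").toLong
  threshold > 0 && sizeInBytes <= BigInt(threshold)
}
```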
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 60b0c64c7f..e251b52f6c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -419,6 +419,8 @@ case class HadoopFsRelation(
   /** Returns the list of files that will be read when scanning this relation. */
   override def inputFiles: Array[String] =
     location.allFiles().map(_.getPath.toUri.toString).toArray
+
+  override def sizeInBytes: Long = location.allFiles().map(_.getLen).sum
 }

 /**
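Without this override, the relation fell back to `BaseRelation.sizeInBytes`, which reports `spark.sql.defaultSizeInBytes` — by default larger than the auto-broadcast threshold — so file-based relations always looked too big to broadcast. The same summation can be sketched standalone against Hadoop's `FileSystem` API; the helper below is illustrative, not the patch's code:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Hypothetical standalone equivalent: sum the lengths of the visible files
// under a directory, skipping hidden dot-files as the test below does.
def totalSizeInBytes(dir: String): Long = {
  val path = new Path(dir)
  val fs = FileSystem.get(path.toUri, new Configuration())
  fs.listStatus(path)
    .filterNot(_.getPath.getName.startsWith("."))
    .map(_.getLen)
    .sum
}
```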
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala
new file mode 100644
index 0000000000..297731c70c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import java.io.{File, FilenameFilter}
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.test.SharedSQLContext
+
+class HadoopFsRelationSuite extends QueryTest with SharedSQLContext {
+
+ test("sizeInBytes should be the total size of all files") {
+ withTempDir{ dir =>
+ dir.delete()
+ sqlContext.range(1000).write.parquet(dir.toString)
+ // ignore hidden files
+ val allFiles = dir.listFiles(new FilenameFilter {
+ override def accept(dir: File, name: String): Boolean = {
+ !name.startsWith(".")
+ }
+ })
+ val totalSize = allFiles.map(_.length()).sum
+ val df = sqlContext.read.parquet(dir.toString)
+ assert(df.queryExecution.logical.statistics.sizeInBytes === BigInt(totalSize))
+ }
+ }
+}
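One way to see the fix end to end (hypothetical paths, assuming the default 10 MB threshold): with an accurate size estimate, a small Parquet table should now qualify for a broadcast join, which shows up in the physical plan.

```scala
// Hypothetical usage; paths and data are placeholders. Before this patch the
// small table's size was reported as the (large) default, so the planner would
// not broadcast it; afterwards explain() should show a broadcast join.
val small = sqlContext.read.parquet("/tmp/small_table") // a few KB on disk
val large = sqlContext.read.parquet("/tmp/large_table")
large.join(small, "id").explain()
```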