From 54cda0deb6bebf1470f16ba5bcc6c4fb842bdac1 Mon Sep 17 00:00:00 2001
From: Davies Liu
Date: Thu, 27 Aug 2015 16:38:00 -0700
Subject: [SPARK-10321] sizeInBytes in HadoopFsRelation

Having sizeInBytes in HadoopFsRelation to enable broadcast join.

cc marmbrus

Author: Davies Liu

Closes #8490 from davies/sizeInByte.
---
 sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index dff726b33f..7b030b7d73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -518,6 +518,8 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio
 
   override def inputFiles: Array[String] = cachedLeafStatuses().map(_.getPath.toString).toArray
 
+  override def sizeInBytes: Long = cachedLeafStatuses().map(_.getLen).sum
+
   /**
    * Partition columns. Can be either defined by [[userDefinedPartitionColumns]] or automatically
    * discovered. Note that they should always be nullable.
-- 
cgit v1.2.3