aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-01-04 12:41:57 -0800
committerMichael Armbrust <michael@databricks.com>2016-01-04 12:41:57 -0800
commit0171b71e9511cef512e96a759e407207037f3c49 (patch)
tree320a7ee7e2e4e37d3a75351d5be85ea4dd366be7
parent40d03960d79debdff5cef21997417c4f8a8ce2e9 (diff)
downloadspark-0171b71e9511cef512e96a759e407207037f3c49.tar.gz
spark-0171b71e9511cef512e96a759e407207037f3c49.tar.bz2
spark-0171b71e9511cef512e96a759e407207037f3c49.zip
[SPARK-12421][SQL] Prevent Internal/External row from exposing state.
It is currently possible to change the values of the supposedly immutable ```GenericRow``` and ```GenericInternalRow``` classes. This is caused by the fact that scala's ArrayOps ```toArray``` (returned by calling ```toSeq```) will return the backing array instead of a copy. This PR fixes this problem. This PR was inspired by https://github.com/apache/spark/pull/10374 by apo1. cc apo1 sarutak marmbrus cloud-fan nongli (everyone in the previous conversation). Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #10553 from hvanhovell/SPARK-12421.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala8
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala30
2 files changed, 34 insertions, 4 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index cfc68fc00b..814b3c22f8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -199,9 +199,9 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
override def get(i: Int): Any = values(i)
- override def toSeq: Seq[Any] = values.toSeq
+ override def toSeq: Seq[Any] = values.clone()
- override def copy(): Row = this
+ override def copy(): GenericRow = this
}
class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
@@ -226,11 +226,11 @@ class GenericInternalRow(private[sql] val values: Array[Any]) extends BaseGeneri
override protected def genericGet(ordinal: Int) = values(ordinal)
- override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values
+ override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone()
override def numFields: Int = values.length
- override def copy(): InternalRow = new GenericInternalRow(values.clone())
+ override def copy(): GenericInternalRow = this
}
/**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
index 5c22a72192..72624e7cbc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
@@ -104,4 +104,34 @@ class RowTest extends FunSpec with Matchers {
internalRow shouldEqual internalRow2
}
}
+
+ describe("row immutability") {
+ val values = Seq(1, 2, "3", "IV", 6L)
+ val externalRow = Row.fromSeq(values)
+ val internalRow = InternalRow.fromSeq(values)
+
+ def modifyValues(values: Seq[Any]): Seq[Any] = {
+ val array = values.toArray
+ array(2) = "42"
+ array
+ }
+
+ it("copy should return same ref for external rows") {
+ externalRow should be theSameInstanceAs externalRow.copy()
+ }
+
+ it("copy should return same ref for interal rows") {
+ internalRow should be theSameInstanceAs internalRow.copy()
+ }
+
+ it("toSeq should not expose internal state for external rows") {
+ val modifiedValues = modifyValues(externalRow.toSeq)
+ externalRow.toSeq should not equal modifiedValues
+ }
+
+ it("toSeq should not expose internal state for internal rows") {
+ val modifiedValues = modifyValues(internalRow.toSeq(Seq.empty))
+ internalRow.toSeq(Seq.empty) should not equal modifiedValues
+ }
+ }
}