diff options
author | Herman van Hovell <hvanhovell@questtec.nl> | 2016-01-04 12:41:57 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-01-04 12:41:57 -0800 |
commit | 0171b71e9511cef512e96a759e407207037f3c49 (patch) | |
tree | 320a7ee7e2e4e37d3a75351d5be85ea4dd366be7 /sql | |
parent | 40d03960d79debdff5cef21997417c4f8a8ce2e9 (diff) | |
download | spark-0171b71e9511cef512e96a759e407207037f3c49.tar.gz spark-0171b71e9511cef512e96a759e407207037f3c49.tar.bz2 spark-0171b71e9511cef512e96a759e407207037f3c49.zip |
[SPARK-12421][SQL] Prevent Internal/External row from exposing state.
It is currently possible to change the values of the supposedly immutable ```GenericRow``` and ```GenericInternalRow``` classes. This is caused by the fact that scala's ArrayOps ```toArray``` (returned by calling ```toSeq```) will return the backing array instead of a copy. This PR fixes this problem.
This PR was inspired by https://github.com/apache/spark/pull/10374 by apo1.
cc apo1 sarutak marmbrus cloud-fan nongli (everyone in the previous conversation).
Author: Herman van Hovell <hvanhovell@questtec.nl>
Closes #10553 from hvanhovell/SPARK-12421.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala | 8 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala | 30 |
2 files changed, 34 insertions, 4 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala index cfc68fc00b..814b3c22f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala @@ -199,9 +199,9 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row { override def get(i: Int): Any = values(i) - override def toSeq: Seq[Any] = values.toSeq + override def toSeq: Seq[Any] = values.clone() - override def copy(): Row = this + override def copy(): GenericRow = this } class GenericRowWithSchema(values: Array[Any], override val schema: StructType) @@ -226,11 +226,11 @@ class GenericInternalRow(private[sql] val values: Array[Any]) extends BaseGeneri override protected def genericGet(ordinal: Int) = values(ordinal) - override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values + override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone() override def numFields: Int = values.length - override def copy(): InternalRow = new GenericInternalRow(values.clone()) + override def copy(): GenericInternalRow = this } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala index 5c22a72192..72624e7cbc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala @@ -104,4 +104,34 @@ class RowTest extends FunSpec with Matchers { internalRow shouldEqual internalRow2 } } + + describe("row immutability") { + val values = Seq(1, 2, "3", "IV", 6L) + val externalRow = Row.fromSeq(values) + val internalRow = InternalRow.fromSeq(values) + + def modifyValues(values: Seq[Any]): Seq[Any] = { + val array = values.toArray + array(2) = "42" + array + } + + it("copy should return same ref for external rows") { + externalRow should be theSameInstanceAs externalRow.copy() + } + + it("copy should return same ref for interal rows") { + internalRow should be theSameInstanceAs internalRow.copy() + } + + it("toSeq should not expose internal state for external rows") { + val modifiedValues = modifyValues(externalRow.toSeq) + externalRow.toSeq should not equal modifiedValues + } + + it("toSeq should not expose internal state for internal rows") { + val modifiedValues = modifyValues(internalRow.toSeq(Seq.empty)) + internalRow.toSeq(Seq.empty) should not equal modifiedValues + } + } } |