aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/test
diff options
context:
space:
mode:
author: Grzegorz Chilkiewicz <grzegorz.chilkiewicz@codilime.com> 2016-02-02 11:16:24 -0800
committer: Joseph K. Bradley <joseph@databricks.com> 2016-02-02 11:16:24 -0800
commit: b1835d727234fdff42aa8cadd17ddcf43b0bed15 (patch)
tree: 9cd6c3ed62a9212c15bee65e48c5f228ff6e3bdf /mllib/src/test
parent: 358300c795025735c3b2f96c5447b1b227d4abc1 (diff)
downloadspark-b1835d727234fdff42aa8cadd17ddcf43b0bed15.tar.gz
spark-b1835d727234fdff42aa8cadd17ddcf43b0bed15.tar.bz2
spark-b1835d727234fdff42aa8cadd17ddcf43b0bed15.zip
[SPARK-12711][ML] ML StopWordsRemover does not protect itself from column name duplication
Fixes the problem and verifies the fix with a new test in the test suite. Also adds an optional parameter, nullable (Boolean), to SchemaUtils.appendColumn and deduplicates the SchemaUtils.appendColumn functions. Author: Grzegorz Chilkiewicz <grzegorz.chilkiewicz@codilime.com> Closes #10741 from grzegorz-chilkiewicz/master.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala 15
1 files changed, 15 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
index fb217e0c1d..a5b24c1856 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
@@ -89,4 +89,19 @@ class StopWordsRemoverSuite
.setCaseSensitive(true)
testDefaultReadWrite(t)
}
+
+ test("StopWordsRemover output column already exists") { // regression test: output column name collides with an existing input column
+ val outputCol = "expected" // used below both as the remover's output column and as a column of the input DataFrame
+ val remover = new StopWordsRemover()
+ .setInputCol("raw")
+ .setOutputCol(outputCol)
+ val dataSet = sqlContext.createDataFrame(Seq(
+ (Seq("The", "the", "swift"), Seq("swift"))
+ )).toDF("raw", outputCol) // the input already contains a column named outputCol
+
+ val thrown = intercept[IllegalArgumentException] { // the test fails unless the body throws IllegalArgumentException
+ testStopWordsRemover(remover, dataSet) // helper defined elsewhere in the suite; presumably runs remover.transform on dataSet — confirm
+ }
+ assert(thrown.getMessage == s"requirement failed: Column $outputCol already exists.") // "requirement failed: " is the prefix scala.Predef.require adds; presumably raised by the schema check — confirm
+ }
}