From 14e2700de29d06460179a94cc9816bcd37344cf7 Mon Sep 17 00:00:00 2001 From: Yu ISHIKAWA Date: Thu, 25 Feb 2016 13:21:33 -0800 Subject: [SPARK-12874][ML] ML StringIndexer does not protect itself from column name duplication ## What changes were proposed in this pull request? ML StringIndexer does not protect itself from column name duplication. The schema validation of `StringIndexer` and `StringIndexerModel` still needs further improvement, but that is better addressed in a separate issue. ## How was this patch tested? Tested with a new unit test, "StringIndexerModel can't overwrite output column", in `StringIndexerSuite`. Author: Yu ISHIKAWA Closes #11370 from yu-iskw/SPARK-12874. --- .../org/apache/spark/ml/feature/StringIndexerSuite.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'mllib/src/test/scala') diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index 5d199ca9b5..0dbaed2522 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -118,6 +118,17 @@ class StringIndexerSuite assert(indexerModel.transform(df).eq(df)) } + test("StringIndexerModel can't overwrite output column") { + val df = sqlContext.createDataFrame(Seq((1, 2), (3, 4))).toDF("input", "output") + val indexer = new StringIndexer() + .setInputCol("input") + .setOutputCol("output") + .fit(df) + intercept[IllegalArgumentException] { + indexer.transform(df) + } + } + test("StringIndexer read/write") { val t = new StringIndexer() .setInputCol("myInputCol") -- cgit v1.2.3