From 1995c2e6482bf4af5a4be087bfc156311c1bec19 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Tue, 12 Apr 2016 11:30:09 -0700 Subject: [SPARK-14563][ML] use a random table name instead of __THIS__ in SQLTransformer ## What changes were proposed in this pull request? Use a random table name instead of `__THIS__` in SQLTransformer, and add a test for `transformSchema`. The problems of using `__THIS__` are: * It doesn't work under HiveContext (in Spark 1.6) * Race conditions ## How was this patch tested? * Manual test with HiveContext. * Added a unit test for `transformSchema` to improve coverage. cc: yhuai Author: Xiangrui Meng Closes #12330 from mengxr/SPARK-14563. --- .../org/apache/spark/ml/feature/SQLTransformerSuite.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'mllib/src/test/scala') diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala index 553e0b8702..e213e17d0d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.DefaultReadWriteTest import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.sql.types.{LongType, StructField, StructType} class SQLTransformerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { @@ -49,4 +50,13 @@ class SQLTransformerSuite .setStatement("select * from __THIS__") testDefaultReadWrite(t) } + + test("transformSchema") { + val df = sqlContext.range(10) + val outputSchema = new SQLTransformer() + .setStatement("SELECT id + 1 AS id1 FROM __THIS__") + .transformSchema(df.schema) + val expected = StructType(Seq(StructField("id1", LongType, nullable = false))) + assert(outputSchema === expected) + } } -- cgit v1.2.3