about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Grzegorz Chilkiewicz <grzegorz.chilkiewicz@codilime.com> 2016-02-23 10:30:02 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-02-23 10:30:02 -0800
commit: 5d69eaf097bfb9fad9f6e4433c6cd40ba0552a56 (patch)
tree: 8599e2ad1cdd1a1d1ad0441facefa42eb0d0c2b9 /mllib
parent: 4d1e5f92e10f713e52b5e2dcaa6d8982669c1988 (diff)
download: spark-5d69eaf097bfb9fad9f6e4433c6cd40ba0552a56.tar.gz
spark-5d69eaf097bfb9fad9f6e4433c6cd40ba0552a56.tar.bz2
spark-5d69eaf097bfb9fad9f6e4433c6cd40ba0552a56.zip
[SPARK-13338][ML] Allow setting 'degree' parameter to 1 for PolynomialExpansion
Author: Grzegorz Chilkiewicz <grzegorz.chilkiewicz@codilime.com>

Closes #11216 from grzegorz-chilkiewicz/master.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 2
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala | 72
2 files changed, 41 insertions, 33 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 42b26c8ee8..0a9b9719c1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -46,7 +46,7 @@ class PolynomialExpansion(override val uid: String)
* @group param
*/
val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)",
- ParamValidators.gt(1))
+ ParamValidators.gtEq(1))
setDefault(degree -> 2)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
index dfdc5792c6..86dbee1cf4 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
@@ -34,22 +34,31 @@ class PolynomialExpansionSuite
ParamsSuite.checkParams(new PolynomialExpansion)
}
- test("Polynomial expansion with default parameter") {
- val data = Array(
- Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
- Vectors.dense(-2.0, 2.3),
- Vectors.dense(0.0, 0.0, 0.0),
- Vectors.dense(0.6, -1.1, -3.0),
- Vectors.sparse(3, Seq())
- )
-
- val twoDegreeExpansion: Array[Vector] = Array(
- Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
- Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
- Vectors.dense(new Array[Double](9)),
- Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
- Vectors.sparse(9, Array.empty, Array.empty))
+ private val data = Array(
+ Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
+ Vectors.dense(-2.0, 2.3),
+ Vectors.dense(0.0, 0.0, 0.0),
+ Vectors.dense(0.6, -1.1, -3.0),
+ Vectors.sparse(3, Seq())
+ )
+
+ private val twoDegreeExpansion: Array[Vector] = Array(
+ Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
+ Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
+ Vectors.dense(new Array[Double](9)),
+ Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
+ Vectors.sparse(9, Array.empty, Array.empty))
+
+ private val threeDegreeExpansion: Array[Vector] = Array(
+ Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
+ Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
+ Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
+ Vectors.dense(new Array[Double](19)),
+ Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
+ -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
+ Vectors.sparse(19, Array.empty, Array.empty))
+ test("Polynomial expansion with default parameter") {
val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
val polynomialExpansion = new PolynomialExpansion()
@@ -67,23 +76,6 @@ class PolynomialExpansionSuite
}
test("Polynomial expansion with setter") {
- val data = Array(
- Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
- Vectors.dense(-2.0, 2.3),
- Vectors.dense(0.0, 0.0, 0.0),
- Vectors.dense(0.6, -1.1, -3.0),
- Vectors.sparse(3, Seq())
- )
-
- val threeDegreeExpansion: Array[Vector] = Array(
- Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
- Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
- Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
- Vectors.dense(new Array[Double](19)),
- Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
- -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
- Vectors.sparse(19, Array.empty, Array.empty))
-
val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
val polynomialExpansion = new PolynomialExpansion()
@@ -101,6 +93,22 @@ class PolynomialExpansionSuite
}
}
+ test("Polynomial expansion with degree 1 is identity on vectors") {
+ val df = sqlContext.createDataFrame(data.zip(data)).toDF("features", "expected")
+
+ val polynomialExpansion = new PolynomialExpansion()
+ .setInputCol("features")
+ .setOutputCol("polyFeatures")
+ .setDegree(1)
+
+ polynomialExpansion.transform(df).select("polyFeatures", "expected").collect().foreach {
+ case Row(expanded: Vector, expected: Vector) =>
+ assert(expanded ~== expected absTol 1e-1)
+ case _ =>
+ throw new TestFailedException("Unmatched data types after polynomial expansion", 0)
+ }
+ }
+
test("read/write") {
val t = new PolynomialExpansion()
.setInputCol("myInputCol")