aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Xusen Yin <yinxusen@gmail.com> 2015-05-19 00:06:33 -0700
committer Joseph K. Bradley <joseph@databricks.com> 2015-05-19 00:06:33 -0700
commit 6008ec14ed6491d0a854bb50548c46f2f9709269 (patch)
tree 661a44f7a89879250117e943fe4a21f051f3912d /mllib
parent 23cf897112624ece19a3b5e5394cdf71b9c3c8b3 (diff)
download spark-6008ec14ed6491d0a854bb50548c46f2f9709269.tar.gz
spark-6008ec14ed6491d0a854bb50548c46f2f9709269.tar.bz2
spark-6008ec14ed6491d0a854bb50548c46f2f9709269.zip
[SPARK-7581] [ML] [DOC] User guide for spark.ml PolynomialExpansion
JIRA [here](https://issues.apache.org/jira/browse/SPARK-7581). CC jkbradley Author: Xusen Yin <yinxusen@gmail.com> Closes #6113 from yinxusen/SPARK-7581 and squashes the following commits: 1a7d80d [Xusen Yin] merge with master 892a8e9 [Xusen Yin] fix python 3 compatibility ec935bf [Xusen Yin] small fix 3e9fa1d [Xusen Yin] delete note 69fcf85 [Xusen Yin] simplify and add python example 81d21dc [Xusen Yin] add programming guide for Polynomial Expansion 40babfb [Xusen Yin] add java test suite for PolynomialExpansion
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java 91
1 files changed, 91 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java
new file mode 100644
index 0000000000..5e8211c2c5
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature;
+
+import com.google.common.collect.Lists;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+/** Java-API test for {@link PolynomialExpansion} run against a local Spark context. */
+public class JavaPolynomialExpansionSuite {
+  private transient JavaSparkContext jsc;
+  private transient SQLContext jsql;
+
+  /** Starts a fresh local Spark context and SQL context before each test. */
+  @Before
+  public void setUp() {
+    jsc = new JavaSparkContext("local", "JavaPolynomialExpansionSuite");
+    jsql = new SQLContext(jsc);
+  }
+
+  /** Stops the Spark context; guarded because JUnit runs this even when setUp fails. */
+  @After
+  public void tearDown() {
+    if (jsc != null) {
+      jsc.stop();
+    }
+    jsc = null;
+    jsql = null;
+  }
+
+  /** Expands 2-d vectors to degree 3 and compares the output with the "expected" column. */
+  @Test
+  public void polynomialExpansionTest() {
+    PolynomialExpansion polyExpansion = new PolynomialExpansion()
+      .setInputCol("features").setOutputCol("polyFeatures").setDegree(3);
+
+    // Each row pairs an input vector with its expected degree-3 expansion.
+    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+      RowFactory.create(
+        Vectors.dense(-2.0, 2.3),
+        Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
+      RowFactory.create(Vectors.dense(0.0, 0.0), Vectors.dense(new double[9])),
+      RowFactory.create(
+        Vectors.dense(0.6, -1.1),
+        Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331))));
+
+    StructType schema = new StructType(new StructField[] {
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+      new StructField("expected", new VectorUDT(), false, Metadata.empty())
+    });
+    DataFrame dataset = jsql.createDataFrame(data, schema);
+
+    Row[] pairs = polyExpansion.transform(dataset).select("polyFeatures", "expected").collect();
+    for (Row r : pairs) {
+      double[] polyFeatures = ((Vector) r.get(0)).toArray();
+      double[] expected = ((Vector) r.get(1)).toArray();
+      // JUnit's signature is (expecteds, actuals, delta); the order decides the failure message.
+      Assert.assertArrayEquals(expected, polyFeatures, 1e-1);
+    }
+  }
+}