aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main
diff options
context:
space:
mode:
authorOctavian Geagla <ogeagla@gmail.com>2015-05-07 14:49:55 -0700
committerJoseph K. Bradley <joseph@databricks.com>2015-05-07 14:50:04 -0700
commit76e58b5d8897fa62a21745d3cfb4a0efa52de1e8 (patch)
tree1f8c811f5472c207dbd738fbd6e1bb9c0aa5f21c /mllib/src/main
parent4436e26e43034c8f259cbae50775ebfe594eca2e (diff)
downloadspark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.tar.gz
spark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.tar.bz2
spark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.zip
[SPARK-5726] [MLLIB] Elementwise (Hadamard) Vector Product Transformer
See https://issues.apache.org/jira/browse/SPARK-5726 Author: Octavian Geagla <ogeagla@gmail.com> Author: Joseph K. Bradley <joseph@databricks.com> Closes #4580 from ogeagla/spark-mllib-weighting and squashes the following commits: fac12ad [Octavian Geagla] [SPARK-5726] [MLLIB] Use new createTransformFunc. 90f7e39 [Joseph K. Bradley] small cleanups 4595165 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove erroneous test case. ded3ac6 [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. 37d4705 [Octavian Geagla] [SPARK-5726] [MLLIB] Incorporated feedback. 1dffeee [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. e436896 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove 'TF' from 'ElementwiseProductTF' cb520e6 [Octavian Geagla] [SPARK-5726] [MLLIB] Rename HadamardProduct to ElementwiseProduct 4922722 [Octavian Geagla] [SPARK-5726] [MLLIB] Hadamard Vector Product Transformer (cherry picked from commit 658a478d3f86456df09d0fbb1ba438fb36d8725c) Signed-off-by: Joseph K. Bradley <joseph@databricks.com>
Diffstat (limited to 'mllib/src/main')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala55
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala64
2 files changed, 119 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
new file mode 100644
index 0000000000..f8b56293e3
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature
+
+import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.ml.UnaryTransformer
+import org.apache.spark.ml.param.Param
+import org.apache.spark.mllib.feature
+import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
+import org.apache.spark.sql.types.DataType
+
+/**
+ * :: AlphaComponent ::
+ * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
+ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar
+ * multiplier.
+ */
+@AlphaComponent
+class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] {
+
+ /**
+ * The vector to multiply with input vectors, element-wise (one multiplier per column).
+ * @group param
+ */
+ val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product")
+
+ /** @group setParam */
+ def setScalingVec(value: Vector): this.type = set(scalingVec, value)
+
+ /** @group getParam */
+ def getScalingVec: Vector = getOrDefault(scalingVec)
+
+ override protected def createTransformFunc: Vector => Vector = {
+ require(params.contains(scalingVec), s"transformation requires a weight vector") // no default: caller must set scalingVec first
+ val elemScaler = new feature.ElementwiseProduct($(scalingVec)) // delegate the math to the mllib transformer
+ elemScaler.transform
+ }
+
+ override protected def outputDataType: DataType = new VectorUDT() // output column holds Vectors
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala
new file mode 100644
index 0000000000..b0985baf9b
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.feature
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.mllib.linalg._
+
+/**
+ * :: Experimental ::
+ * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
+ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar
+ * multiplier.
+ * @param scalingVector The values used to scale the reference vector's individual components.
+ */
+@Experimental
+class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer {
+
+ /**
+ * Applies the Hadamard (element-wise) product of the input vector with [[scalingVector]].
+ *
+ * @param vector vector to be transformed; must have the same size as `scalingVector`.
+ * @return transformed vector (same sparsity structure as the input).
+ */
+ override def transform(vector: Vector): Vector = {
+ require(vector.size == scalingVector.size,
+ s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}")
+ vector match {
+ case dv: DenseVector =>
+ val values: Array[Double] = dv.values.clone() // copy so the input vector is not mutated
+ val dim = scalingVector.size
+ var i = 0
+ while (i < dim) {
+ values(i) *= scalingVector(i)
+ i += 1
+ }
+ Vectors.dense(values)
+ case SparseVector(size, indices, vs) =>
+ val values = vs.clone() // copy so the input vector is not mutated
+ val dim = values.length // only stored (non-zero) entries need scaling
+ var i = 0
+ while (i < dim) {
+ values(i) *= scalingVector(indices(i)) // scale each stored entry by its column's multiplier
+ i += 1
+ }
+ Vectors.sparse(size, indices, values)
+ case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass)
+ }
+ }
+}