aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main
diff options
context:
space:
mode:
authorOctavian Geagla <ogeagla@gmail.com>2015-05-07 14:49:55 -0700
committerJoseph K. Bradley <joseph@databricks.com>2015-05-07 14:50:04 -0700
commit76e58b5d8897fa62a21745d3cfb4a0efa52de1e8 (patch)
tree1f8c811f5472c207dbd738fbd6e1bb9c0aa5f21c /mllib/src/main
parent4436e26e43034c8f259cbae50775ebfe594eca2e (diff)
downloadspark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.tar.gz
spark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.tar.bz2
spark-76e58b5d8897fa62a21745d3cfb4a0efa52de1e8.zip
[SPARK-5726] [MLLIB] Elementwise (Hadamard) Vector Product Transformer
See https://issues.apache.org/jira/browse/SPARK-5726 Author: Octavian Geagla <ogeagla@gmail.com> Author: Joseph K. Bradley <joseph@databricks.com> Closes #4580 from ogeagla/spark-mllib-weighting and squashes the following commits: fac12ad [Octavian Geagla] [SPARK-5726] [MLLIB] Use new createTransformFunc. 90f7e39 [Joseph K. Bradley] small cleanups 4595165 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove erroneous test case. ded3ac6 [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. 37d4705 [Octavian Geagla] [SPARK-5726] [MLLIB] Incorporated feedback. 1dffeee [Octavian Geagla] [SPARK-5726] [MLLIB] Pass style checks. e436896 [Octavian Geagla] [SPARK-5726] [MLLIB] Remove 'TF' from 'ElementwiseProductTF' cb520e6 [Octavian Geagla] [SPARK-5726] [MLLIB] Rename HadamardProduct to ElementwiseProduct 4922722 [Octavian Geagla] [SPARK-5726] [MLLIB] Hadamard Vector Product Transformer (cherry picked from commit 658a478d3f86456df09d0fbb1ba438fb36d8725c) Signed-off-by: Joseph K. Bradley <joseph@databricks.com>
Diffstat (limited to 'mllib/src/main')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala55
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala64
2 files changed, 119 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
new file mode 100644
index 0000000000..f8b56293e3
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature
+
+import org.apache.spark.annotation.AlphaComponent
+import org.apache.spark.ml.UnaryTransformer
+import org.apache.spark.ml.param.Param
+import org.apache.spark.mllib.feature
+import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
+import org.apache.spark.sql.types.DataType
+
+/**
+ * :: AlphaComponent ::
+ * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
+ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar
+ * multiplier.
+ */
+@AlphaComponent
+class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] {
+
+ /**
+ * The vector to multiply with input vectors, element-wise (one multiplier per column).
+ * @group param
+ */
+ val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product")
+
+ /** @group setParam */
+ def setScalingVec(value: Vector): this.type = set(scalingVec, value)
+
+ /** @group getParam */
+ def getScalingVec: Vector = getOrDefault(scalingVec)
+
+ override protected def createTransformFunc: Vector => Vector = {
+ require(params.contains(scalingVec), s"transformation requires a weight vector") // no default: caller must set scalingVec first
+ val elemScaler = new feature.ElementwiseProduct($(scalingVec)) // delegate the math to the mllib transformer
+ elemScaler.transform
+ }
+
+ override protected def outputDataType: DataType = new VectorUDT() // output column holds Vectors
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala
new file mode 100644
index 0000000000..b0985baf9b
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.feature
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.mllib.linalg._
+
+/**
+ * :: Experimental ::
+ * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a
+ * provided "weight" vector. In other words, it scales each column of the dataset by a scalar
+ * multiplier.
+ * @param scalingVector The values used to scale the reference vector's individual components.
+ */
+@Experimental
+class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer {
+
+ /**
+ * Applies the Hadamard (element-wise) product of the input vector with [[scalingVector]].
+ *
+ * @param vector vector to be transformed; must have the same size as `scalingVector`.
+ * @return transformed vector (same sparsity structure as the input).
+ */
+ override def transform(vector: Vector): Vector = {
+ require(vector.size == scalingVector.size,
+ s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}")
+ vector match {
+ case dv: DenseVector =>
+ val values: Array[Double] = dv.values.clone() // copy so the input vector is not mutated
+ val dim = scalingVector.size
+ var i = 0
+ while (i < dim) {
+ values(i) *= scalingVector(i)
+ i += 1
+ }
+ Vectors.dense(values)
+ case SparseVector(size, indices, vs) =>
+ val values = vs.clone() // copy so the input vector is not mutated
+ val dim = values.length // only stored (non-zero) entries need scaling
+ var i = 0
+ while (i < dim) {
+ values(i) *= scalingVector(indices(i)) // scale each stored entry by its column's multiplier
+ i += 1
+ }
+ Vectors.sparse(size, indices, values)
+ case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass)
+ }
+ }
+}