Diffstat (limited to 'mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala | 124
1 file changed, 124 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala
new file mode 100644
index 0000000000..32d78e9b22
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/ml/ann/LossFunction.scala
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.ann
+
+import java.util.Random
+
+import breeze.linalg.{sum => Bsum, DenseMatrix => BDM, DenseVector => BDV}
+import breeze.numerics.{log => brzlog}
+
+/**
+ * Trait for a loss function.
+ */
+private[ann] trait LossFunction {
+ /**
+ * Computes the value of the loss function for the given target
+ * and actual output, and writes the error (delta) to delta in place.
+ * Delta is allocated based on the outputSize
+ * of the model implementation.
+ *
+ * @param output actual output
+ * @param target target output
+ * @param delta delta (updated in place)
+ * @return loss
+ */
+ def loss(output: BDM[Double], target: BDM[Double], delta: BDM[Double]): Double
+}
+
+private[ann] class SigmoidLayerWithSquaredError extends Layer {
+ override val weightSize = 0
+ override val inPlace = true
+
+ override def getOutputSize(inputSize: Int): Int = inputSize
+ override def createModel(weights: BDV[Double]): LayerModel =
+ new SigmoidLayerModelWithSquaredError()
+ override def initModel(weights: BDV[Double], random: Random): LayerModel =
+ new SigmoidLayerModelWithSquaredError()
+}
+
+private[ann] class SigmoidLayerModelWithSquaredError
+ extends FunctionalLayerModel(new FunctionalLayer(new SigmoidFunction)) with LossFunction {
+ override def loss(output: BDM[Double], target: BDM[Double], delta: BDM[Double]): Double = {
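+ // delta is first filled with the raw error (output - target); the squared
+ // error is averaged over the batch (columns), and delta is then scaled by
+ // the sigmoid derivative o * (1 - o) via the chain rule.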
+ ApplyInPlace(output, target, delta, (o: Double, t: Double) => o - t)
+ val error = Bsum(delta :* delta) / 2 / output.cols
+ ApplyInPlace(delta, output, delta, (x: Double, o: Double) => x * (o - o * o))
+ error
+ }
+}
+
+private[ann] class SoftmaxLayerWithCrossEntropyLoss extends Layer {
+ override val weightSize = 0
+ override val inPlace = true
+
+ override def getOutputSize(inputSize: Int): Int = inputSize
+ override def createModel(weights: BDV[Double]): LayerModel =
+ new SoftmaxLayerModelWithCrossEntropyLoss()
+ override def initModel(weights: BDV[Double], random: Random): LayerModel =
+ new SoftmaxLayerModelWithCrossEntropyLoss()
+}
+
+private[ann] class SoftmaxLayerModelWithCrossEntropyLoss extends LayerModel with LossFunction {
+
+ // loss layer models do not have weights
+ val weights = new BDV[Double](0)
+
+ override def eval(data: BDM[Double], output: BDM[Double]): Unit = {
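+ // data holds one example per column and one class per row;
+ // a numerically stable softmax is computed column by column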
+ var j = 0
+ // find the per-column max so that the exponent below cannot overflow
+ while (j < data.cols) {
+ var i = 0
+ var max = Double.MinValue
+ while (i < data.rows) {
+ if (data(i, j) > max) {
+ max = data(i, j)
+ }
+ i += 1
+ }
+ var sum = 0.0
+ i = 0
+ while (i < data.rows) {
+ val res = math.exp(data(i, j) - max)
+ output(i, j) = res
+ sum += res
+ i += 1
+ }
+ i = 0
+ while (i < data.rows) {
+ output(i, j) /= sum
+ i += 1
+ }
+ j += 1
+ }
+ }
+ override def computePrevDelta(
+ nextDelta: BDM[Double],
+ input: BDM[Double],
+ delta: BDM[Double]): Unit = {
+ /* loss layer model computes delta in loss function */
+ }
+
+ override def grad(delta: BDM[Double], input: BDM[Double], cumGrad: BDV[Double]): Unit = {
+ /* loss layer model does not have weights */
+ }
+
+ override def loss(output: BDM[Double], target: BDM[Double], delta: BDM[Double]): Double = {
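+ // for a softmax output with cross-entropy loss, the gradient with respect
+ // to the pre-activations collapses to (output - target), so delta is
+ // filled directly without differentiating through the softmax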
+ ApplyInPlace(output, target, delta, (o: Double, t: Double) => o - t)
+ -Bsum(target :* brzlog(output)) / output.cols
+ }
+}
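
For reference, a minimal standalone sketch (not part of the patch) of the two identities this file relies on: the shift-invariant, numerically stable softmax computed in eval(), and the cross-entropy gradient collapsing to (output - target) in loss(). It assumes only Breeze on the classpath, uses the same element-wise `:*` operator as the Breeze version Spark depended on at the time, and works on a single example held in a vector rather than a one-column matrix; the object name SoftmaxCrossEntropyDemo is illustrative.

import breeze.linalg.{max, sum, DenseVector => BDV}
import breeze.numerics.{log => brzlog}

object SoftmaxCrossEntropyDemo extends App {
  // pre-activations for one example with three classes
  val z = BDV(1.0, 2.0, 0.5)
  // one-hot target: the correct class is the second one
  val target = BDV(0.0, 1.0, 0.0)

  // numerically stable softmax: subtract the max before exponentiating,
  // as eval() does; softmax is invariant to this shift
  val zmax = max(z)
  val e = z.map(v => math.exp(v - zmax))
  val output = e / sum(e)

  // cross-entropy loss, matching loss() above with a batch of one
  val loss = -sum(target :* brzlog(output))

  // gradient w.r.t. the pre-activations collapses to (output - target)
  val delta = output - target

  println(s"loss = $loss")
  println(s"delta = $delta")
}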