about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2015-03-28 15:08:05 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-03-28 15:08:05 -0700
commit: f75f633b21faaf911f04aeff847f25749b1ecd89 (patch)
tree: 3cfa55e9e403283b4a10b2c5c56c4ca9e5289c5b
parent: 99631438c0ec777d6a77974b148dbbd3e890260e (diff)
download: spark-f75f633b21faaf911f04aeff847f25749b1ecd89.tar.gz
          spark-f75f633b21faaf911f04aeff847f25749b1ecd89.tar.bz2
          spark-f75f633b21faaf911f04aeff847f25749b1ecd89.zip
[SPARK-6571][MLLIB] use wrapper in MatrixFactorizationModel.load
This fixes `predictAll` after load. jkbradley

Author: Xiangrui Meng <meng@databricks.com>

Closes #5243 from mengxr/SPARK-6571 and squashes the following commits:

82dcaa7 [Xiangrui Meng] use wrapper in MatrixFactorizationModel.load
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala | 40
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 18
-rw-r--r-- python/pyspark/mllib/recommendation.py | 8
3 files changed, 48 insertions, 18 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala
new file mode 100644
index 0000000000..ecd3b16598
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.api.python
+
+import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
+import org.apache.spark.rdd.RDD
+
+/**
+ * A Wrapper of MatrixFactorizationModel to provide helper method for Python.
+ */
+private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
+ extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
+
+ def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
+ predict(SerDe.asTupleRDD(userAndProducts.rdd))
+
+ def getUserFeatures: RDD[Array[Any]] = {
+ SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
+ }
+
+ def getProductFeatures: RDD[Array[Any]] = {
+ SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
+ }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index e391567347..22fa684fd2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -58,7 +58,6 @@ import org.apache.spark.util.Utils
*/
private[python] class PythonMLLibAPI extends Serializable {
-
/**
* Loads and serializes labeled points saved with `RDD#saveAsTextFile`.
* @param jsc Java SparkContext
@@ -346,24 +345,7 @@ private[python] class PythonMLLibAPI extends Serializable {
model.predictSoft(data)
}
- /**
- * A Wrapper of MatrixFactorizationModel to provide helpfer method for Python
- */
- private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
- extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
- def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
- predict(SerDe.asTupleRDD(userAndProducts.rdd))
-
- def getUserFeatures: RDD[Array[Any]] = {
- SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
- }
-
- def getProductFeatures: RDD[Array[Any]] = {
- SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
- }
-
- }
/**
* Java stub for Python mllib ALS.train(). This stub returns a handle
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 1a4527b12c..b094e50856 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -90,6 +90,8 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
>>> sameModel = MatrixFactorizationModel.load(sc, path)
>>> sameModel.predict(2,2)
0.43...
+ >>> sameModel.predictAll(testset).collect()
+ [Rating(...
>>> try:
... os.removedirs(path)
... except OSError:
@@ -111,6 +113,12 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
def productFeatures(self):
return self.call("getProductFeatures")
+ @classmethod
+ def load(cls, sc, path):
+ model = cls._load_java(sc, path)
+ wrapper = sc._jvm.MatrixFactorizationModelWrapper(model)
+ return MatrixFactorizationModel(wrapper)
+
class ALS(object):