aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Bryan Cutler <bjcutler@us.ibm.com> 2015-12-20 09:08:23 +0000
committer Sean Owen <sowen@cloudera.com> 2015-12-20 09:08:23 +0000
commit ce1798b3af8de326bf955b51ed955a924b019b4e (patch)
tree d7f5a9b81218cf50109ad05b7adf0dc68979ad6f /mllib
parent 284e29a870bbb62f59988a5d88cd12f1b0b6f9d3 (diff)
download spark-ce1798b3af8de326bf955b51ed955a924b019b4e.tar.gz
spark-ce1798b3af8de326bf955b51ed955a924b019b4e.tar.bz2
spark-ce1798b3af8de326bf955b51ed955a924b019b4e.zip
[SPARK-10158][PYSPARK][MLLIB] ALS better error message when using Long IDs
Added a catch for the Long-to-Int cast exception raised when PySpark ALS Ratings are serialized. It is easy to accidentally use Long IDs for user/product; previously this failed with a somewhat cryptic "ClassCastException: java.lang.Long cannot be cast to java.lang.Integer." Now, when this happens, a more descriptive error is shown, e.g. "PickleException: Ratings id 1205640308657491975 exceeds max integer value of 2147483647." Author: Bryan Cutler <bjcutler@us.ibm.com>. Closes #9361 from BryanCutler/als-pyspark-long-id-error-SPARK-10158.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala 12
1 files changed, 11 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 29160a10e1..f6826ddbfa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1438,9 +1438,19 @@ private[spark] object SerDe extends Serializable {
if (args.length != 3) {
throw new PickleException("should be 3")
}
- new Rating(args(0).asInstanceOf[Int], args(1).asInstanceOf[Int],
+ new Rating(ratingsIdCheckLong(args(0)), ratingsIdCheckLong(args(1)),
args(2).asInstanceOf[Double])
}
+
+ /**
+  * Converts a Rating user/product id taken from the constructor arguments to an Int.
+  *
+  * A `java.lang.Long` whose value fits in the Int range is narrowed losslessly. Any
+  * other value goes through the same `asInstanceOf[Int]` cast as before, and a failed
+  * cast is reported with a message that states the actual problem.
+  *
+  * @param obj the id value to convert
+  * @return the id as an Int
+  * @throws PickleException if the id is a Long outside the Int range, or is a value
+  *                         that cannot be cast to Int; the ClassCastException is
+  *                         attached as the cause
+  */
+ private def ratingsIdCheckLong(obj: Object): Int = obj match {
+   // An in-range Long is a valid id; rejecting it as "exceeds max integer value"
+   // would be a false report.
+   case id: java.lang.Long if id.longValue >= Int.MinValue && id.longValue <= Int.MaxValue =>
+     id.intValue
+   case _ =>
+     try {
+       // Same cast as the original code, so inputs it already accepted are unchanged.
+       obj.asInstanceOf[Int]
+     } catch {
+       case ex: ClassCastException =>
+         // obj is non-null here: casting null does not raise ClassCastException.
+         val problem = obj match {
+           case l: java.lang.Long if l.longValue < Int.MinValue =>
+             s"is below min integer value of ${Int.MinValue}"
+           case _: java.lang.Long =>
+             s"exceeds max integer value of ${Int.MaxValue}"
+           case _ =>
+             s"is not an Int (found ${obj.getClass.getName})"
+         }
+         throw new PickleException(s"Ratings id ${obj.toString} $problem", ex)
+     }
+ }
}
var initialized = false