about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib/util.py')
-rw-r--r-- python/pyspark/mllib/util.py 14
1 files changed, 6 insertions, 8 deletions
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index d94900cefd..639cda6350 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -16,6 +16,7 @@
#
import numpy as np
+import warnings
from pyspark.mllib.linalg import Vectors, SparseVector
from pyspark.mllib.regression import LabeledPoint
@@ -29,9 +30,9 @@ class MLUtils:
Helper methods to load, save and pre-process data used in MLlib.
"""
- @deprecated
@staticmethod
def _parse_libsvm_line(line, multiclass):
+ warnings.warn("deprecated", DeprecationWarning)
return _parse_libsvm_line(line)
@staticmethod
@@ -67,9 +68,9 @@ class MLUtils:
" but got " % type(v))
return " ".join(items)
- @deprecated
@staticmethod
def loadLibSVMFile(sc, path, multiclass=False, numFeatures=-1, minPartitions=None):
+ warnings.warn("deprecated", DeprecationWarning)
return loadLibSVMFile(sc, path, numFeatures, minPartitions)
@staticmethod
@@ -106,7 +107,6 @@ class MLUtils:
>>> tempFile.write("+1 1:1.0 3:2.0 5:3.0\\n-1\\n-1 2:4.0 4:5.0 6:6.0")
>>> tempFile.flush()
>>> examples = MLUtils.loadLibSVMFile(sc, tempFile.name).collect()
- >>> multiclass_examples = MLUtils.loadLibSVMFile(sc, tempFile.name).collect()
>>> tempFile.close()
>>> type(examples[0]) == LabeledPoint
True
@@ -115,20 +115,18 @@ class MLUtils:
>>> type(examples[1]) == LabeledPoint
True
>>> print examples[1]
- (0.0,(6,[],[]))
+ (-1.0,(6,[],[]))
>>> type(examples[2]) == LabeledPoint
True
>>> print examples[2]
- (0.0,(6,[1,3,5],[4.0,5.0,6.0]))
- >>> multiclass_examples[1].label
- -1.0
+ (-1.0,(6,[1,3,5],[4.0,5.0,6.0]))
"""
lines = sc.textFile(path, minPartitions)
parsed = lines.map(lambda l: MLUtils._parse_libsvm_line(l))
if numFeatures <= 0:
parsed.cache()
- numFeatures = parsed.map(lambda x: 0 if x[1].size == 0 else x[1][-1]).reduce(max) + 1
+ numFeatures = parsed.map(lambda x: -1 if x[1].size == 0 else x[1][-1]).reduce(max) + 1
return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2])))
@staticmethod