about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/mllib/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/mllib/util.py')
-rw-r--r-- python/pyspark/mllib/util.py 26
1 files changed, 12 insertions, 14 deletions
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index c5c3468eb9..16a90db146 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -15,10 +15,14 @@
# limitations under the License.
#
+import sys
import numpy as np
import warnings
-from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper, inherit_doc
+if sys.version > '3':
+ xrange = range
+
+from pyspark.mllib.common import callMLlibFunc, inherit_doc
from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector
@@ -94,22 +98,16 @@ class MLUtils(object):
>>> from pyspark.mllib.util import MLUtils
>>> from pyspark.mllib.regression import LabeledPoint
>>> tempFile = NamedTemporaryFile(delete=True)
- >>> tempFile.write("+1 1:1.0 3:2.0 5:3.0\\n-1\\n-1 2:4.0 4:5.0 6:6.0")
+ >>> _ = tempFile.write(b"+1 1:1.0 3:2.0 5:3.0\\n-1\\n-1 2:4.0 4:5.0 6:6.0")
>>> tempFile.flush()
>>> examples = MLUtils.loadLibSVMFile(sc, tempFile.name).collect()
>>> tempFile.close()
- >>> type(examples[0]) == LabeledPoint
- True
- >>> print examples[0]
- (1.0,(6,[0,2,4],[1.0,2.0,3.0]))
- >>> type(examples[1]) == LabeledPoint
- True
- >>> print examples[1]
- (-1.0,(6,[],[]))
- >>> type(examples[2]) == LabeledPoint
- True
- >>> print examples[2]
- (-1.0,(6,[1,3,5],[4.0,5.0,6.0]))
+ >>> examples[0]
+ LabeledPoint(1.0, (6,[0,2,4],[1.0,2.0,3.0]))
+ >>> examples[1]
+ LabeledPoint(-1.0, (6,[],[]))
+ >>> examples[2]
+ LabeledPoint(-1.0, (6,[1,3,5],[4.0,5.0,6.0]))
"""
from pyspark.mllib.regression import LabeledPoint
if multiclass is not None: