path: root/python/pyspark/mllib
author     WeichenXu <WeichenXu123@outlook.com>      2016-05-23 18:14:48 -0700
committer  Andrew Or <andrew@databricks.com>         2016-05-23 18:14:48 -0700
commit     a15ca5533db91fefaf3248255a59c4d94eeda1a9 (patch)
tree       80867e08b17b01d96611a9a695cbb1f01c0198f6 /python/pyspark/mllib
parent     5afd927a47aa7ede3039234f2f7262e2247aa2ae (diff)
[SPARK-15464][ML][MLLIB][SQL][TESTS] Replace SQLContext and SparkContext with SparkSession using builder pattern in python test code
## What changes were proposed in this pull request?

Replace SQLContext and SparkContext with SparkSession using builder pattern in python test code.

## How was this patch tested?

Existing test.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #13242 from WeichenXu123/python_doctest_update_sparksession.
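Every hunk below applies the same mechanical change, so it is worth seeing once in isolation. A minimal sketch of the before/after shape of a `_test()` harness follows; the module, app name, and master URL are illustrative placeholders, not taken from any one file:

# Sketch of the migration pattern each _test() harness undergoes.
# App name and master URL here are illustrative placeholders.
import doctest


def _test():
    # Before: the harness constructed a bare SparkContext itself, e.g.
    #     from pyspark import SparkContext
    #     globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
    from pyspark.sql import SparkSession

    globs = globals().copy()
    # After: ask the builder for a session (creating one only if none
    # exists), then hand its SparkContext to the doctests as 'sc'.
    spark = SparkSession.builder\
        .master("local[4]")\
        .appName("doctest sketch")\
        .getOrCreate()
    globs['sc'] = spark.sparkContext
    (failure_count, test_count) = doctest.testmod(
        globs=globs, optionflags=doctest.ELLIPSIS)
    spark.stop()  # stopping the session also stops the wrapped SparkContext
    if failure_count:
        exit(-1)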
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r--  python/pyspark/mllib/classification.py       10
-rw-r--r--  python/pyspark/mllib/evaluation.py           10
-rw-r--r--  python/pyspark/mllib/feature.py              10
-rw-r--r--  python/pyspark/mllib/fpm.py                   9
-rw-r--r--  python/pyspark/mllib/linalg/distributed.py   12
-rw-r--r--  python/pyspark/mllib/random.py               10
-rw-r--r--  python/pyspark/mllib/regression.py           10
-rw-r--r--  python/pyspark/mllib/stat/_statistics.py     10
-rw-r--r--  python/pyspark/mllib/tree.py                  9
-rw-r--r--  python/pyspark/mllib/util.py                 10
10 files changed, 70 insertions, 30 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index fe5b6844bf..f186217031 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -756,12 +756,16 @@ class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
+    from pyspark.sql import SparkSession
     import pyspark.mllib.classification
     globs = pyspark.mllib.classification.__dict__.copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.classification tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
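Two things about this first hunk recur throughout: the old harness passed batchSize=2 to SparkContext while the builder-based replacement falls back to the serializer's default batching, and getOrCreate() reuses any session that already exists rather than constructing a second one. A small sketch of the latter, assuming pyspark 2.x:

# getOrCreate() is idempotent: a second builder call hands back the already
# instantiated session (whose existing appName/master win), so repeated
# harness setup cannot leak extra contexts. Sketch, assuming pyspark 2.x.
from pyspark.sql import SparkSession

s1 = SparkSession.builder.master("local[4]").appName("first").getOrCreate()
s2 = SparkSession.builder.appName("second").getOrCreate()
assert s1 is s2  # the same live session is handed back
s1.stop()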
diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py
index 22e68ea5b4..5f32f092c7 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -516,12 +516,16 @@ class MultilabelMetrics(JavaModelWrapper):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
+    from pyspark.sql import SparkSession
     import pyspark.mllib.evaluation
     globs = pyspark.mllib.evaluation.__dict__.copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest')
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.evaluation tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 90559f6cfb..e31c75c1e8 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -732,11 +732,15 @@ class ElementwiseProduct(VectorTransformer):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
+    from pyspark.sql import SparkSession
     globs = globals().copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.feature tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py
index f339e50891..ab4066f7d6 100644
--- a/python/pyspark/mllib/fpm.py
+++ b/python/pyspark/mllib/fpm.py
@@ -183,16 +183,21 @@ class PrefixSpan(object):
 
 def _test():
     import doctest
+    from pyspark.sql import SparkSession
     import pyspark.mllib.fpm
     globs = pyspark.mllib.fpm.__dict__.copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest')
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.fpm tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     import tempfile
 
     temp_path = tempfile.mkdtemp()
     globs['temp_path'] = temp_path
     try:
         (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-        globs['sc'].stop()
+        spark.stop()
     finally:
         from shutil import rmtree
         try:
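The hunk stops at the inner try: only because unified diffs print three lines of trailing context. For orientation, the whole scratch-directory idiom in this harness runs roughly as follows; the tail past the shown context is a reconstruction of the usual mkdtemp/rmtree pattern, not the file's verbatim code:

# Reconstruction of fpm.py's _test() shape; the lines after the cut-off
# `try:` are assumed from the standard mkdtemp/rmtree cleanup idiom.
import doctest
import tempfile

from pyspark.sql import SparkSession


def _test():
    globs = globals().copy()
    spark = SparkSession.builder\
        .master("local[4]")\
        .appName("mllib.fpm tests")\
        .getOrCreate()
    globs['sc'] = spark.sparkContext
    temp_path = tempfile.mkdtemp()  # scratch dir for model save/load doctests
    globs['temp_path'] = temp_path
    try:
        (failure_count, test_count) = doctest.testmod(
            globs=globs, optionflags=doctest.ELLIPSIS)
        spark.stop()
    finally:
        from shutil import rmtree
        try:
            rmtree(temp_path)  # always remove the scratch dir
        except OSError:
            pass
    if failure_count:
        exit(-1)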
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index af34ce346b..ea4f27cf4f 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -1184,16 +1184,18 @@ class BlockMatrix(DistributedMatrix):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
-    from pyspark.sql import SQLContext
+    from pyspark.sql import SparkSession
     from pyspark.mllib.linalg import Matrices
     import pyspark.mllib.linalg.distributed
     globs = pyspark.mllib.linalg.distributed.__dict__.copy()
-    globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2)
-    globs['sqlContext'] = SQLContext(globs['sc'])
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.linalg.distributed tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     globs['Matrices'] = Matrices
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
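This hunk also drops the sqlContext entry the doctests used to receive, without a replacement. Since SparkSession supersedes SQLContext in 2.0, a harness whose doctests still needed DataFrame APIs could expose the session itself; an illustrative sketch, not part of this patch:

# Illustrative only -- not part of this patch. The 'spark' globs entry is
# a hypothetical addition for doctests that need DataFrame APIs.
from pyspark.sql import SparkSession

spark = SparkSession.builder\
    .master("local[2]")\
    .appName("mllib.linalg.distributed tests")\
    .getOrCreate()

globs = {'sc': spark.sparkContext,
         'spark': spark}  # hypothetical: session in place of sqlContext
# DataFrames now come straight from the session, no SQLContext required:
df = spark.createDataFrame([(0, 1.0), (1, 2.0)], ["id", "value"])
spark.stop()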
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py
index 6a3c643b66..61213ddf62 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/random.py
@@ -409,13 +409,17 @@ class RandomRDDs(object):
 
 def _test():
     import doctest
-    from pyspark.context import SparkContext
+    from pyspark.sql import SparkSession
     globs = globals().copy()
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
-    globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.random tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 639c5eabaa..43d9072a24 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -824,12 +824,16 @@ class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
+    from pyspark.sql import SparkSession
     import pyspark.mllib.regression
     globs = pyspark.mllib.regression.__dict__.copy()
-    globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.regression tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py
index 36c8f48a4a..b0a85240b2 100644
--- a/python/pyspark/mllib/stat/_statistics.py
+++ b/python/pyspark/mllib/stat/_statistics.py
@@ -306,11 +306,15 @@ class Statistics(object):
 
 def _test():
     import doctest
-    from pyspark import SparkContext
+    from pyspark.sql import SparkSession
     globs = globals().copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.stat.statistics tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index f7ea466b43..8be76fcefe 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -657,9 +657,14 @@ class GradientBoostedTrees(object):
 def _test():
     import doctest
     globs = globals().copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
+    from pyspark.sql import SparkSession
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.tree tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 39bc6586dd..a316ee1ad4 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -347,13 +347,17 @@ class LinearDataGenerator(object):
 
 def _test():
     import doctest
-    from pyspark.context import SparkContext
+    from pyspark.sql import SparkSession
     globs = globals().copy()
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
-    globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2)
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.util tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
-    globs['sc'].stop()
+    spark.stop()
     if failure_count:
         exit(-1)
 
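All ten harnesses are driven the same way: each module ends with the usual main guard, so one file's doctests can still be run directly once pyspark is importable, e.g. python python/pyspark/mllib/classification.py.

# The entry-point idiom these modules share; running the file executes its
# doctests through the new SparkSession-based harness.
if __name__ == "__main__":
    _test()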