about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/ml/tests.py')
-rwxr-xr-x python/pyspark/ml/tests.py 32
1 files changed, 32 insertions, 0 deletions
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 9d46cc3b4a..c0f0d40735 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1097,6 +1097,38 @@ class TrainingSummaryTest(SparkSessionTestCase):
sameSummary = model.evaluate(df)
self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC)
+    def test_gaussian_mixture_summary(self):
+        # Fit GaussianMixture(k=2) on four 1-d points and check its training summary.
+        rows = [(Vectors.dense(x),) for x in (1.0, 5.0, 10.0)]
+        rows.append((Vectors.sparse(1, [], []),))
+        frame = self.spark.createDataFrame(rows, ["features"])
+        fitted = GaussianMixture(k=2).fit(frame)
+        self.assertTrue(fitted.hasSummary)
+        summary = fitted.summary
+        self.assertTrue(isinstance(summary.predictions, DataFrame))
+        self.assertEqual(summary.probabilityCol, "probability")
+        self.assertTrue(isinstance(summary.probability, DataFrame))
+        self.assertEqual(summary.featuresCol, "features")
+        self.assertEqual(summary.predictionCol, "prediction")
+        self.assertTrue(isinstance(summary.cluster, DataFrame))
+        self.assertEqual(len(summary.clusterSizes), 2)
+        self.assertEqual(summary.k, 2)
+
+    def test_bisecting_kmeans_summary(self):
+        # Fit BisectingKMeans(k=2) on four 1-d points and check its training summary.
+        rows = [(Vectors.dense(x),) for x in (1.0, 5.0, 10.0)]
+        rows.append((Vectors.sparse(1, [], []),))
+        frame = self.spark.createDataFrame(rows, ["features"])
+        fitted = BisectingKMeans(k=2).fit(frame)
+        self.assertTrue(fitted.hasSummary)
+        summary = fitted.summary
+        self.assertTrue(isinstance(summary.predictions, DataFrame))
+        self.assertEqual(summary.featuresCol, "features")
+        self.assertEqual(summary.predictionCol, "prediction")
+        self.assertTrue(isinstance(summary.cluster, DataFrame))
+        self.assertEqual(len(summary.clusterSizes), 2)
+        self.assertEqual(summary.k, 2)
+
class OneVsRestTests(SparkSessionTestCase):