From 5ffd5d3838da40ad408a6f40071fe6f4dcacf2a1 Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley"
Date: Fri, 15 Jul 2016 13:38:23 -0700
Subject: [SPARK-14817][ML][MLLIB][DOC] Made DataFrame-based API primary in MLlib guide

## What changes were proposed in this pull request?

Made DataFrame-based API primary
* Spark doc menu bar and other places now link to ml-guide.html, not mllib-guide.html
* mllib-guide.html keeps RDD-specific list of features, with a link at the top redirecting people to ml-guide.html
* ml-guide.html includes a "maintenance mode" announcement about the RDD-based API
  * **Reviewers: please check this carefully**
* (minor) Titles for DF API no longer include "- spark.ml" suffix. Titles for RDD API have "- RDD-based API" suffix
* Moved migration guide to ml-guide from mllib-guide
  * Also moved past guides from mllib-migration-guides to ml-migration-guides, with a redirect link on mllib-migration-guides
  * **Reviewers**: I did not change any of the content of the migration guides.

Reorganized DataFrame-based guide:
* ml-guide.html mimics the old mllib-guide.html page in terms of content: overview, migration guide, etc.
* Moved Pipeline description into ml-pipeline.html and moved tuning into ml-tuning.html
  * **Reviewers**: I did not change the content of these guides, except some intro text.
* Sidebar remains the same, but with pipeline and tuning sections added

Other:
* ml-classification-regression.html: Moved text about linear methods to new section in page

## How was this patch tested?

Generated docs locally

Author: Joseph K. Bradley

Closes #14213 from jkbradley/ml-guide-2.0.
---
 python/pyspark/ml/__init__.py    | 4 ++--
 python/pyspark/ml/tests.py       | 2 +-
 python/pyspark/mllib/__init__.py | 5 ++++-
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'python')

diff --git a/python/pyspark/ml/__init__.py b/python/pyspark/ml/__init__.py
index 05f3be5f0d..1d42d49a88 100644
--- a/python/pyspark/ml/__init__.py
+++ b/python/pyspark/ml/__init__.py
@@ -16,8 +16,8 @@
 #
 
 """
-Spark ML is a component that adds a new set of machine learning APIs to let users quickly
-assemble and configure practical machine learning pipelines.
+DataFrame-based machine learning APIs to let users quickly assemble and configure practical
+machine learning pipelines.
 """
 from pyspark.ml.base import Estimator, Model, Transformer
 from pyspark.ml.pipeline import Pipeline, PipelineModel
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 24efce812b..4bcb2c400c 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -16,7 +16,7 @@
 #
 
 """
-Unit tests for Spark ML Python APIs.
+Unit tests for MLlib Python DataFrame-based APIs.
 """
 import sys
 if sys.version > '3':
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index acba3a717d..ae26521ea9 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -16,7 +16,10 @@
 #
 
 """
-Python bindings for MLlib.
+RDD-based machine learning APIs for Python (in maintenance mode).
+
+The `pyspark.mllib` package is in maintenance mode as of the Spark 2.0.0 release to encourage
+migration to the DataFrame-based APIs under the `pyspark.ml` package.
 """
 
 from __future__ import absolute_import
-- 
cgit v1.2.3