about summary refs log tree commit diff
path: root/python/pyspark/mllib/__init__.py
diff options
context:
space:
mode:
author Tor Myklebust <tmyklebu@gmail.com> 2013-12-25 00:08:05 -0500
committer Tor Myklebust <tmyklebu@gmail.com> 2013-12-25 00:08:05 -0500
commit 05163057a1810f0a32b722e8c93e5435240636d9 (patch)
tree 861c55ac76964502cba2e7e330e3630a4d279e4b /python/pyspark/mllib/__init__.py
parent 86e38c49420098da422a17e7c098efa34c94c35b (diff)
download spark-05163057a1810f0a32b722e8c93e5435240636d9.tar.gz
spark-05163057a1810f0a32b722e8c93e5435240636d9.tar.bz2
spark-05163057a1810f0a32b722e8c93e5435240636d9.zip
Split the mllib bindings into a whole bunch of modules and rename some things.
Diffstat (limited to 'python/pyspark/mllib/__init__.py')
-rw-r--r-- python/pyspark/mllib/__init__.py 46
1 files changed, 46 insertions, 0 deletions
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
new file mode 100644
index 0000000000..6037a3aa63
--- /dev/null
+++ b/python/pyspark/mllib/__init__.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Python bindings for MLlib, the machine learning library of Spark.
+
+Public classes, grouped by the submodule that defines them:
+
+ - L{pyspark.mllib.regression}
+ LinearRegressionModel, LassoModel, RidgeRegressionModel,
+ LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD.
+ - L{pyspark.mllib.classification}
+ LogisticRegressionModel, SVMModel, LogisticRegressionWithSGD,
+ SVMWithSGD.
+ - L{pyspark.mllib.recommendation}
+ MatrixFactorizationModel, ALS.
+ - L{pyspark.mllib.clustering}
+ KMeansModel, KMeans.
+
+Importing this package prepends $SPARK_HOME/python/lib/py4j0.7.egg to sys.path.
+"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
+
+from pyspark.mllib.regression import LinearRegressionModel, LassoModel, RidgeRegressionModel, LinearRegressionWithSGD, LassoWithSGD, RidgeRegressionWithSGD
+from pyspark.mllib.classification import LogisticRegressionModel, SVMModel, LogisticRegressionWithSGD, SVMWithSGD
+from pyspark.mllib.recommendation import MatrixFactorizationModel, ALS
+from pyspark.mllib.clustering import KMeansModel, KMeans
+
+
+__all__ = ["LinearRegressionModel", "LassoModel", "RidgeRegressionModel", "LinearRegressionWithSGD", "LassoWithSGD", "RidgeRegressionWithSGD", "LogisticRegressionModel", "SVMModel", "LogisticRegressionWithSGD", "SVMWithSGD", "MatrixFactorizationModel", "ALS", "KMeansModel", "KMeans"]