aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2015-06-25 08:13:17 -0700
committerXiangrui Meng <meng@databricks.com>2015-06-25 08:13:17 -0700
commit2519dcc33bde3a6d341790d73b5d292ea7af961a (patch)
tree8dbc6ff4dc5de6a3e3589b981dc7521ca926a4d5 /python
parentf9b397f54d1c491680d70aba210bb8211fd249c1 (diff)
downloadspark-2519dcc33bde3a6d341790d73b5d292ea7af961a.tar.gz
spark-2519dcc33bde3a6d341790d73b5d292ea7af961a.tar.bz2
spark-2519dcc33bde3a6d341790d73b5d292ea7af961a.zip
[MINOR] [MLLIB] rename some functions of PythonMLLibAPI
Keep the naming conventions of PythonMLLibAPI consistent. Only the following three functions are named differently from the others: ```scala trainNaiveBayes trainGaussianMixture trainWord2Vec ``` So rename them to ```scala trainNaiveBayesModel trainGaussianMixtureModel trainWord2VecModel ``` This does not affect any users or public APIs; it only makes the code easier to understand for developers and code hackers. Author: Yanbo Liang <ybliang8@gmail.com> Closes #7011 from yanboliang/py-mllib-api-rename and squashes the following commits: 771ffec [Yanbo Liang] rename some functions of PythonMLLibAPI
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/mllib/classification.py 2
-rw-r--r-- python/pyspark/mllib/clustering.py 6
-rw-r--r-- python/pyspark/mllib/feature.py 2
3 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 2698f10d06..735d45ba03 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -581,7 +581,7 @@ class NaiveBayes(object):
first = data.first()
if not isinstance(first, LabeledPoint):
raise ValueError("`data` should be an RDD of LabeledPoint")
- labels, pi, theta = callMLlibFunc("trainNaiveBayes", data, lambda_)
+ labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_)
return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta))
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index e6ef72942c..8bc0654c76 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -265,9 +265,9 @@ class GaussianMixture(object):
initialModelWeights = initialModel.weights
initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)]
initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)]
- weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k,
- convergenceTol, maxIterations, seed, initialModelWeights,
- initialModelMu, initialModelSigma)
+ weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector),
+ k, convergenceTol, maxIterations, seed,
+ initialModelWeights, initialModelMu, initialModelSigma)
mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)]
return GaussianMixtureModel(weight, mvg_obj)
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 334f5b86cd..f00bb93b7b 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -549,7 +549,7 @@ class Word2Vec(object):
"""
if not isinstance(data, RDD):
raise TypeError("data should be an RDD of list of string")
- jmodel = callMLlibFunc("trainWord2Vec", data, int(self.vectorSize),
+ jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
float(self.learningRate), int(self.numPartitions),
int(self.numIterations), int(self.seed),
int(self.minCount))