diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-06-25 08:13:17 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-06-25 08:13:17 -0700 |
commit | 2519dcc33bde3a6d341790d73b5d292ea7af961a (patch) | |
tree | 8dbc6ff4dc5de6a3e3589b981dc7521ca926a4d5 /python/pyspark | |
parent | f9b397f54d1c491680d70aba210bb8211fd249c1 (diff) | |
download | spark-2519dcc33bde3a6d341790d73b5d292ea7af961a.tar.gz spark-2519dcc33bde3a6d341790d73b5d292ea7af961a.tar.bz2 spark-2519dcc33bde3a6d341790d73b5d292ea7af961a.zip |
[MINOR] [MLLIB] rename some functions of PythonMLLibAPI
Keep the same naming conventions for PythonMLLibAPI.
Only the following three functions are named differently from the others:
```scala
trainNaiveBayes
trainGaussianMixture
trainWord2Vec
```
So change them to
```scala
trainNaiveBayesModel
trainGaussianMixtureModel
trainWord2VecModel
```
This does not affect any users or public APIs; it only makes the code easier to understand for developers and code hackers.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #7011 from yanboliang/py-mllib-api-rename and squashes the following commits:
771ffec [Yanbo Liang] rename some functions of PythonMLLibAPI
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/mllib/classification.py | 2 | ||||
-rw-r--r-- | python/pyspark/mllib/clustering.py | 6 | ||||
-rw-r--r-- | python/pyspark/mllib/feature.py | 2 |
3 files changed, 5 insertions, 5 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 2698f10d06..735d45ba03 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -581,7 +581,7 @@ class NaiveBayes(object): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("`data` should be an RDD of LabeledPoint") - labels, pi, theta = callMLlibFunc("trainNaiveBayes", data, lambda_) + labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_) return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta)) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index e6ef72942c..8bc0654c76 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -265,9 +265,9 @@ class GaussianMixture(object): initialModelWeights = initialModel.weights initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)] initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)] - weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k, - convergenceTol, maxIterations, seed, initialModelWeights, - initialModelMu, initialModelSigma) + weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector), + k, convergenceTol, maxIterations, seed, + initialModelWeights, initialModelMu, initialModelSigma) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj) diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 334f5b86cd..f00bb93b7b 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -549,7 +549,7 @@ class Word2Vec(object): """ if not isinstance(data, RDD): raise TypeError("data should be an RDD of list of string") - jmodel = callMLlibFunc("trainWord2Vec", data, int(self.vectorSize), + jmodel = 
callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize), float(self.learningRate), int(self.numPartitions), int(self.numIterations), int(self.seed), int(self.minCount)) |