From f5ff4a84c4c75143086aae7d38730156bee35933 Mon Sep 17 00:00:00 2001
From: Burak Yavuz
Date: Fri, 8 May 2015 11:14:39 -0700
Subject: [SPARK-7383] [ML] Feature Parity in PySpark for ml.features

Implemented python wrappers for Scala functions that don't exist in `ml.features`

Author: Burak Yavuz

Closes #5991 from brkyvz/ml-feat-PR and squashes the following commits:

adcca55 [Burak Yavuz] add regex tokenizer to __all__
b91cb44 [Burak Yavuz] addressed comments
bd39fd2 [Burak Yavuz] remove addition
b82bd7c [Burak Yavuz] Parity in PySpark for ml.features
---
 .../main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'mllib')

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 63e190c8aa..9e6177ca27 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.DataType
  * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an
  * expansion of a product of sums expresses it as a sum of products by using the fact that
  * multiplication distributes over addition". Take a 2-variable feature vector as an example:
- * `(x, y)`, if we want to expand it with degree 2, then we get `(x, y, x * x, x * y, y * y)`.
+ * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
  */
 @AlphaComponent
 class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExpansion] {

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 2863b76215..649c217b16 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -42,7 +42,7 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] {
 
 /**
  * :: AlphaComponent ::
- * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default)
+ * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default)
  * or using it to split the text (set matching to false). Optional parameters also allow filtering
  * tokens using a minimal length.
  * It returns an array of strings that can be empty.
--
cgit v1.2.3
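
Note on the PolynomialExpansion doc fix above: the corrected ordering lists each variable's terms before mixed terms involving later variables, so a degree-2 expansion of (x, y) yields (x, x*x, y, x*y, y*y). A minimal sketch of this ordering, using the PySpark wrapper this PR is about (modern pyspark.ml imports shown; the 1.4-era API differed slightly, and column names here are illustrative):

    from pyspark.sql import SparkSession
    from pyspark.ml.feature import PolynomialExpansion
    from pyspark.ml.linalg import Vectors

    spark = SparkSession.builder.getOrCreate()

    # One row holding the 2-variable vector (x, y) = (2.0, 3.0) from the scaladoc example.
    df = spark.createDataFrame([(Vectors.dense([2.0, 3.0]),)], ["features"])

    px = PolynomialExpansion(degree=2, inputCol="features", outputCol="expanded")
    px.transform(df).select("expanded").show(truncate=False)
    # Expanded in the documented order (x, x*x, y, x*y, y*y):
    # [2.0, 4.0, 3.0, 6.0, 9.0]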
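Similarly, the RegexTokenizer scaladoc touched above describes two modes: repeatedly matching the pattern against the text to extract tokens, or splitting the text on the pattern. In current PySpark releases the switch is the boolean `gaps` parameter rather than the "matching" wording in this doc; a sketch with both modes set explicitly, so it does not depend on era-specific defaults:

    from pyspark.sql import SparkSession
    from pyspark.ml.feature import RegexTokenizer

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("a simple test",)], ["text"])

    # gaps=False: the pattern describes the tokens themselves (repeated matching).
    matcher = RegexTokenizer(inputCol="text", outputCol="tokens",
                             pattern="\\w+", gaps=False)
    matcher.transform(df).select("tokens").show(truncate=False)
    # [a, simple, test]

    # gaps=True: the pattern describes the separators (split the text on it).
    splitter = RegexTokenizer(inputCol="text", outputCol="tokens",
                              pattern="\\s+", gaps=True)
    splitter.transform(df).select("tokens").show(truncate=False)
    # [a, simple, test]

The "filtering tokens using a minimal length" mentioned in the doc corresponds to the minTokenLength parameter, which both modes accept.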