From 81464f2a8243c6ae2a39bac7ebdc50d4f60af451 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Thu, 30 Jul 2015 09:45:17 -0700
Subject: [MINOR] [MLLIB] fix doc for RegexTokenizer

This is #7791 for Python. hhbyyh

Author: Xiangrui Meng

Closes #7798 from mengxr/regex-tok-py and squashes the following commits:

baa2dcd [Xiangrui Meng] fix doc for RegexTokenizer
---
 python/pyspark/ml/feature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 86e654dd07..015e7a9d49 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -525,7 +525,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
     """
     A regex based tokenizer that extracts tokens either by using the
     provided regex pattern (in Java dialect) to split the text
-    (default) or repeatedly matching the regex (if gaps is true).
+    (default) or repeatedly matching the regex (if gaps is false).
     Optional parameters also allow filtering tokens using a minimal
     length.
     It returns an array of strings that can be empty.
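
For context, the corrected docstring distinguishes the two tokenization modes selected by the gaps parameter: split on the pattern (gaps=True, the default) versus repeatedly match the pattern as the tokens (gaps=False). The sketch below is not part of the commit; it assumes a modern PySpark with SparkSession (code contemporary with this commit would go through SQLContext instead), and the app name and sample text are arbitrary.

from pyspark.sql import SparkSession
from pyspark.ml.feature import RegexTokenizer

spark = SparkSession.builder.appName("regex-tokenizer-demo").getOrCreate()
df = spark.createDataFrame([("a b  c",)], ["text"])

# gaps=True (the default): the pattern is treated as the separator,
# and the text is split wherever the pattern matches.
splitter = RegexTokenizer(inputCol="text", outputCol="words",
                          pattern="\\s+", gaps=True)
splitter.transform(df).select("words").show(truncate=False)
# expected tokens: [a, b, c]

# gaps=False: the pattern describes the tokens themselves and is
# matched repeatedly against the text.
matcher = RegexTokenizer(inputCol="text", outputCol="words",
                         pattern="\\w+", gaps=False)
matcher.transform(df).select("words").show(truncate=False)
# expected tokens: [a, b, c]

spark.stop()

Both runs happen to yield the same tokens on this input; the difference only shows when the separator regex and the token regex are not complements of each other, which is exactly the distinction the one-line doc fix restores.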