From 922338812c03eba43f2f1a6c414d1b6b049811cf Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 25 Sep 2015 00:43:22 -0700 Subject: [SPARK-9681] [ML] Support R feature interactions in RFormula This integrates the Interaction feature transformer with SparkR R formula support (i.e. support `:`). To generate reasonable ML attribute names for feature interactions, it was necessary to add the ability to read the original attribute names back from `StructField`, and also to specify custom group prefixes in `VectorAssembler`. This also has the side benefit of cleaning up the double-underscores in the attributes generated for non-interaction terms. mengxr Author: Eric Liang Closes #8830 from ericl/interaction-2. --- python/pyspark/ml/feature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python/pyspark/ml/feature.py') diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index f41d72f877..a4e60f916b 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1850,7 +1850,7 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): Implements the transforms required for fitting a dataset against an R model formula. Currently we support a limited subset of the R - operators, including '~', '+', '-', and '.'. Also see the R formula + operators, including '~', '.', ':', '+', and '-'. Also see the R formula docs: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/formula.html -- cgit v1.2.3