about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sean Owen <sowen@cloudera.com> 2015-12-05 15:52:52 +0000
committer Sean Owen <sowen@cloudera.com> 2015-12-05 15:52:52 +0000
commit 7da674851928ed23eb651a3e2f8233e7a684ac41 (patch)
tree a13741bb5dc3766ee8a76a1635927defa1c85357
parent e9c9ae22b96e08e5bb40a029e84d342efb1aec0c (diff)
download spark-7da674851928ed23eb651a3e2f8233e7a684ac41.tar.gz
download spark-7da674851928ed23eb651a3e2f8233e7a684ac41.tar.bz2
download spark-7da674851928ed23eb651a3e2f8233e7a684ac41.zip
[SPARK-11988][ML][MLLIB] Update JPMML to 1.2.7

Update JPMML pmml-model to 1.2.7

Author: Sean Owen <sowen@cloudera.com>

Closes #9972 from srowen/SPARK-11988.

-rw-r--r-- LICENSE 3
-rw-r--r-- mllib/pom.xml 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala 32
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala 26
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala 44
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala 17
6 files changed, 59 insertions, 65 deletions
diff --git a/LICENSE b/LICENSE
index 0db2d14465..a2f75b817a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,3 @@
-
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
@@ -237,7 +236,7 @@ The following components are provided under a BSD-style license. See project lin
The text of each license is also included at licenses/LICENSE-[project].txt.
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
- (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
+ (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 70139121d8..df50aca1a3 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -109,7 +109,7 @@
<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-model</artifactId>
- <version>1.1.15</version>
+ <version>1.2.7</version>
<exclusions>
<exclusion>
<groupId>com.sun.xml.fastinfoset</groupId>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
index 622b53a252..7abb1bf7ce 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
@@ -45,7 +45,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
val fields = new SArray[FieldName](model.weights.size)
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
- val regressionTableYES = new RegressionTable(model.intercept).withTargetCategory("1")
+ val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
var interceptNO = threshold
if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
if (threshold <= 0) {
@@ -56,35 +56,35 @@ private[mllib] class BinaryClassificationPMMLModelExport(
interceptNO = -math.log(1 / threshold - 1)
}
}
- val regressionTableNO = new RegressionTable(interceptNO).withTargetCategory("0")
+ val regressionTableNO = new RegressionTable(interceptNO).setTargetCategory("0")
val regressionModel = new RegressionModel()
- .withFunctionName(MiningFunctionType.CLASSIFICATION)
- .withMiningSchema(miningSchema)
- .withModelName(description)
- .withNormalizationMethod(normalizationMethod)
- .withRegressionTables(regressionTableYES, regressionTableNO)
+ .setFunctionName(MiningFunctionType.CLASSIFICATION)
+ .setMiningSchema(miningSchema)
+ .setModelName(description)
+ .setNormalizationMethod(normalizationMethod)
+ .addRegressionTables(regressionTableYES, regressionTableNO)
for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i)
- dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
+ dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
- .withMiningFields(new MiningField(fields(i))
- .withUsageType(FieldUsageType.ACTIVE))
- regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
+ .addMiningFields(new MiningField(fields(i))
+ .setUsageType(FieldUsageType.ACTIVE))
+ regressionTableYES.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
}
// add target field
val targetField = FieldName.create("target")
dataDictionary
- .withDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
+ .addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
miningSchema
- .withMiningFields(new MiningField(targetField)
- .withUsageType(FieldUsageType.TARGET))
+ .addMiningFields(new MiningField(targetField)
+ .setUsageType(FieldUsageType.TARGET))
- dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
+ dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
pmml.setDataDictionary(dataDictionary)
- pmml.withModels(regressionModel)
+ pmml.addModels(regressionModel)
}
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
index 1874786af0..4d951d2973 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
@@ -45,31 +45,31 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
val miningSchema = new MiningSchema
val regressionTable = new RegressionTable(model.intercept)
val regressionModel = new RegressionModel()
- .withFunctionName(MiningFunctionType.REGRESSION)
- .withMiningSchema(miningSchema)
- .withModelName(description)
- .withRegressionTables(regressionTable)
+ .setFunctionName(MiningFunctionType.REGRESSION)
+ .setMiningSchema(miningSchema)
+ .setModelName(description)
+ .addRegressionTables(regressionTable)
for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i)
- dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
+ dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
- .withMiningFields(new MiningField(fields(i))
- .withUsageType(FieldUsageType.ACTIVE))
- regressionTable.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
+ .addMiningFields(new MiningField(fields(i))
+ .setUsageType(FieldUsageType.ACTIVE))
+ regressionTable.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
}
// for completeness add target field
val targetField = FieldName.create("target")
- dataDictionary.withDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
+ dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
- .withMiningFields(new MiningField(targetField)
- .withUsageType(FieldUsageType.TARGET))
+ .addMiningFields(new MiningField(targetField)
+ .setUsageType(FieldUsageType.TARGET))
- dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
+ dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
pmml.setDataDictionary(dataDictionary)
- pmml.withModels(regressionModel)
+ pmml.addModels(regressionModel)
}
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
index 069e7afc9f..b5b824bb9c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
@@ -42,42 +42,42 @@ private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLMode
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val comparisonMeasure = new ComparisonMeasure()
- .withKind(ComparisonMeasure.Kind.DISTANCE)
- .withMeasure(new SquaredEuclidean())
+ .setKind(ComparisonMeasure.Kind.DISTANCE)
+ .setMeasure(new SquaredEuclidean())
val clusteringModel = new ClusteringModel()
- .withModelName("k-means")
- .withMiningSchema(miningSchema)
- .withComparisonMeasure(comparisonMeasure)
- .withFunctionName(MiningFunctionType.CLUSTERING)
- .withModelClass(ClusteringModel.ModelClass.CENTER_BASED)
- .withNumberOfClusters(model.clusterCenters.length)
+ .setModelName("k-means")
+ .setMiningSchema(miningSchema)
+ .setComparisonMeasure(comparisonMeasure)
+ .setFunctionName(MiningFunctionType.CLUSTERING)
+ .setModelClass(ClusteringModel.ModelClass.CENTER_BASED)
+ .setNumberOfClusters(model.clusterCenters.length)
for (i <- 0 until clusterCenter.size) {
fields(i) = FieldName.create("field_" + i)
- dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
+ dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
- .withMiningFields(new MiningField(fields(i))
- .withUsageType(FieldUsageType.ACTIVE))
- clusteringModel.withClusteringFields(
- new ClusteringField(fields(i)).withCompareFunction(CompareFunctionType.ABS_DIFF))
+ .addMiningFields(new MiningField(fields(i))
+ .setUsageType(FieldUsageType.ACTIVE))
+ clusteringModel.addClusteringFields(
+ new ClusteringField(fields(i)).setCompareFunction(CompareFunctionType.ABS_DIFF))
}
- dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
+ dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
- for (i <- 0 until model.clusterCenters.length) {
+ for (i <- model.clusterCenters.indices) {
val cluster = new Cluster()
- .withName("cluster_" + i)
- .withArray(new org.dmg.pmml.Array()
- .withType(Array.Type.REAL)
- .withN(clusterCenter.size)
- .withValue(model.clusterCenters(i).toArray.mkString(" ")))
+ .setName("cluster_" + i)
+ .setArray(new org.dmg.pmml.Array()
+ .setType(Array.Type.REAL)
+ .setN(clusterCenter.size)
+ .setValue(model.clusterCenters(i).toArray.mkString(" ")))
// we don't have the size of the single cluster but only the centroids (withValue)
// .withSize(value)
- clusteringModel.withClusters(cluster)
+ clusteringModel.addClusters(cluster)
}
pmml.setDataDictionary(dataDictionary)
- pmml.withModels(clusteringModel)
+ pmml.addModels(clusteringModel)
}
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
index 9267e6dbdb..426bb818c9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
@@ -30,19 +30,14 @@ private[mllib] trait PMMLModelExport {
* Holder of the exported model in PMML format
*/
@BeanProperty
- val pmml: PMML = new PMML
-
- pmml.setVersion("4.2")
- setHeader(pmml)
-
- private def setHeader(pmml: PMML): Unit = {
+ val pmml: PMML = {
val version = getClass.getPackage.getImplementationVersion
- val app = new Application().withName("Apache Spark MLlib").withVersion(version)
+ val app = new Application("Apache Spark MLlib").setVersion(version)
val timestamp = new Timestamp()
- .withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
+ .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
val header = new Header()
- .withApplication(app)
- .withTimestamp(timestamp)
- pmml.setHeader(header)
+ .setApplication(app)
+ .setTimestamp(timestamp)
+ new PMML("4.2", header, null)
}
}