author:    Cheng Lian <lian@databricks.com>    2015-02-17 23:36:20 -0800
committer: Reynold Xin <rxin@databricks.com>   2015-02-17 23:36:20 -0800
commit:    61ab08549cb6fceb6de1b5c490c55a89d4bd28fa
tree:      172480f72bb84b685b39acd1e744c84a5dc42d40 /examples
parent:    3912d332464dcd124c60b734724c34d9742466a4
[Minor] [SQL] Cleans up DataFrame variable names and toDF() calls
Although we've migrated to the DataFrame API, lots of code still uses `rdd` or `srdd` as local variable names. This PR addresses those naming inconsistencies, along with some other minor DataFrame-related style issues.

Author: Cheng Lian <lian@databricks.com>

Closes #4670 from liancheng/df-cleanup and squashes the following commits:

3e14448 [Cheng Lian] Cleans up DataFrame variable names and toDF() calls
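For illustration, here is a minimal sketch of the convention this commit settles on, modeled on RDDRelation.scala from the diff below: DataFrame-valued locals are named `df` rather than `rdd`, and the no-arg `toDF()` is always written with parentheses. The object name `ToDFConvention` is hypothetical; the snippet assumes the Spark 1.3-era SQLContext API that these examples use.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

case class Record(key: Int, value: String)

object ToDFConvention {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ToDFConvention"))
    val sqlContext = new SQLContext(sc)
    // Brings the RDD-to-DataFrame implicit conversions into scope.
    import sqlContext.implicits._

    // Old style seen across these examples: the local is named `rdd` even
    // though it holds a DataFrame, and toDF is called without parentheses:
    //   val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF

    // Style after this cleanup: name the local `df` and keep the parentheses
    // on toDF(), matching the no-arg overload's declaration.
    val df = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF()
    df.registerTempTable("records")
    println(s"Registered ${df.count()} records")

    sc.stop()
  }
}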
Diffstat (limited to 'examples')
 examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala            | 2 +-
 examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala              | 4 ++--
 examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala                     | 6 +++---
 examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala              | 6 +++---
 examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala | 4 ++--
 examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala                | 2 +-
 examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala                     | 2 +-
 examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala              | 2 +-
 8 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala
index f0241943ef..7ab892cd75 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala
@@ -90,7 +90,7 @@ object CrossValidatorExample {
crossval.setNumFolds(2) // Use 3+ in practice
// Run cross-validation, and choose the best set of parameters.
- val cvModel = crossval.fit(training.toDF)
+ val cvModel = crossval.fit(training.toDF())
// Prepare test documents, which are unlabeled.
val test = sc.parallelize(Seq(
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
index 54aadd2288..df26798e41 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -58,7 +58,7 @@ object DeveloperApiExample {
lr.setMaxIter(10)
// Learn a LogisticRegression model. This uses the parameters stored in lr.
- val model = lr.fit(training.toDF)
+ val model = lr.fit(training.toDF())
// Prepare test data.
val test = sc.parallelize(Seq(
@@ -67,7 +67,7 @@ object DeveloperApiExample {
LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
// Make predictions on test data.
- val sumPredictions: Double = model.transform(test.toDF)
+ val sumPredictions: Double = model.transform(test.toDF())
.select("features", "label", "prediction")
.collect()
.map { case Row(features: Vector, label: Double, prediction: Double) =>
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala
index adaf796dc1..96b2dd463e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/MovieLensALS.scala
@@ -137,9 +137,9 @@ object MovieLensALS {
.setRegParam(params.regParam)
.setNumBlocks(params.numBlocks)
- val model = als.fit(training.toDF)
+ val model = als.fit(training.toDF())
- val predictions = model.transform(test.toDF).cache()
+ val predictions = model.transform(test.toDF()).cache()
// Evaluate the model.
// TODO: Create an evaluator to compute RMSE.
@@ -158,7 +158,7 @@ object MovieLensALS {
// Inspect false positives.
predictions.registerTempTable("prediction")
- sc.textFile(params.movies).map(Movie.parseMovie).toDF.registerTempTable("movie")
+ sc.textFile(params.movies).map(Movie.parseMovie).toDF().registerTempTable("movie")
sqlContext.sql(
"""
|SELECT userId, prediction.movieId, title, rating, prediction
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
index c5bb5515b1..e8af5c1625 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
@@ -58,7 +58,7 @@ object SimpleParamsExample {
.setRegParam(0.01)
// Learn a LogisticRegression model. This uses the parameters stored in lr.
- val model1 = lr.fit(training.toDF)
+ val model1 = lr.fit(training.toDF())
// Since model1 is a Model (i.e., a Transformer produced by an Estimator),
// we can view the parameters it used during fit().
// This prints the parameter (name: value) pairs, where names are unique IDs for this
@@ -77,7 +77,7 @@ object SimpleParamsExample {
// Now learn a new model using the paramMapCombined parameters.
// paramMapCombined overrides all parameters set earlier via lr.set* methods.
- val model2 = lr.fit(training.toDF, paramMapCombined)
+ val model2 = lr.fit(training.toDF(), paramMapCombined)
println("Model 2 was fit using parameters: " + model2.fittingParamMap)
// Prepare test data.
@@ -90,7 +90,7 @@ object SimpleParamsExample {
// LogisticRegression.transform will only use the 'features' column.
// Note that model2.transform() outputs a 'myProbability' column instead of the usual
// 'probability' column since we renamed the lr.probabilityCol parameter previously.
- model2.transform(test.toDF)
+ model2.transform(test.toDF())
.select("features", "label", "myProbability", "prediction")
.collect()
.foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala
index 8b47f88e48..a11db6fd5c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala
@@ -69,7 +69,7 @@ object SimpleTextClassificationPipeline {
.setStages(Array(tokenizer, hashingTF, lr))
// Fit the pipeline to training documents.
- val model = pipeline.fit(training.toDF)
+ val model = pipeline.fit(training.toDF())
// Prepare test documents, which are unlabeled.
val test = sc.parallelize(Seq(
@@ -79,7 +79,7 @@ object SimpleTextClassificationPipeline {
Document(7L, "apache hadoop")))
// Make predictions on test documents.
- model.transform(test.toDF)
+ model.transform(test.toDF())
.select("id", "text", "probability", "prediction")
.collect()
.foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala
index c98c68a02f..e943d6c889 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala
@@ -81,7 +81,7 @@ object DatasetExample {
println(s"Loaded ${origData.count()} instances from file: ${params.input}")
// Convert input data to DataFrame explicitly.
- val df: DataFrame = origData.toDF
+ val df: DataFrame = origData.toDF()
println(s"Inferred schema:\n${df.schema.prettyJson}")
println(s"Converted to DataFrame with ${df.count()} records")
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
index 79d3d5a24c..6331d1c006 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
@@ -34,7 +34,7 @@ object RDDRelation {
// Importing the SQL context gives access to all the SQL functions and implicit conversions.
import sqlContext.implicits._
- val df = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF
+ val df = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))).toDF()
// Any RDD containing case classes can be registered as a table. The schema of the table is
// automatically inferred using scala reflection.
df.registerTempTable("records")
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 7128deba54..b7ba60ec28 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -68,7 +68,7 @@ object HiveFromSpark {
// You can also register RDDs as temporary tables within a HiveContext.
val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i")))
- rdd.toDF.registerTempTable("records")
+ rdd.toDF().registerTempTable("records")
// Queries can then join RDD data with data stored in Hive.
println("Result of SELECT *:")