about summary refs log tree commit diff
path: root/examples/src
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src')
-rw-r--r--examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java3
-rw-r--r--examples/src/main/python/mllib/binary_classification_metrics_example.py15
-rw-r--r--examples/src/main/python/mllib/bisecting_k_means_example.py5
-rw-r--r--examples/src/main/python/mllib/elementwise_product_example.py2
-rw-r--r--examples/src/main/python/sql/hive.py4
-rw-r--r--examples/src/main/python/status_api_demo.py6
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala4
7 files changed, 18 insertions, 21 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java b/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java
index 8d06d38cf2..2fe1307d8e 100644
--- a/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java
@@ -17,6 +17,7 @@
package org.apache.spark.examples.sql.hive;
// $example on:spark_hive$
+import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
@@ -56,7 +57,7 @@ public class JavaSparkHiveExample {
public static void main(String[] args) {
// $example on:spark_hive$
// warehouseLocation points to the default location for managed databases and tables
- String warehouseLocation = "spark-warehouse";
+ String warehouseLocation = new File("spark-warehouse").getAbsolutePath();
SparkSession spark = SparkSession
.builder()
.appName("Java Spark Hive Example")
diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py
index 91f8378f29..d14ce7982e 100644
--- a/examples/src/main/python/mllib/binary_classification_metrics_example.py
+++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -18,25 +18,20 @@
Binary Classification Metrics Example.
"""
from __future__ import print_function
-from pyspark.sql import SparkSession
+from pyspark import SparkContext
# $example on$
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.evaluation import BinaryClassificationMetrics
-from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import MLUtils
# $example off$
if __name__ == "__main__":
- spark = SparkSession\
- .builder\
- .appName("BinaryClassificationMetricsExample")\
- .getOrCreate()
+ sc = SparkContext(appName="BinaryClassificationMetricsExample")
# $example on$
# Several of the methods available in scala are currently missing from pyspark
# Load training data in LIBSVM format
- data = spark\
- .read.format("libsvm").load("data/mllib/sample_binary_classification_data.txt")\
- .rdd.map(lambda row: LabeledPoint(row[0], row[1]))
+ data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt")
# Split data into training (60%) and test (40%)
training, test = data.randomSplit([0.6, 0.4], seed=11)
@@ -58,4 +53,4 @@ if __name__ == "__main__":
print("Area under ROC = %s" % metrics.areaUnderROC)
# $example off$
- spark.stop()
+ sc.stop()
diff --git a/examples/src/main/python/mllib/bisecting_k_means_example.py b/examples/src/main/python/mllib/bisecting_k_means_example.py
index 7f4d0402d6..31f3e72d7f 100644
--- a/examples/src/main/python/mllib/bisecting_k_means_example.py
+++ b/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -40,11 +40,6 @@ if __name__ == "__main__":
# Evaluate clustering
cost = model.computeCost(parsedData)
print("Bisecting K-means Cost = " + str(cost))
-
- # Save and load model
- path = "target/org/apache/spark/PythonBisectingKMeansExample/BisectingKMeansModel"
- model.save(sc, path)
- sameModel = BisectingKMeansModel.load(sc, path)
# $example off$
sc.stop()
diff --git a/examples/src/main/python/mllib/elementwise_product_example.py b/examples/src/main/python/mllib/elementwise_product_example.py
index 6d8bf6d42e..8ae9afb1dc 100644
--- a/examples/src/main/python/mllib/elementwise_product_example.py
+++ b/examples/src/main/python/mllib/elementwise_product_example.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
print(each)
print("transformedData2:")
- for each in transformedData2.collect():
+ for each in transformedData2:
print(each)
sc.stop()
diff --git a/examples/src/main/python/sql/hive.py b/examples/src/main/python/sql/hive.py
index ba01544a5b..1f175d7258 100644
--- a/examples/src/main/python/sql/hive.py
+++ b/examples/src/main/python/sql/hive.py
@@ -18,7 +18,7 @@
from __future__ import print_function
# $example on:spark_hive$
-from os.path import expanduser, join
+from os.path import expanduser, join, abspath
from pyspark.sql import SparkSession
from pyspark.sql import Row
@@ -34,7 +34,7 @@ Run with:
if __name__ == "__main__":
# $example on:spark_hive$
# warehouse_location points to the default location for managed databases and tables
- warehouse_location = 'spark-warehouse'
+ warehouse_location = abspath('spark-warehouse')
spark = SparkSession \
.builder \
diff --git a/examples/src/main/python/status_api_demo.py b/examples/src/main/python/status_api_demo.py
index 49b7902185..8cc8cc820c 100644
--- a/examples/src/main/python/status_api_demo.py
+++ b/examples/src/main/python/status_api_demo.py
@@ -19,7 +19,11 @@ from __future__ import print_function
import time
import threading
-import Queue
+import sys
+if sys.version >= '3':
+ import queue as Queue
+else:
+ import Queue
from pyspark import SparkConf, SparkContext
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala
index d29ed958fe..3de26364b5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala
@@ -17,6 +17,8 @@
package org.apache.spark.examples.sql.hive
// $example on:spark_hive$
+import java.io.File
+
import org.apache.spark.sql.Row
import org.apache.spark.sql.SparkSession
// $example off:spark_hive$
@@ -38,7 +40,7 @@ object SparkHiveExample {
// $example on:spark_hive$
// warehouseLocation points to the default location for managed databases and tables
- val warehouseLocation = "spark-warehouse"
+ val warehouseLocation = new File("spark-warehouse").getAbsolutePath
val spark = SparkSession
.builder()