author    Sean Zhong <seanzhong@databricks.com>  2016-06-14 09:10:27 -0700
committer Yin Huai <yhuai@databricks.com>        2016-06-14 09:10:27 -0700
commit    6e8cdef0cf36f6e921d9e1a65c61b66196935820 (patch)
tree      a15e8367d45bcac6308703b35453f7cafece9a7e
parent    53bb03084796231f724ff8369490df520e1ee33c (diff)
[SPARK-15914][SQL] Add deprecated methods back to SQLContext for backward source code compatibility
## What changes were proposed in this pull request?

Revert partial changes in SPARK-12600, and add some deprecated methods back to SQLContext for backward source code compatibility.

## How was this patch tested?

Manual test.

Author: Sean Zhong <seanzhong@databricks.com>

Closes #13637 from clockfly/SPARK-15914.
-rw-r--r--  project/MimaExcludes.scala                                        9
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala   284
2 files changed, 293 insertions(+), 0 deletions(-)
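
For context, a hypothetical 1.x-era caller (the paths and join column are invented for illustration) shows what this patch preserves: compiled 1.x binaries no longer link against Spark 2.0, but source like this recompiles unchanged once the shims below are restored, emitting only deprecation warnings.

    import org.apache.spark.sql.SQLContext

    object LegacyJob {
      // 1.x-style calls that this patch keeps source-compatible on Spark 2.0.
      def run(sqlContext: SQLContext): Unit = {
        val users  = sqlContext.jsonFile("/data/users.json") // deprecated since 1.4.0
        val events = sqlContext.parquetFile("/data/events")  // deprecated since 1.4.0
        users.join(events, "userId").show()                  // hypothetical column
      }
    }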
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 9d0d9b1be0..a6209d78e1 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -778,6 +778,15 @@ object MimaExcludes {
) ++ Seq(
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.asBreeze"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Matrix.asBreeze")
+ ) ++ Seq(
+ // [SPARK-15914] Binary compatibility is broken since the consolidation of Dataset and DataFrame
+ // in Spark 2.0. However, source level compatibility is still maintained.
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.load"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonRDD"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jsonFile"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.jdbc"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.parquetFile"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.SQLContext.applySchema")
)
case v if v.startsWith("1.6") =>
Seq(
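
These exclusions are IncompatibleResultTypeProblem rather than missing-method problems because of how the 2.0 consolidation was done: DataFrame survives only as a source-level alias, so every restored method now returns Dataset[Row] at the bytecode level. A paraphrased sketch of the relevant declaration from Spark 2.0's org.apache.spark.sql package object (not part of this diff):

    package org.apache.spark

    package object sql {
      // The standalone DataFrame class is gone in 2.0; this alias keeps source
      // compiling. Compiled 1.x callers, however, linked against methods whose
      // erased return type was the old DataFrame class, so MiMa flags a changed
      // result type for each method restored in this patch.
      type DataFrame = Dataset[Row]
    }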
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 6fcc9bb447..58b4e6c5f6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -736,6 +736,290 @@ class SQLContext private[sql](val sparkSession: SparkSession)
private[sql] def parseDataType(dataTypeString: String): DataType = {
DataType.fromJson(dataTypeString)
}
+
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ // Deprecated methods
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+ */
+ @deprecated("Use createDataFrame instead.", "1.3.0")
+ def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
+ createDataFrame(rowRDD, schema)
+ }
+
+ /**
+ * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+ */
+ @deprecated("Use createDataFrame instead.", "1.3.0")
+ def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
+ createDataFrame(rowRDD, schema)
+ }
+
+ /**
+ * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+ */
+ @deprecated("Use createDataFrame instead.", "1.3.0")
+ def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
+ createDataFrame(rdd, beanClass)
+ }
+
+ /**
+ * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
+ */
+ @deprecated("Use createDataFrame instead.", "1.3.0")
+ def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
+ createDataFrame(rdd, beanClass)
+ }
+
+ /**
+ * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
+ * [[DataFrame]] if no paths are passed in.
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().parquet()`.
+ */
+ @deprecated("Use read.parquet() instead.", "1.4.0")
+ @scala.annotation.varargs
+ def parquetFile(paths: String*): DataFrame = {
+ if (paths.isEmpty) {
+ emptyDataFrame
+ } else {
+ read.parquet(paths : _*)
+ }
+ }
+
+ /**
+ * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
+ * It goes through the entire dataset once to determine the schema.
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonFile(path: String): DataFrame = {
+ read.json(path)
+ }
+
+ /**
+ * Loads a JSON file (one object per line) and applies the given schema,
+ * returning the result as a [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonFile(path: String, schema: StructType): DataFrame = {
+ read.schema(schema).json(path)
+ }
+
+ /**
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonFile(path: String, samplingRatio: Double): DataFrame = {
+ read.option("samplingRatio", samplingRatio.toString).json(path)
+ }
+
+ /**
+ * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
+ * [[DataFrame]].
+ * It goes through the entire dataset once to determine the schema.
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
+
+ /**
+ * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
+ * [[DataFrame]].
+ * It goes through the entire dataset once to determine the schema.
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
+
+ /**
+ * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
+ * returning the result as a [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
+ read.schema(schema).json(json)
+ }
+
+ /**
+ * Loads a JavaRDD<String> storing JSON objects (one object per record) and applies the given
+ * schema, returning the result as a [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
+ read.schema(schema).json(json)
+ }
+
+ /**
+ * Loads an RDD[String] storing JSON objects (one object per record) inferring the
+ * schema, returning the result as a [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
+ read.option("samplingRatio", samplingRatio.toString).json(json)
+ }
+
+ /**
+ * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
+ * schema, returning the result as a [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().json()`.
+ */
+ @deprecated("Use read.json() instead.", "1.4.0")
+ def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
+ read.option("samplingRatio", samplingRatio.toString).json(json)
+ }
+
+ /**
+ * Returns the dataset stored at path as a DataFrame,
+ * using the default data source configured by spark.sql.sources.default.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by `read().load(path)`.
+ */
+ @deprecated("Use read.load(path) instead.", "1.4.0")
+ def load(path: String): DataFrame = {
+ read.load(path)
+ }
+
+ /**
+ * Returns the dataset stored at path as a DataFrame, using the given data source.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
+ */
+ @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
+ def load(path: String, source: String): DataFrame = {
+ read.format(source).load(path)
+ }
+
+ /**
+ * (Java-specific) Returns the dataset specified by the given data source and
+ * a set of options as a DataFrame.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
+ */
+ @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
+ def load(source: String, options: java.util.Map[String, String]): DataFrame = {
+ read.options(options).format(source).load()
+ }
+
+ /**
+ * (Scala-specific) Returns the dataset specified by the given data source and
+ * a set of options as a DataFrame.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
+ */
+ @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0")
+ def load(source: String, options: Map[String, String]): DataFrame = {
+ read.options(options).format(source).load()
+ }
+
+ /**
+ * (Java-specific) Returns the dataset specified by the given data source and
+ * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by
+ * `read().format(source).schema(schema).options(options).load()`.
+ */
+ @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
+ def load(
+ source: String,
+ schema: StructType,
+ options: java.util.Map[String, String]): DataFrame = {
+ read.format(source).schema(schema).options(options).load()
+ }
+
+ /**
+ * (Scala-specific) Returns the dataset specified by the given data source and
+ * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
+ *
+ * @group genericdata
+ * @deprecated As of 1.4.0, replaced by
+ * `read().format(source).schema(schema).options(options).load()`.
+ */
+ @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0")
+ def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
+ read.format(source).schema(schema).options(options).load()
+ }
+
+ /**
+ * Construct a [[DataFrame]] representing the database table named `table`,
+ * accessible via the JDBC URL `url`.
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
+ */
+ @deprecated("Use read.jdbc() instead.", "1.4.0")
+ def jdbc(url: String, table: String): DataFrame = {
+ read.jdbc(url, table, new Properties)
+ }
+
+ /**
+ * Construct a [[DataFrame]] representing the database table named `table`,
+ * accessible via the JDBC URL `url`. Partitions of the table will be retrieved in parallel
+ * based on the parameters passed to this function.
+ *
+ * @param columnName the name of a column of integral type that will be used for partitioning.
+ * @param lowerBound the minimum value of `columnName` used to decide partition stride
+ * @param upperBound the maximum value of `columnName` used to decide partition stride
+ * @param numPartitions the number of partitions. The range `lowerBound`-`upperBound` will be
+ * split evenly into this many partitions
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
+ */
+ @deprecated("Use read.jdbc() instead.", "1.4.0")
+ def jdbc(
+ url: String,
+ table: String,
+ columnName: String,
+ lowerBound: Long,
+ upperBound: Long,
+ numPartitions: Int): DataFrame = {
+ read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
+ }
+
+ /**
+ * Construct a [[DataFrame]] representing the database table named `table`,
+ * accessible via the JDBC URL `url`. The `theParts` parameter gives a list of expressions
+ * suitable for inclusion in WHERE clauses; each one defines one partition
+ * of the [[DataFrame]].
+ *
+ * @group specificdata
+ * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
+ */
+ @deprecated("Use read.jdbc() instead.", "1.4.0")
+ def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
+ read.jdbc(url, table, theParts, new Properties)
+ }
}
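
Since each restored method is a one-line forwarder to the DataFrameReader, the deprecated and current spellings should produce identical plans. A usage sketch with invented paths and schema:

    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.types.{StringType, StructField, StructType}

    def demo(sqlContext: SQLContext): Unit = {
      val schema = StructType(Seq(StructField("name", StringType)))

      // JSON with an explicit sampling ratio: deprecated shim vs. reader API.
      val logs = sqlContext.sparkContext.textFile("/tmp/logs.json")
      val j1 = sqlContext.jsonRDD(logs, 0.5)
      val j2 = sqlContext.read.option("samplingRatio", "0.5").json(logs)

      // Generic load with source, schema, and options: shim vs. reader API.
      val p1 = sqlContext.load("parquet", schema, Map("path" -> "/tmp/events"))
      val p2 = sqlContext.read.format("parquet").schema(schema)
        .options(Map("path" -> "/tmp/events")).load()
    }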