path: root/sql/core/src
author     Reynold Xin <rxin@databricks.com>  2016-01-04 18:02:38 -0800
committer  Michael Armbrust <michael@databricks.com>  2016-01-04 18:02:38 -0800
commit     77ab49b8575d2ebd678065fa70b0343d532ab9c2 (patch)
tree       f9c4a990499d1856494f787f8bfc095d68a69735 /sql/core/src
parent     fdfac22d08fc4fdc640843dd93a29e2ce4aee2ef (diff)
[SPARK-12600][SQL] Remove deprecated methods in Spark SQL
Author: Reynold Xin <rxin@databricks.com>

Closes #10559 from rxin/remove-deprecated-sql.
Diffstat (limited to 'sql/core/src')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala  |  12
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala  |  338
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala  |  21
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala  |  302
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala  |  5
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala  |  252
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/package.scala  |  6
-rw-r--r--  sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java  |  4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala  |  8
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala  |  9
10 files changed, 19 insertions, 938 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 5026c0d6d1..71fa970907 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -713,18 +713,6 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* by the evaluated values of the arguments.
*
* @group expr_ops
- * @since 1.3.0
- * @deprecated As of 1.5.0. Use isin. This will be removed in Spark 2.0.
- */
- @deprecated("use isin. This will be removed in Spark 2.0.", "1.5.0")
- @scala.annotation.varargs
- def in(list: Any*): Column = isin(list : _*)
-
- /**
- * A boolean expression that is evaluated to true if the value of this expression is contained
- * by the evaluated values of the arguments.
- *
- * @group expr_ops
* @since 1.5.0
*/
@scala.annotation.varargs
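
Migration note (not part of the patch): Column.in was a varargs alias for isin and is dropped here. A minimal sketch of the replacement call, assuming a DataFrame df with a column named "country" and sqlContext.implicits._ in scope:

    import sqlContext.implicits._  // enables the $"..." column syntax

    // Removed in this patch: df.filter($"country".in("US", "CA"))
    // Replacement, available since 1.5.0:
    val filtered = df.filter($"country".isin("US", "CA"))
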
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 0763aa4ed9..c42192c83d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1750,344 +1750,6 @@ class DataFrame private[sql](
}
}
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- // Deprecated methods
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
-
- /**
- * @deprecated As of 1.3.0, replaced by `toDF()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use toDF. This will be removed in Spark 2.0.", "1.3.0")
- def toSchemaRDD: DataFrame = this
-
- /**
- * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`.
- * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements.
- * If you pass `true` for `allowExisting`, it will drop any table with the
- * given name; if you pass `false`, it will throw if the table already
- * exists.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().jdbc()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use write.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
- def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = {
- val w = if (allowExisting) write.mode(SaveMode.Overwrite) else write
- w.jdbc(url, table, new Properties)
- }
-
- /**
- * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`.
- * Assumes the table already exists and has a compatible schema. If you
- * pass `true` for `overwrite`, it will `TRUNCATE` the table before
- * performing the `INSERT`s.
- *
- * The table must already exist on the database. It must have a schema
- * that is compatible with the schema of this RDD; inserting the rows of
- * the RDD in order via the simple statement
- * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().jdbc()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use write.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
- def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = {
- val w = if (overwrite) write.mode(SaveMode.Overwrite) else write.mode(SaveMode.Append)
- w.jdbc(url, table, new Properties)
- }
-
- /**
- * Saves the contents of this [[DataFrame]] as a parquet file, preserving the schema.
- * Files that are written out using this method can be read back in as a [[DataFrame]]
- * using the `parquetFile` function in [[SQLContext]].
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().parquet()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use write.parquet(path). This will be removed in Spark 2.0.", "1.4.0")
- def saveAsParquetFile(path: String): Unit = {
- write.format("parquet").mode(SaveMode.ErrorIfExists).save(path)
- }
-
- /**
- * Creates a table from the contents of this DataFrame.
- * It will use the default data source configured by spark.sql.sources.default.
- * This will fail if the table already exists.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.saveAsTable(tableName). This will be removed in Spark 2.0.", "1.4.0")
- def saveAsTable(tableName: String): Unit = {
- write.mode(SaveMode.ErrorIfExists).saveAsTable(tableName)
- }
-
- /**
- * Creates a table from the contents of this DataFrame, using the default data source
- * configured by spark.sql.sources.default and [[SaveMode.ErrorIfExists]] as the save mode.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.mode(mode).saveAsTable(tableName). This will be removed in Spark 2.0.",
- "1.4.0")
- def saveAsTable(tableName: String, mode: SaveMode): Unit = {
- write.mode(mode).saveAsTable(tableName)
- }
-
- /**
- * Creates a table at the given path from the contents of this DataFrame
- * based on a given data source and a set of options,
- * using [[SaveMode.ErrorIfExists]] as the save mode.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().format(source).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).saveAsTable(tableName). This will be removed in Spark 2.0.",
- "1.4.0")
- def saveAsTable(tableName: String, source: String): Unit = {
- write.format(source).saveAsTable(tableName)
- }
-
- /**
- * :: Experimental ::
- * Creates a table at the given path from the contents of this DataFrame
- * based on a given data source, [[SaveMode]] specified by mode, and a set of options.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = {
- write.format(source).mode(mode).saveAsTable(tableName)
- }
-
- /**
- * Creates a table at the given path from the contents of this DataFrame
- * based on a given data source, [[SaveMode]] specified by mode, and a set of options.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def saveAsTable(
- tableName: String,
- source: String,
- mode: SaveMode,
- options: java.util.Map[String, String]): Unit = {
- write.format(source).mode(mode).options(options).saveAsTable(tableName)
- }
-
- /**
- * (Scala-specific)
- * Creates a table from the contents of this DataFrame based on a given data source,
- * [[SaveMode]] specified by mode, and a set of options.
- *
- * Note that this currently only works with DataFrames that are created from a HiveContext as
- * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
- * an RDD out to a parquet file, and then register that file as a table. This "table" can then
- * be the target of an `insertInto`.
- *
- * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
- * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
- * and Parquet), the table is persisted in a Hive compatible format, which means other systems
- * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
- * specific format.
- *
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def saveAsTable(
- tableName: String,
- source: String,
- mode: SaveMode,
- options: Map[String, String]): Unit = {
- write.format(source).mode(mode).options(options).saveAsTable(tableName)
- }
-
- /**
- * Saves the contents of this DataFrame to the given path,
- * using the default data source configured by spark.sql.sources.default and
- * [[SaveMode.ErrorIfExists]] as the save mode.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().save(path)`. This will be removed in Spark 2.0.
- */
- @deprecated("Use write.save(path). This will be removed in Spark 2.0.", "1.4.0")
- def save(path: String): Unit = {
- write.save(path)
- }
-
- /**
- * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode,
- * using the default data source configured by spark.sql.sources.default.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().mode(mode).save(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.mode(mode).save(path). This will be removed in Spark 2.0.", "1.4.0")
- def save(path: String, mode: SaveMode): Unit = {
- write.mode(mode).save(path)
- }
-
- /**
- * Saves the contents of this DataFrame to the given path based on the given data source,
- * using [[SaveMode.ErrorIfExists]] as the save mode.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().format(source).save(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).save(path). This will be removed in Spark 2.0.", "1.4.0")
- def save(path: String, source: String): Unit = {
- write.format(source).save(path)
- }
-
- /**
- * Saves the contents of this DataFrame to the given path based on the given data source and
- * [[SaveMode]] specified by mode.
- * @group output
- * @deprecated As of 1.4.0, replaced by `write().format(source).mode(mode).save(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).save(path). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def save(path: String, source: String, mode: SaveMode): Unit = {
- write.format(source).mode(mode).save(path)
- }
-
- /**
- * Saves the contents of this DataFrame based on the given data source,
- * [[SaveMode]] specified by mode, and a set of options.
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().format(source).mode(mode).options(options).save(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).options(options).save(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def save(
- source: String,
- mode: SaveMode,
- options: java.util.Map[String, String]): Unit = {
- write.format(source).mode(mode).options(options).save()
- }
-
- /**
- * (Scala-specific)
- * Saves the contents of this DataFrame based on the given data source,
- * [[SaveMode]] specified by mode, and a set of options
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().format(source).mode(mode).options(options).save(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.format(source).mode(mode).options(options).save(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def save(
- source: String,
- mode: SaveMode,
- options: Map[String, String]): Unit = {
- write.format(source).mode(mode).options(options).save()
- }
-
- /**
- * Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def insertInto(tableName: String, overwrite: Boolean): Unit = {
- write.mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append).insertInto(tableName)
- }
-
- /**
- * Adds the rows from this RDD to the specified table.
- * Throws an exception if the table already exists.
- * @group output
- * @deprecated As of 1.4.0, replaced by
- * `write().mode(SaveMode.Append).saveAsTable(tableName)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use write.mode(SaveMode.Append).saveAsTable(tableName). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def insertInto(tableName: String): Unit = {
- write.mode(SaveMode.Append).insertInto(tableName)
- }
-
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- // End of deprecated methods
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
-
/**
* Wrap a DataFrame action to track all Spark jobs in the body so that we can connect them with
* an execution.
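
Migration note (not part of the patch): each removed DataFrame output method has a one-line equivalent on the DataFrameWriter returned by df.write, as the @deprecated messages above indicate. A minimal sketch, where df, the paths, the table names, and jdbcUrl are placeholders:

    import java.util.Properties
    import org.apache.spark.sql.SaveMode

    // Removed: df.saveAsParquetFile("/tmp/out")
    df.write.parquet("/tmp/out")

    // Removed: df.saveAsTable("people", "parquet", SaveMode.Overwrite)
    df.write.format("parquet").mode(SaveMode.Overwrite).saveAsTable("people")

    // Removed: df.insertIntoJDBC(jdbcUrl, "people", overwrite = true)
    df.write.mode(SaveMode.Overwrite).jdbc(jdbcUrl, "people", new Properties)

    // Removed: df.insertInto("people", overwrite = false)
    df.write.mode(SaveMode.Append).insertInto("people")
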
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 6debb302d9..d4df913e47 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -99,17 +99,6 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
}
/**
- * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by
- * a local or distributed file system).
- *
- * @since 1.4.0
- */
- // TODO: Remove this one in Spark 2.0.
- def load(path: String): DataFrame = {
- option("path", path).load()
- }
-
- /**
* Loads input in as a [[DataFrame]], for data sources that don't require a path (e.g. external
* key-value stores).
*
@@ -126,6 +115,16 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
}
/**
+ * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by
+ * a local or distributed file system).
+ *
+ * @since 1.4.0
+ */
+ def load(path: String): DataFrame = {
+ option("path", path).load()
+ }
+
+ /**
* Loads input in as a [[DataFrame]], for data sources that support multiple paths.
* Only works if the source is a HadoopFsRelationProvider.
*
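
Note (not part of the patch): only the deprecation TODO goes away here; the path-taking load(String) overload is kept and merely moved below the no-argument load(). The two calls below remain equivalent (the JSON path is a placeholder):

    // Explicit path argument:
    val viaPath = sqlContext.read.format("json").load("examples/people.json")

    // The same load expressed through the "path" option:
    val viaOption = sqlContext.read.format("json").option("path", "examples/people.json").load()
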
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 022303239f..3a875c4f9a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -888,9 +888,6 @@ class SQLContext private[sql](
}.toArray
}
- @deprecated("use org.apache.spark.sql.SparkPlanner", "1.6.0")
- protected[sql] class SparkPlanner extends sparkexecution.SparkPlanner(this)
-
@transient
protected[sql] val planner: sparkexecution.SparkPlanner = new sparkexecution.SparkPlanner(this)
@@ -908,10 +905,6 @@ class SQLContext private[sql](
)
}
- @deprecated("use org.apache.spark.sql.QueryExecution", "1.6.0")
- protected[sql] class QueryExecution(logical: LogicalPlan)
- extends sparkexecution.QueryExecution(this, logical)
-
/**
* Parses the data type in our internal string representation. The data type string should
* have the same format as the one generated by `toString` in scala.
@@ -952,301 +945,6 @@ class SQLContext private[sql](
}
}
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- // Deprecated methods
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
-
- /**
- * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
- def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
- createDataFrame(rowRDD, schema)
- }
-
- /**
- * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
- def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
- createDataFrame(rowRDD, schema)
- }
-
- /**
- * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
- def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
- createDataFrame(rdd, beanClass)
- }
-
- /**
- * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
- def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
- createDataFrame(rdd, beanClass)
- }
-
- /**
- * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
- * [[DataFrame]] if no paths are passed in.
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().parquet()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.parquet(). This will be removed in Spark 2.0.", "1.4.0")
- @scala.annotation.varargs
- def parquetFile(paths: String*): DataFrame = {
- if (paths.isEmpty) {
- emptyDataFrame
- } else {
- read.parquet(paths : _*)
- }
- }
-
- /**
- * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
- * It goes through the entire dataset once to determine the schema.
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonFile(path: String): DataFrame = {
- read.json(path)
- }
-
- /**
- * Loads a JSON file (one object per line) and applies the given schema,
- * returning the result as a [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonFile(path: String, schema: StructType): DataFrame = {
- read.schema(schema).json(path)
- }
-
- /**
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonFile(path: String, samplingRatio: Double): DataFrame = {
- read.option("samplingRatio", samplingRatio.toString).json(path)
- }
-
- /**
- * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
- * It goes through the entire dataset once to determine the schema.
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
-
- /**
- * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
- * It goes through the entire dataset once to determine the schema.
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
-
- /**
- * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
- * returning the result as a [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
- read.schema(schema).json(json)
- }
-
- /**
- * Loads a JavaRDD<String> storing JSON objects (one object per record) and applies the given
- * schema, returning the result as a [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
- read.schema(schema).json(json)
- }
-
- /**
- * Loads an RDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
- read.option("samplingRatio", samplingRatio.toString).json(json)
- }
-
- /**
- * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
- def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
- read.option("samplingRatio", samplingRatio.toString).json(json)
- }
-
- /**
- * Returns the dataset stored at path as a DataFrame,
- * using the default data source configured by spark.sql.sources.default.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by `read().load(path)`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.load(path). This will be removed in Spark 2.0.", "1.4.0")
- def load(path: String): DataFrame = {
- read.load(path)
- }
-
- /**
- * Returns the dataset stored at path as a DataFrame, using the given data source.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use read.format(source).load(path). This will be removed in Spark 2.0.", "1.4.0")
- def load(path: String, source: String): DataFrame = {
- read.format(source).load(path)
- }
-
- /**
- * (Java-specific) Returns the dataset specified by the given data source and
- * a set of options as a DataFrame.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use read.format(source).options(options).load(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def load(source: String, options: java.util.Map[String, String]): DataFrame = {
- read.options(options).format(source).load()
- }
-
- /**
- * (Scala-specific) Returns the dataset specified by the given data source and
- * a set of options as a DataFrame.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
- */
- @deprecated("Use read.format(source).options(options).load(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def load(source: String, options: Map[String, String]): DataFrame = {
- read.options(options).format(source).load()
- }
-
- /**
- * (Java-specific) Returns the dataset specified by the given data source and
- * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by
- * `read().format(source).schema(schema).options(options).load()`.
- */
- @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame =
- {
- read.format(source).schema(schema).options(options).load()
- }
-
- /**
- * (Scala-specific) Returns the dataset specified by the given data source and
- * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
- *
- * @group genericdata
- * @deprecated As of 1.4.0, replaced by
- * `read().format(source).schema(schema).options(options).load()`.
- */
- @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
- "This will be removed in Spark 2.0.", "1.4.0")
- def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
- read.format(source).schema(schema).options(options).load()
- }
-
- /**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
- * url named table.
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
- def jdbc(url: String, table: String): DataFrame = {
- read.jdbc(url, table, new Properties)
- }
-
- /**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
- * url named table. Partitions of the table will be retrieved in parallel based on the parameters
- * passed to this function.
- *
- * @param columnName the name of a column of integral type that will be used for partitioning.
- * @param lowerBound the minimum value of `columnName` used to decide partition stride
- * @param upperBound the maximum value of `columnName` used to decide partition stride
- * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split
- * evenly into this many partitions
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
- def jdbc(
- url: String,
- table: String,
- columnName: String,
- lowerBound: Long,
- upperBound: Long,
- numPartitions: Int): DataFrame = {
- read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
- }
-
- /**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
- * url named table. The theParts parameter gives a list of expressions
- * suitable for inclusion in WHERE clauses; each one defines one partition
- * of the [[DataFrame]].
- *
- * @group specificdata
- * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
- */
- @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
- def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
- read.jdbc(url, table, theParts, new Properties)
- }
-
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
- // End of deprecated methods
- ////////////////////////////////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////
-
-
// Register a successfully instantiated context to the singleton. This should be at the end of
// the class definition so that the singleton is updated only if there is no exception in the
// construction of the instance.
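
Migration note (not part of the patch): the removed SQLContext loaders all map onto createDataFrame or the DataFrameReader returned by sqlContext.read. A minimal sketch, where rowRDD, schema, path, source, options, and url stand in for the arguments of the removed overloads:

    import java.util.Properties

    // Removed: sqlContext.applySchema(rowRDD, schema)
    val people = sqlContext.createDataFrame(rowRDD, schema)

    // Removed: sqlContext.jsonFile(path) and sqlContext.parquetFile(path)
    val fromJson    = sqlContext.read.json(path)
    val fromParquet = sqlContext.read.parquet(path)

    // Removed: sqlContext.load(source, options)
    val loaded = sqlContext.read.format(source).options(options).load()

    // Removed: sqlContext.jdbc(url, "people")
    val viaJdbc = sqlContext.read.jdbc(url, "people", new Properties)
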
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
index af964b4d35..8e1fe8090c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
@@ -44,6 +44,7 @@ import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.{RDD, SqlNewHadoopPartition, SqlNewHadoopRDD}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
import org.apache.spark.sql.execution.datasources.PartitionSpec
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.{DataType, StructType}
@@ -638,7 +639,7 @@ private[sql] object ParquetRelation extends Logging {
logInfo(
s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " +
"falling back to the deprecated DataType.fromCaseClassString parser.")
- DataType.fromCaseClassString(serializedSchema.get)
+ LegacyTypeStringParser.parse(serializedSchema.get)
}
.recover { case cause: Throwable =>
logWarning(
@@ -821,7 +822,7 @@ private[sql] object ParquetRelation extends Logging {
logInfo(
s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " +
"falling back to the deprecated DataType.fromCaseClassString parser.")
- DataType.fromCaseClassString(schemaString).asInstanceOf[StructType]
+ LegacyTypeStringParser.parse(schemaString).asInstanceOf[StructType]
}.recoverWith {
case cause: Throwable =>
logWarning(
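
Note (not part of the patch): both hunks swap the deprecated DataType.fromCaseClassString for LegacyTypeStringParser.parse inside an existing JSON-first fallback. A sketch of that fallback pattern as used internally (the helper name is ours; LegacyTypeStringParser is an internal catalyst utility, so this assumes code living inside Spark's sql packages, as ParquetRelation does):

    import scala.util.Try
    import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
    import org.apache.spark.sql.types.{DataType, StructType}

    // Prefer the JSON schema representation; fall back to the legacy
    // case-class string format written by very old Spark versions.
    def deserializeSchema(schemaString: String): StructType =
      Try(DataType.fromJson(schemaString).asInstanceOf[StructType])
        .getOrElse(LegacyTypeStringParser.parse(schemaString).asInstanceOf[StructType])
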
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 3572f3c3a1..2b3db398aa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -559,13 +559,6 @@ object functions extends LegacyFunctions {
//////////////////////////////////////////////////////////////////////////////////////////////
/**
- * @group window_funcs
- * @deprecated As of 1.6.0, replaced by `cume_dist`. This will be removed in Spark 2.0.
- */
- @deprecated("Use cume_dist. This will be removed in Spark 2.0.", "1.6.0")
- def cumeDist(): Column = cume_dist()
-
- /**
* Window function: returns the cumulative distribution of values within a window partition,
* i.e. the fraction of rows that are below the current row.
*
@@ -580,13 +573,6 @@ object functions extends LegacyFunctions {
def cume_dist(): Column = withExpr { new CumeDist }
/**
- * @group window_funcs
- * @deprecated As of 1.6.0, replaced by `dense_rank`. This will be removed in Spark 2.0.
- */
- @deprecated("Use dense_rank. This will be removed in Spark 2.0.", "1.6.0")
- def denseRank(): Column = dense_rank()
-
- /**
* Window function: returns the rank of rows within a window partition, without any gaps.
*
* The difference between rank and denseRank is that denseRank leaves no gaps in ranking
@@ -716,13 +702,6 @@ object functions extends LegacyFunctions {
def ntile(n: Int): Column = withExpr { new NTile(Literal(n)) }
/**
- * @group window_funcs
- * @deprecated As of 1.6.0, replaced by `percent_rank`. This will be removed in Spark 2.0.
- */
- @deprecated("Use percent_rank. This will be removed in Spark 2.0.", "1.6.0")
- def percentRank(): Column = percent_rank()
-
- /**
* Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
*
* This is computed by:
@@ -753,13 +732,6 @@ object functions extends LegacyFunctions {
def rank(): Column = withExpr { new Rank }
/**
- * @group window_funcs
- * @deprecated As of 1.6.0, replaced by `row_number`. This will be removed in Spark 2.0.
- */
- @deprecated("Use row_number. This will be removed in Spark 2.0.", "1.6.0")
- def rowNumber(): Column = row_number()
-
- /**
* Window function: returns a sequential number starting at 1 within a window partition.
*
* @group window_funcs
@@ -828,13 +800,6 @@ object functions extends LegacyFunctions {
def coalesce(e: Column*): Column = withExpr { Coalesce(e.map(_.expr)) }
/**
- * @group normal_funcs
- * @deprecated As of 1.6.0, replaced by `input_file_name`. This will be removed in Spark 2.0.
- */
- @deprecated("Use input_file_name. This will be removed in Spark 2.0.", "1.6.0")
- def inputFileName(): Column = input_file_name()
-
- /**
* Creates a string column for the file name of the current Spark task.
*
* @group normal_funcs
@@ -843,13 +808,6 @@ object functions extends LegacyFunctions {
def input_file_name(): Column = withExpr { InputFileName() }
/**
- * @group normal_funcs
- * @deprecated As of 1.6.0, replaced by `isnan`. This will be removed in Spark 2.0.
- */
- @deprecated("Use isnan. This will be removed in Spark 2.0.", "1.6.0")
- def isNaN(e: Column): Column = isnan(e)
-
- /**
* Return true iff the column is NaN.
*
* @group normal_funcs
@@ -973,14 +931,6 @@ object functions extends LegacyFunctions {
def randn(): Column = randn(Utils.random.nextLong)
/**
- * @group normal_funcs
- * @since 1.4.0
- * @deprecated As of 1.6.0, replaced by `spark_partition_id`. This will be removed in Spark 2.0.
- */
- @deprecated("Use cume_dist. This will be removed in Spark 2.0.", "1.6.0")
- def sparkPartitionId(): Column = spark_partition_id()
-
- /**
* Partition ID of the Spark task.
*
* Note that this is indeterministic because it depends on data partitioning and task scheduling.
@@ -2534,24 +2484,6 @@ object functions extends LegacyFunctions {
}""")
}
- (0 to 10).map { x =>
- val args = (1 to x).map(i => s"arg$i: Column").mkString(", ")
- val fTypes = Seq.fill(x + 1)("_").mkString(", ")
- val argsInUDF = (1 to x).map(i => s"arg$i.expr").mkString(", ")
- println(s"""
- /**
- * Call a Scala function of ${x} arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- */
- @deprecated("Use udf", "1.5.0")
- def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = withExpr {
- ScalaUDF(f, returnType, Option(Seq($argsInUDF)))
- }""")
- }
*/
/**
* Defines a user-defined function of 0 arguments as user-defined function (UDF).
@@ -2685,161 +2617,6 @@ object functions extends LegacyFunctions {
UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes)
}
- //////////////////////////////////////////////////////////////////////////////////////////////////
- /**
- * Call a Scala function of 0 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function0[_], returnType: DataType): Column = withExpr {
- ScalaUDF(f, returnType, Seq())
- }
-
- /**
- * Call a Scala function of 1 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr))
- }
-
- /**
- * Call a Scala function of 2 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr))
- }
-
- /**
- * Call a Scala function of 3 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr))
- }
-
- /**
- * Call a Scala function of 4 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr))
- }
-
- /**
- * Call a Scala function of 5 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr))
- }
-
- /**
- * Call a Scala function of 6 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr))
- }
-
- /**
- * Call a Scala function of 7 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr))
- }
-
- /**
- * Call a Scala function of 8 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf()
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr))
- }
-
- /**
- * Call a Scala function of 9 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf().
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr))
- }
-
- /**
- * Call a Scala function of 10 arguments as user-defined function (UDF). This requires
- * you to specify the return data type.
- *
- * @group udf_funcs
- * @since 1.3.0
- * @deprecated As of 1.5.0, since it's redundant with udf().
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
- def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = withExpr {
- ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr))
- }
-
// scalastyle:on parameter.number
// scalastyle:on line.size.limit
@@ -2877,33 +2654,4 @@ object functions extends LegacyFunctions {
UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false)
}
- /**
- * Call a user-defined function.
- * Example:
- * {{{
- * import org.apache.spark.sql._
- *
- * val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
- * val sqlContext = df.sqlContext
- * sqlContext.udf.register("simpleUDF", (v: Int) => v * v)
- * df.select($"id", callUdf("simpleUDF", $"value"))
- * }}}
- *
- * @group udf_funcs
- * @since 1.4.0
- * @deprecated As of 1.5.0, since it was not coherent to have two functions callUdf and callUDF.
- * This will be removed in Spark 2.0.
- */
- @deprecated("Use callUDF. This will be removed in Spark 2.0.", "1.5.0")
- def callUdf(udfName: String, cols: Column*): Column = withExpr {
- // Note: we avoid using closures here because on file systems that are case-insensitive, the
- // compiled class file for the closure here will conflict with the one in callUDF (upper case).
- val exprs = new Array[Expression](cols.size)
- var i = 0
- while (i < cols.size) {
- exprs(i) = cols(i).expr
- i += 1
- }
- UnresolvedFunction(udfName, exprs, isDistinct = false)
- }
}
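
Migration note (not part of the patch): functions.scala loses two groups of deprecated entry points: the camelCase aliases (cumeDist, denseRank, percentRank, rowNumber, inputFileName, isNaN, sparkPartitionId) in favor of their snake_case forms, and the typed callUDF/callUdf overloads in favor of udf and the surviving name-based callUDF. A minimal sketch, where df and sqlContext are placeholders:

    import org.apache.spark.sql.functions._

    // Removed: callUDF((x: Int) => x * x, IntegerType, df("value"))
    // Replacement: declare a typed UDF once, then apply it as a Column.
    val square = udf((x: Int) => x * x)
    val squared = df.select(df("id"), square(df("value")))

    // Removed: callUdf("simpleUDF", df("value"))
    // Replacement: the upper-case callUDF, after registering the function by name.
    sqlContext.udf.register("simpleUDF", (x: Int) => x * x)
    val viaName = df.select(df("id"), callUDF("simpleUDF", df("value")))
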
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index a9c600b139..bd73a36fd4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -42,10 +42,4 @@ package object sql {
@DeveloperApi
type Strategy = org.apache.spark.sql.catalyst.planning.GenericStrategy[SparkPlan]
- /**
- * Type alias for [[DataFrame]]. Kept here for backward source compatibility for Scala.
- * @deprecated As of 1.3.0, replaced by `DataFrame`.
- */
- @deprecated("use DataFrame", "1.3.0")
- type SchemaRDD = DataFrame
}
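
Note (not part of the patch): the SchemaRDD type alias is gone, so any remaining Scala source that referenced it needs to name DataFrame directly. A trivial sketch (the method is hypothetical):

    import org.apache.spark.sql.DataFrame

    // Before: def transform(rdd: SchemaRDD): SchemaRDD
    def transform(df: DataFrame): DataFrame = df.distinct()
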
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
index 7b50aad4ad..640efcc737 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
@@ -107,7 +107,7 @@ public class JavaApplySchemaSuite implements Serializable {
fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false));
StructType schema = DataTypes.createStructType(fields);
- DataFrame df = sqlContext.applySchema(rowRDD, schema);
+ DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
df.registerTempTable("people");
Row[] actual = sqlContext.sql("SELECT * FROM people").collect();
@@ -143,7 +143,7 @@ public class JavaApplySchemaSuite implements Serializable {
fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false));
StructType schema = DataTypes.createStructType(fields);
- DataFrame df = sqlContext.applySchema(rowRDD, schema);
+ DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
df.registerTempTable("people");
List<String> actual = sqlContext.sql("SELECT * FROM people").toJavaRDD().map(new Function<Row, String>() {
@Override
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 38c0eb589f..53a9788024 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -298,7 +298,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)
checkAnswer(
- testData.select(isNaN($"a"), isNaN($"b")),
+ testData.select(isnan($"a"), isnan($"b")),
Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)
checkAnswer(
@@ -586,7 +586,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
Iterator(Tuple1(1), Tuple1(2))
}.toDF("a")
checkAnswer(
- df.select(sparkPartitionId()),
+ df.select(spark_partition_id()),
Row(0) :: Row(0) :: Row(1) :: Row(1) :: Nil
)
}
@@ -595,11 +595,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
withTempPath { dir =>
val data = sparkContext.parallelize(0 to 10).toDF("id")
data.write.parquet(dir.getCanonicalPath)
- val answer = sqlContext.read.parquet(dir.getCanonicalPath).select(inputFileName())
+ val answer = sqlContext.read.parquet(dir.getCanonicalPath).select(input_file_name())
.head.getString(0)
assert(answer.contains(dir.getCanonicalPath))
- checkAnswer(data.select(inputFileName()).limit(1), Row(""))
+ checkAnswer(data.select(input_file_name()).limit(1), Row(""))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ab02b32f91..e8fa663363 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -341,15 +341,6 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
)
}
- test("deprecated callUdf in SQLContext") {
- val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
- val sqlctx = df.sqlContext
- sqlctx.udf.register("simpleUdf", (v: Int) => v * v)
- checkAnswer(
- df.select($"id", callUdf("simpleUdf", $"value")),
- Row("id1", 1) :: Row("id2", 16) :: Row("id3", 25) :: Nil)
- }
-
test("callUDF in SQLContext") {
val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
val sqlctx = df.sqlContext