author | Reynold Xin <rxin@databricks.com> | 2016-01-04 18:02:38 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-01-04 18:02:38 -0800 |
commit | 77ab49b8575d2ebd678065fa70b0343d532ab9c2 (patch) | |
tree | f9c4a990499d1856494f787f8bfc095d68a69735 /sql/core/src | |
parent | fdfac22d08fc4fdc640843dd93a29e2ce4aee2ef (diff) | |
download | spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.tar.gz spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.tar.bz2 spark-77ab49b8575d2ebd678065fa70b0343d532ab9c2.zip |
[SPARK-12600][SQL] Remove deprecated methods in Spark SQL
Author: Reynold Xin <rxin@databricks.com>
Closes #10559 from rxin/remove-deprecated-sql.
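For code that still uses the removed APIs, the migration path is the one named in each deprecation message: the `read`/`write` builders and the snake_case function names. A minimal before/after sketch (assuming an existing `SQLContext` named `sqlContext` and a hypothetical `people.json` input):

```scala
import org.apache.spark.sql.SaveMode

// was: val df = sqlContext.jsonFile("people.json")
val df = sqlContext.read.json("people.json")

// was: df.filter(df("age").in(21, 22, 23))
val adults = df.filter(df("age").isin(21, 22, 23))

// was: adults.saveAsParquetFile("adults.parquet")
adults.write.mode(SaveMode.ErrorIfExists).parquet("adults.parquet")
```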
Diffstat (limited to 'sql/core/src')
10 files changed, 19 insertions, 938 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 5026c0d6d1..71fa970907 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -713,18 +713,6 @@ class Column(protected[sql] val expr: Expression) extends Logging {
    * by the evaluated values of the arguments.
    *
    * @group expr_ops
-   * @since 1.3.0
-   * @deprecated As of 1.5.0. Use isin. This will be removed in Spark 2.0.
-   */
-  @deprecated("use isin. This will be removed in Spark 2.0.", "1.5.0")
-  @scala.annotation.varargs
-  def in(list: Any*): Column = isin(list : _*)
-
-  /**
-   * A boolean expression that is evaluated to true if the value of this expression is contained
-   * by the evaluated values of the arguments.
-   *
-   * @group expr_ops
    * @since 1.5.0
    */
   @scala.annotation.varargs
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 0763aa4ed9..c42192c83d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1750,344 +1750,6 @@ class DataFrame private[sql](
     }
   }
-
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // Deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `toDF()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use toDF. This will be removed in Spark 2.0.", "1.3.0")
-  def toSchemaRDD: DataFrame = this
-
-  /**
-   * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`.
-   * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements.
-   * If you pass `true` for `allowExisting`, it will drop any table with the
-   * given name; if you pass `false`, it will throw if the table already
-   * exists.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().jdbc()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
-  def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = {
-    val w = if (allowExisting) write.mode(SaveMode.Overwrite) else write
-    w.jdbc(url, table, new Properties)
-  }
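Both removed JDBC writers collapse onto `write.jdbc` plus a `SaveMode`, exactly as the deprecation messages say. A sketch, assuming a DataFrame `df` and a hypothetical connection URL:

```scala
import java.util.Properties
import org.apache.spark.sql.SaveMode

val url = "jdbc:postgresql:testdb" // hypothetical URL
// was: df.createJDBCTable(url, "people", allowExisting = true)
df.write.mode(SaveMode.Overwrite).jdbc(url, "people", new Properties)
// was: df.insertIntoJDBC(url, "people", overwrite = false)
df.write.mode(SaveMode.Append).jdbc(url, "people", new Properties)
```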
-
-  /**
-   * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`.
-   * Assumes the table already exists and has a compatible schema. If you
-   * pass `true` for `overwrite`, it will `TRUNCATE` the table before
-   * performing the `INSERT`s.
-   *
-   * The table must already exist on the database. It must have a schema
-   * that is compatible with the schema of this RDD; inserting the rows of
-   * the RDD in order via the simple statement
-   * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().jdbc()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
-  def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = {
-    val w = if (overwrite) write.mode(SaveMode.Overwrite) else write.mode(SaveMode.Append)
-    w.jdbc(url, table, new Properties)
-  }
-
-  /**
-   * Saves the contents of this [[DataFrame]] as a parquet file, preserving the schema.
-   * Files that are written out using this method can be read back in as a [[DataFrame]]
-   * using the `parquetFile` function in [[SQLContext]].
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().parquet()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.parquet(path). This will be removed in Spark 2.0.", "1.4.0")
-  def saveAsParquetFile(path: String): Unit = {
-    write.format("parquet").mode(SaveMode.ErrorIfExists).save(path)
-  }
-
-  /**
-   * Creates a table from the contents of this DataFrame.
-   * It will use the default data source configured by spark.sql.sources.default.
-   * This will fail if the table already exists.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.saveAsTable(tableName). This will be removed in Spark 2.0.", "1.4.0")
-  def saveAsTable(tableName: String): Unit = {
-    write.mode(SaveMode.ErrorIfExists).saveAsTable(tableName)
-  }
-
-  /**
-   * Creates a table from the contents of this DataFrame, using the default data source
-   * configured by spark.sql.sources.default and [[SaveMode.ErrorIfExists]] as the save mode.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.mode(mode).saveAsTable(tableName). This will be removed in Spark 2.0.",
-    "1.4.0")
-  def saveAsTable(tableName: String, mode: SaveMode): Unit = {
-    write.mode(mode).saveAsTable(tableName)
-  }
-
-  /**
-   * Creates a table at the given path from the contents of this DataFrame
-   * based on a given data source and a set of options,
-   * using [[SaveMode.ErrorIfExists]] as the save mode.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().format(source).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).saveAsTable(tableName). This will be removed in Spark 2.0.",
-    "1.4.0")
-  def saveAsTable(tableName: String, source: String): Unit = {
-    write.format(source).saveAsTable(tableName)
-  }
-
-  /**
-   * :: Experimental ::
-   * Creates a table at the given path from the contents of this DataFrame
-   * based on a given data source, [[SaveMode]] specified by mode, and a set of options.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = {
-    write.format(source).mode(mode).saveAsTable(tableName)
-  }
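All six removed `saveAsTable` overloads reduce to one builder chain, with each former parameter becoming a builder call. A sketch (the `"parquet"` source and the option map are illustrative):

```scala
import org.apache.spark.sql.SaveMode

// was: df.saveAsTable("people", "parquet", SaveMode.Overwrite, options)
df.write
  .format("parquet")
  .mode(SaveMode.Overwrite)
  .options(Map("compression" -> "snappy")) // hypothetical option
  .saveAsTable("people")
```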
-
-  /**
-   * Creates a table at the given path from the contents of this DataFrame
-   * based on a given data source, [[SaveMode]] specified by mode, and a set of options.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def saveAsTable(
-      tableName: String,
-      source: String,
-      mode: SaveMode,
-      options: java.util.Map[String, String]): Unit = {
-    write.format(source).mode(mode).options(options).saveAsTable(tableName)
-  }
-
-  /**
-   * (Scala-specific)
-   * Creates a table from the contents of this DataFrame based on a given data source,
-   * [[SaveMode]] specified by mode, and a set of options.
-   *
-   * Note that this currently only works with DataFrames that are created from a HiveContext as
-   * there is no notion of a persisted catalog in a standard SQL context. Instead you can write
-   * an RDD out to a parquet file, and then register that file as a table. This "table" can then
-   * be the target of an `insertInto`.
-   *
-   * When the DataFrame is created from a non-partitioned [[HadoopFsRelation]] with a single input
-   * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
-   * and Parquet), the table is persisted in a Hive compatible format, which means other systems
-   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
-   * specific format.
-   *
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().format(source).mode(mode).options(options).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def saveAsTable(
-      tableName: String,
-      source: String,
-      mode: SaveMode,
-      options: Map[String, String]): Unit = {
-    write.format(source).mode(mode).options(options).saveAsTable(tableName)
-  }
-
-  /**
-   * Saves the contents of this DataFrame to the given path,
-   * using the default data source configured by spark.sql.sources.default and
-   * [[SaveMode.ErrorIfExists]] as the save mode.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().save(path)`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.save(path). This will be removed in Spark 2.0.", "1.4.0")
-  def save(path: String): Unit = {
-    write.save(path)
-  }
-
-  /**
-   * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode,
-   * using the default data source configured by spark.sql.sources.default.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().mode(mode).save(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.mode(mode).save(path). This will be removed in Spark 2.0.", "1.4.0")
-  def save(path: String, mode: SaveMode): Unit = {
-    write.mode(mode).save(path)
-  }
-
-  /**
-   * Saves the contents of this DataFrame to the given path based on the given data source,
-   * using [[SaveMode.ErrorIfExists]] as the save mode.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().format(source).save(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).save(path). This will be removed in Spark 2.0.", "1.4.0")
-  def save(path: String, source: String): Unit = {
-    write.format(source).save(path)
-  }
-
-  /**
-   * Saves the contents of this DataFrame to the given path based on the given data source and
-   * [[SaveMode]] specified by mode.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by `write().format(source).mode(mode).save(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).save(path). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def save(path: String, source: String, mode: SaveMode): Unit = {
-    write.format(source).mode(mode).save(path)
-  }
-
-  /**
-   * Saves the contents of this DataFrame based on the given data source,
-   * [[SaveMode]] specified by mode, and a set of options.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().format(source).mode(mode).options(options).save(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).options(options).save(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def save(
-      source: String,
-      mode: SaveMode,
-      options: java.util.Map[String, String]): Unit = {
-    write.format(source).mode(mode).options(options).save()
-  }
-
-  /**
-   * (Scala-specific)
-   * Saves the contents of this DataFrame based on the given data source,
-   * [[SaveMode]] specified by mode, and a set of options.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().format(source).mode(mode).options(options).save(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.format(source).mode(mode).options(options).save(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def save(
-      source: String,
-      mode: SaveMode,
-      options: Map[String, String]): Unit = {
-    write.format(source).mode(mode).options(options).save()
-  }
-
-  /**
-   * Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def insertInto(tableName: String, overwrite: Boolean): Unit = {
-    write.mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append).insertInto(tableName)
-  }
-
-  /**
-   * Adds the rows from this RDD to the specified table.
-   * Throws an exception if the table already exists.
-   * @group output
-   * @deprecated As of 1.4.0, replaced by
-   *             `write().mode(SaveMode.Append).saveAsTable(tableName)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use write.mode(SaveMode.Append).saveAsTable(tableName). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def insertInto(tableName: String): Unit = {
-    write.mode(SaveMode.Append).insertInto(tableName)
-  }
-
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // End of deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
   /**
    * Wrap a DataFrame action to track all Spark jobs in the body so that we can connect them with
    * an execution.
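The `save` and `insertInto` overloads follow the same pattern, with every removed parameter becoming a builder call. A sketch with an illustrative path and table name:

```scala
import org.apache.spark.sql.SaveMode

// was: df.save("/tmp/out", "json", SaveMode.ErrorIfExists)
df.write.format("json").mode(SaveMode.ErrorIfExists).save("/tmp/out")

// was: df.insertInto("events", overwrite = true)
df.write.mode(SaveMode.Overwrite).insertInto("events")
```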
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 6debb302d9..d4df913e47 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -99,17 +99,6 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
   }

   /**
-   * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by
-   * a local or distributed file system).
-   *
-   * @since 1.4.0
-   */
-  // TODO: Remove this one in Spark 2.0.
-  def load(path: String): DataFrame = {
-    option("path", path).load()
-  }
-
-  /**
    * Loads input in as a [[DataFrame]], for data sources that don't require a path (e.g. external
    * key-value stores).
    *
@@ -126,6 +115,16 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
   }

   /**
+   * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by
+   * a local or distributed file system).
+   *
+   * @since 1.4.0
+   */
+  def load(path: String): DataFrame = {
+    option("path", path).load()
+  }
+
+  /**
    * Loads input in as a [[DataFrame]], for data sources that support multiple paths.
    * Only works if the source is a HadoopFsRelationProvider.
    *
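Worth noting: this hunk only moves `load(path)`, it does not remove it. The `TODO: Remove this one in Spark 2.0` comment is dropped, so the path-based form stays supported alongside the no-argument form. The two calls below should be equivalent:

```scala
// Path supplied as an option, then a no-argument load():
val a = sqlContext.read.format("json").option("path", "people.json").load()
// Path supplied directly; kept in Spark 2.0:
val b = sqlContext.read.format("json").load("people.json")
```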
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 022303239f..3a875c4f9a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -888,9 +888,6 @@ class SQLContext private[sql](
     }.toArray
   }

-  @deprecated("use org.apache.spark.sql.SparkPlanner", "1.6.0")
-  protected[sql] class SparkPlanner extends sparkexecution.SparkPlanner(this)
-
   @transient
   protected[sql] val planner: sparkexecution.SparkPlanner = new sparkexecution.SparkPlanner(this)

@@ -908,10 +905,6 @@ class SQLContext private[sql](
     )
   }

-  @deprecated("use org.apache.spark.sql.QueryExecution", "1.6.0")
-  protected[sql] class QueryExecution(logical: LogicalPlan)
-    extends sparkexecution.QueryExecution(this, logical)
-
   /**
    * Parses the data type in our internal string representation. The data type string should
    * have the same format as the one generated by `toString` in scala.
@@ -952,301 +945,6 @@ class SQLContext private[sql](
     }
   }

-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // Deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
-  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
-  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
-  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use createDataFrame. This will be removed in Spark 2.0.", "1.3.0")
-  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
-   * [[DataFrame]] if no paths are passed in.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().parquet()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.parquet(). This will be removed in Spark 2.0.", "1.4.0")
-  @scala.annotation.varargs
-  def parquetFile(paths: String*): DataFrame = {
-    if (paths.isEmpty) {
-      emptyDataFrame
-    } else {
-      read.parquet(paths : _*)
-    }
-  }
-
-  /**
-   * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonFile(path: String): DataFrame = {
-    read.json(path)
-  }
-
-  /**
-   * Loads a JSON file (one object per line) and applies the given schema,
-   * returning the result as a [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonFile(path: String, schema: StructType): DataFrame = {
-    read.schema(schema).json(path)
-  }
-
-  /**
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(path)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * [[DataFrame]].
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * [[DataFrame]].
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
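`applySchema` maps one-to-one onto `createDataFrame`, and the `jsonFile`/`parquetFile` loaders onto the `read` builder. A self-contained sketch of the `applySchema` replacement, using a hypothetical two-column schema:

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val schema = StructType(Seq(
  StructField("name", StringType, nullable = true),
  StructField("age", IntegerType, nullable = false)))
val rowRDD = sqlContext.sparkContext.parallelize(Seq(Row("alice", 30), Row("bob", 25)))
// was: sqlContext.applySchema(rowRDD, schema)
val people = sqlContext.createDataFrame(rowRDD, schema)
```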
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
-   * returning the result as a [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD<String> storing JSON objects (one object per record) and applies the given
-   * schema, returning the result as a [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.json(). This will be removed in Spark 2.0.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame,
-   * using the default data source configured by spark.sql.sources.default.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().load(path)`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.load(path). This will be removed in Spark 2.0.", "1.4.0")
-  def load(path: String): DataFrame = {
-    read.load(path)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame, using the given data source.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.format(source).load(path). This will be removed in Spark 2.0.", "1.4.0")
-  def load(path: String, source: String): DataFrame = {
-    read.format(source).load(path)
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.format(source).options(options).load(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def load(source: String, options: java.util.Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def load(source: String, options: Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame =
-  {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *             `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load(). " +
-    "This will be removed in Spark 2.0.", "1.4.0")
-  def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
-   * url named table.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
-  def jdbc(url: String, table: String): DataFrame = {
-    read.jdbc(url, table, new Properties)
-  }
-
-  /**
-   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
-   * url named table. Partitions of the table will be retrieved in parallel based on the parameters
-   * passed to this function.
-   *
-   * @param columnName the name of a column of integral type that will be used for partitioning.
-   * @param lowerBound the minimum value of `columnName` used to decide partition stride
-   * @param upperBound the maximum value of `columnName` used to decide partition stride
-   * @param numPartitions the number of partitions. The range `lowerBound`-`upperBound` will be
-   *                      split evenly into this many partitions
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
-  def jdbc(
-      url: String,
-      table: String,
-      columnName: String,
-      lowerBound: Long,
-      upperBound: Long,
-      numPartitions: Int): DataFrame = {
-    read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
-  }
-
-  /**
-   * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
-   * url named table. The theParts parameter gives a list of expressions
-   * suitable for inclusion in WHERE clauses; each one defines one partition
-   * of the [[DataFrame]].
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use read.jdbc(). This will be removed in Spark 2.0.", "1.4.0")
-  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
-    read.jdbc(url, table, theParts, new Properties)
-  }
-
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // End of deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
   // Register a successfully instantiated context to the singleton. This should be at the end of
   // the class definition so that the singleton is updated only if there is no exception in the
   // construction of the instance.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
index af964b4d35..8e1fe8090c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
@@ -44,6 +44,7 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.rdd.{RDD, SqlNewHadoopPartition, SqlNewHadoopRDD}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
 import org.apache.spark.sql.execution.datasources.PartitionSpec
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{DataType, StructType}
@@ -638,7 +639,7 @@ private[sql] object ParquetRelation extends Logging {
             logInfo(
               s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " +
                 "falling back to the deprecated DataType.fromCaseClassString parser.")
-            DataType.fromCaseClassString(serializedSchema.get)
+            LegacyTypeStringParser.parse(serializedSchema.get)
           }
           .recover { case cause: Throwable =>
             logWarning(
@@ -821,7 +822,7 @@ private[sql] object ParquetRelation extends Logging {
         logInfo(
           s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " +
             "falling back to the deprecated DataType.fromCaseClassString parser.")
-        DataType.fromCaseClassString(schemaString).asInstanceOf[StructType]
+        LegacyTypeStringParser.parse(schemaString).asInstanceOf[StructType]
       }.recoverWith {
         case cause: Throwable =>
           logWarning(
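The `ParquetRelation` change swaps the deprecated `DataType.fromCaseClassString` for `LegacyTypeStringParser.parse` inside the existing try-then-fall-back pattern. A condensed sketch of that pattern (logging elided; `LegacyTypeStringParser` is an internal catalyst utility, so treat this as illustrative rather than public API):

```scala
import scala.util.Try
import org.apache.spark.sql.catalyst.util.LegacyTypeStringParser
import org.apache.spark.sql.types.{DataType, StructType}

def deserializeSchema(serialized: String): StructType =
  Try(DataType.fromJson(serialized))
    .recover { case _: Throwable =>
      // Not JSON: fall back to the legacy case-class string format.
      LegacyTypeStringParser.parse(serialized)
    }
    .map(_.asInstanceOf[StructType])
    .get
```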
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 3572f3c3a1..2b3db398aa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -559,13 +559,6 @@ object functions extends LegacyFunctions {
 //////////////////////////////////////////////////////////////////////////////////////////////

   /**
-   * @group window_funcs
-   * @deprecated As of 1.6.0, replaced by `cume_dist`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use cume_dist. This will be removed in Spark 2.0.", "1.6.0")
-  def cumeDist(): Column = cume_dist()
-
-  /**
    * Window function: returns the cumulative distribution of values within a window partition,
    * i.e. the fraction of rows that are below the current row.
    *
@@ -580,13 +573,6 @@ object functions extends LegacyFunctions {
   def cume_dist(): Column = withExpr { new CumeDist }

   /**
-   * @group window_funcs
-   * @deprecated As of 1.6.0, replaced by `dense_rank`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use dense_rank. This will be removed in Spark 2.0.", "1.6.0")
-  def denseRank(): Column = dense_rank()
-
-  /**
    * Window function: returns the rank of rows within a window partition, without any gaps.
    *
    * The difference between rank and denseRank is that denseRank leaves no gaps in ranking
@@ -716,13 +702,6 @@ object functions extends LegacyFunctions {
   def ntile(n: Int): Column = withExpr { new NTile(Literal(n)) }

   /**
-   * @group window_funcs
-   * @deprecated As of 1.6.0, replaced by `percent_rank`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use percent_rank. This will be removed in Spark 2.0.", "1.6.0")
-  def percentRank(): Column = percent_rank()
-
-  /**
    * Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
    *
    * This is computed by:
@@ -753,13 +732,6 @@ object functions extends LegacyFunctions {
   def rank(): Column = withExpr { new Rank }

   /**
-   * @group window_funcs
-   * @deprecated As of 1.6.0, replaced by `row_number`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use row_number. This will be removed in Spark 2.0.", "1.6.0")
-  def rowNumber(): Column = row_number()
-
-  /**
    * Window function: returns a sequential number starting at 1 within a window partition.
    *
    * @group window_funcs
@@ -828,13 +800,6 @@ object functions extends LegacyFunctions {
   def coalesce(e: Column*): Column = withExpr { Coalesce(e.map(_.expr)) }

   /**
-   * @group normal_funcs
-   * @deprecated As of 1.6.0, replaced by `input_file_name`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use input_file_name. This will be removed in Spark 2.0.", "1.6.0")
-  def inputFileName(): Column = input_file_name()
-
-  /**
    * Creates a string column for the file name of the current Spark task.
    *
    * @group normal_funcs
@@ -843,13 +808,6 @@ object functions extends LegacyFunctions {
   def input_file_name(): Column = withExpr { InputFileName() }

   /**
-   * @group normal_funcs
-   * @deprecated As of 1.6.0, replaced by `isnan`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use isnan. This will be removed in Spark 2.0.", "1.6.0")
-  def isNaN(e: Column): Column = isnan(e)
-
-  /**
    * Return true iff the column is NaN.
    *
    * @group normal_funcs
@@ -973,14 +931,6 @@ object functions extends LegacyFunctions {
   def randn(): Column = randn(Utils.random.nextLong)

   /**
-   * @group normal_funcs
-   * @since 1.4.0
-   * @deprecated As of 1.6.0, replaced by `spark_partition_id`. This will be removed in Spark 2.0.
-   */
-  @deprecated("Use spark_partition_id. This will be removed in Spark 2.0.", "1.6.0")
-  def sparkPartitionId(): Column = spark_partition_id()
-
-  /**
    * Partition ID of the Spark task.
    *
    * Note that this is nondeterministic because it depends on data partitioning and task scheduling.
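The camelCase aliases (`cumeDist`, `denseRank`, `percentRank`, `rowNumber`, plus `inputFileName`, `isNaN`, and `sparkPartitionId`) are pure renames; the snake_case forms behave identically. A window-function sketch over hypothetical `dept`/`salary` columns:

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{cume_dist, dense_rank, percent_rank, row_number}

val w = Window.partitionBy("dept").orderBy("salary")
df.select(
  cume_dist().over(w),    // was: cumeDist()
  dense_rank().over(w),   // was: denseRank()
  percent_rank().over(w), // was: percentRank()
  row_number().over(w))   // was: rowNumber()
```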
@@ -2534,24 +2484,6 @@ object functions extends LegacyFunctions {
     }""")
   }

-  (0 to 10).map { x =>
-    val args = (1 to x).map(i => s"arg$i: Column").mkString(", ")
-    val fTypes = Seq.fill(x + 1)("_").mkString(", ")
-    val argsInUDF = (1 to x).map(i => s"arg$i.expr").mkString(", ")
-    println(s"""
-    /**
-     * Call a Scala function of ${x} arguments as user-defined function (UDF). This requires
-     * you to specify the return data type.
-     *
-     * @group udf_funcs
-     * @since 1.3.0
-     * @deprecated As of 1.5.0, since it's redundant with udf()
-     */
-    @deprecated("Use udf", "1.5.0")
-    def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = withExpr {
-      ScalaUDF(f, returnType, Option(Seq($argsInUDF)))
-    }""")
-  }
   */

   /**
    * Defines a user-defined function of 0 arguments as user-defined function (UDF).
@@ -2685,161 +2617,6 @@ object functions extends LegacyFunctions {
     UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes)
   }

-  //////////////////////////////////////////////////////////////////////////////////////////////////
-  /**
-   * Call a Scala function of 0 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function0[_], returnType: DataType): Column = withExpr {
-    ScalaUDF(f, returnType, Seq())
-  }
-
-  /**
-   * Call a Scala function of 1 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr))
-  }
-
-  /**
-   * Call a Scala function of 2 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr))
-  }
-
-  /**
-   * Call a Scala function of 3 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr))
-  }
-
-  /**
-   * Call a Scala function of 4 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr))
-  }
-
-  /**
-   * Call a Scala function of 5 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr))
-  }
-
-  /**
-   * Call a Scala function of 6 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr))
-  }
-
-  /**
-   * Call a Scala function of 7 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr))
-  }
-
-  /**
-   * Call a Scala function of 8 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf()
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr))
-  }
-
-  /**
-   * Call a Scala function of 9 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf().
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr))
-  }
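All eleven typed `callUDF(f, returnType, args...)` overloads are subsumed by `udf`, which infers the return type through `ScalaReflection` instead of taking an explicit `DataType`. A sketch:

```scala
import org.apache.spark.sql.functions.udf

// was: callUDF((x: Int) => x * x, IntegerType, df("value"))
val square = udf((x: Int) => x * x) // return type inferred
df.select(square(df("value")))
```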
-
-  /**
-   * Call a Scala function of 10 arguments as user-defined function (UDF). This requires
-   * you to specify the return data type.
-   *
-   * @group udf_funcs
-   * @since 1.3.0
-   * @deprecated As of 1.5.0, since it's redundant with udf().
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use udf. This will be removed in Spark 2.0.", "1.5.0")
-  def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = withExpr {
-    ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr))
-  }
-
 // scalastyle:on parameter.number
 // scalastyle:on line.size.limit

@@ -2877,33 +2654,4 @@ object functions extends LegacyFunctions {
     UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false)
   }

-  /**
-   * Call a user-defined function.
-   * Example:
-   * {{{
-   *  import org.apache.spark.sql._
-   *
-   *  val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
-   *  val sqlContext = df.sqlContext
-   *  sqlContext.udf.register("simpleUDF", (v: Int) => v * v)
-   *  df.select($"id", callUdf("simpleUDF", $"value"))
-   * }}}
-   *
-   * @group udf_funcs
-   * @since 1.4.0
-   * @deprecated As of 1.5.0, since it was not coherent to have two functions callUdf and callUDF.
-   *             This will be removed in Spark 2.0.
-   */
-  @deprecated("Use callUDF. This will be removed in Spark 2.0.", "1.5.0")
-  def callUdf(udfName: String, cols: Column*): Column = withExpr {
-    // Note: we avoid using closures here because on file systems that are case-insensitive, the
-    // compiled class file for the closure here will conflict with the one in callUDF (upper case).
-    val exprs = new Array[Expression](cols.size)
-    var i = 0
-    while (i < cols.size) {
-      exprs(i) = cols(i).expr
-      i += 1
-    }
-    UnresolvedFunction(udfName, exprs, isDistinct = false)
-  }
 }
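For functions registered by name, only the casing survives: `callUdf` goes away and `callUDF` (which the test suite below still exercises) takes over. A sketch mirroring the example removed from the scaladoc:

```scala
import org.apache.spark.sql.functions.callUDF

sqlContext.udf.register("simpleUDF", (v: Int) => v * v)
// was: df.select($"id", callUdf("simpleUDF", $"value"))
df.select(df("id"), callUDF("simpleUDF", df("value")))
```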
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index a9c600b139..bd73a36fd4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -42,10 +42,4 @@ package object sql {
   @DeveloperApi
   type Strategy = org.apache.spark.sql.catalyst.planning.GenericStrategy[SparkPlan]

-  /**
-   * Type alias for [[DataFrame]]. Kept here for backward source compatibility for Scala.
-   * @deprecated As of 1.3.0, replaced by `DataFrame`.
-   */
-  @deprecated("use DataFrame", "1.3.0")
-  type SchemaRDD = DataFrame
 }
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
index 7b50aad4ad..640efcc737 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java
@@ -107,7 +107,7 @@ public class JavaApplySchemaSuite implements Serializable {
     fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false));
     StructType schema = DataTypes.createStructType(fields);

-    DataFrame df = sqlContext.applySchema(rowRDD, schema);
+    DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
     df.registerTempTable("people");
     Row[] actual = sqlContext.sql("SELECT * FROM people").collect();

@@ -143,7 +143,7 @@ public class JavaApplySchemaSuite implements Serializable {
     fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, false));
     StructType schema = DataTypes.createStructType(fields);

-    DataFrame df = sqlContext.applySchema(rowRDD, schema);
+    DataFrame df = sqlContext.createDataFrame(rowRDD, schema);
     df.registerTempTable("people");
     List<String> actual = sqlContext.sql("SELECT * FROM people").toJavaRDD().map(new Function<Row, String>() {
       @Override
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 38c0eb589f..53a9788024 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -298,7 +298,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
       Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)

     checkAnswer(
-      testData.select(isNaN($"a"), isNaN($"b")),
+      testData.select(isnan($"a"), isnan($"b")),
       Row(true, true) :: Row(true, true) :: Row(false, false) :: Row(false, false) :: Nil)

     checkAnswer(
@@ -586,7 +586,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
       Iterator(Tuple1(1), Tuple1(2))
     }.toDF("a")
     checkAnswer(
-      df.select(sparkPartitionId()),
+      df.select(spark_partition_id()),
       Row(0) :: Row(0) :: Row(1) :: Row(1) :: Nil
     )
   }
@@ -595,11 +595,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
     withTempPath { dir =>
       val data = sparkContext.parallelize(0 to 10).toDF("id")
       data.write.parquet(dir.getCanonicalPath)
-      val answer = sqlContext.read.parquet(dir.getCanonicalPath).select(inputFileName())
+      val answer = sqlContext.read.parquet(dir.getCanonicalPath).select(input_file_name())
         .head.getString(0)
       assert(answer.contains(dir.getCanonicalPath))

-      checkAnswer(data.select(inputFileName()).limit(1), Row(""))
+      checkAnswer(data.select(input_file_name()).limit(1), Row(""))
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ab02b32f91..e8fa663363 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -341,15 +341,6 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     )
   }

-  test("deprecated callUdf in SQLContext") {
-    val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
-    val sqlctx = df.sqlContext
-    sqlctx.udf.register("simpleUdf", (v: Int) => v * v)
-    checkAnswer(
-      df.select($"id", callUdf("simpleUdf", $"value")),
-      Row("id1", 1) :: Row("id2", 16) :: Row("id3", 25) :: Nil)
-  }
-
   test("callUDF in SQLContext") {
     val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
     val sqlctx = df.sqlContext