aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authorWenchen Fan <cloud0fan@outlook.com>2015-08-14 12:00:01 -0700
committerReynold Xin <rxin@databricks.com>2015-08-14 12:00:01 -0700
commit34d610be854d2a975d9c1e232d87433b85add6fd (patch)
tree5bbf9882166496d4fe16cf8f6af4087c9e150f4b /sql/core
parenta7317ccdc20d001e5b7f5277b0535923468bfbc6 (diff)
downloadspark-34d610be854d2a975d9c1e232d87433b85add6fd.tar.gz
spark-34d610be854d2a975d9c1e232d87433b85add6fd.tar.bz2
spark-34d610be854d2a975d9c1e232d87433b85add6fd.zip
[SPARK-9929] [SQL] support metadata in withColumn
In MLlib we sometimes need to set metadata for the new column, so we alias the new column with metadata before calling `withColumn`, and then `withColumn` aliases this column again. Here I overloaded `withColumn` to allow the user to set metadata, just like what we did for `Column.as`. Author: Wenchen Fan <cloud0fan@outlook.com> Closes #8159 from cloud-fan/withColumn.
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala17
1 files changed, 17 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index c466d9e6cb..cf75e64e88 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1150,6 +1150,23 @@ class DataFrame private[sql](
}
/**
+ * Returns a new [[DataFrame]] by adding a column with metadata.
+ */
+ private[spark] def withColumn(colName: String, col: Column, metadata: Metadata): DataFrame = {
+ val sameName = sqlContext.analyzer.resolver
+ // A `def`, so each use builds its own alias exactly as the inline calls did.
+ def aliased: Column = col.as(colName, metadata)
+ if (!schema.exists(f => sameName(f.name, colName))) {
+ select(Column("*"), aliased)
+ } else {
+ val projection = schema.map { f =>
+ if (sameName(f.name, colName)) aliased else Column(f.name)
+ }
+ select(projection: _*)
+ }
+ }
+
+ /**
* Returns a new [[DataFrame]] with a column renamed.
* This is a no-op if schema doesn't contain existingName.
* @group dfops