aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala11
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala40
2 files changed, 42 insertions, 9 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index a5b6828685..4ee1fb8374 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -739,17 +739,18 @@ object functions {
def sqrt(colName: String): Column = sqrt(Column(colName))
/**
- * Creates a new struct column. The input column must be a column in a [[DataFrame]], or
- * a derived column expression that is named (i.e. aliased).
+ * Creates a new struct column.
+ * If the input column is a column in a [[DataFrame]], or a derived column expression
+ * that is named (i.e. aliased), its name is retained as the StructField's name;
+ * otherwise, the StructField's name is auto-generated as col${index + 1},
+ * i.e. col1, col2, col3, ...
*
* @group normal_funcs
* @since 1.4.0
*/
@scala.annotation.varargs
def struct(cols: Column*): Column = {
- require(cols.forall(_.expr.isInstanceOf[NamedExpression]),
- s"struct input columns must all be named or aliased ($cols)")
- CreateStruct(cols.map(_.expr.asInstanceOf[NamedExpression]))
+ CreateStruct(cols.map(_.expr))
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 7ae89bcb1b..0d43aca877 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -79,10 +79,42 @@ class DataFrameFunctionsSuite extends QueryTest {
assert(row.getAs[Row](0) === Row(2, "str"))
}
- test("struct: must use named column expression") {
- intercept[IllegalArgumentException] {
- struct(col("a") * 2)
- }
+ test("struct with column expression to be automatically named") {
+ val df = Seq((1, "str")).toDF("a", "b")
+ val result = df.select(struct((col("a") * 2), col("b")))
+
+ val expectedType = StructType(Seq(
+ StructField("col1", IntegerType, nullable = false),
+ StructField("b", StringType)
+ ))
+ assert(result.first.schema(0).dataType === expectedType)
+ checkAnswer(result, Row(Row(2, "str")))
+ }
+
+ test("struct with literal columns") {
+ val df = Seq((1, "str1"), (2, "str2")).toDF("a", "b")
+ val result = df.select(struct((col("a") * 2), lit(5.0)))
+
+ val expectedType = StructType(Seq(
+ StructField("col1", IntegerType, nullable = false),
+ StructField("col2", DoubleType, nullable = false)
+ ))
+
+ assert(result.first.schema(0).dataType === expectedType)
+ checkAnswer(result, Seq(Row(Row(2, 5.0)), Row(Row(4, 5.0))))
+ }
+
+ test("struct with all literal columns") {
+ val df = Seq((1, "str1"), (2, "str2")).toDF("a", "b")
+ val result = df.select(struct(lit("v"), lit(5.0)))
+
+ val expectedType = StructType(Seq(
+ StructField("col1", StringType, nullable = false),
+ StructField("col2", DoubleType, nullable = false)
+ ))
+
+ assert(result.first.schema(0).dataType === expectedType)
+ checkAnswer(result, Seq(Row(Row("v", 5.0)), Row(Row("v", 5.0))))
}
test("constant functions") {