aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala11
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala12
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala10
-rw-r--r--sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala1
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala2
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #1-0-ff502d8c06f4b32f57aa45057b7fab0e1
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #2-0-d6d0def30a7fad5f90fd835361820c301
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-0-50131c0ba7b7a6b65c789a5a8497bada1
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-1-72509f06e1f7c5d5ccc292f775f8eea70
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-2-6d5806dd1d2511911a5de1e205523f422
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-3-155b3cc2f5054725a9c2acca3c38c00a0
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-4-3b7045ace234af8e5e86d8ac23ccee562
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-5-e1eca4e08216897d090259d4fd1e3fe0
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-6-16d227442dd775615c6ecfceedc6c6120
-rw-r--r--sql/hive/src/test/resources/golden/lateral_view_noalias-7-66cb5ab20690dd85b2ed95bbfb9481d32
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala6
16 files changed, 34 insertions, 17 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 912c967b95..899ee67352 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -147,7 +147,7 @@ class Analyzer(
case u @ UnresolvedAlias(child) => child match {
case ne: NamedExpression => ne
case e if !e.resolved => u
- case g: Generator if g.elementTypes.size > 1 => MultiAlias(g, Nil)
+ case g: Generator => MultiAlias(g, Nil)
case c @ Cast(ne: NamedExpression, _) => Alias(c, ne.name)()
case other => Alias(other, s"_c$i")()
}
@@ -722,7 +722,7 @@ class Analyzer(
/**
* Construct the output attributes for a [[Generator]], given a list of names. If the list of
- * names is empty names are assigned by ordinal (i.e., _c0, _c1, ...) to match Hive's defaults.
+ * names is empty names are assigned from field names in generator.
*/
private def makeGeneratorOutput(
generator: Generator,
@@ -731,13 +731,12 @@ class Analyzer(
if (names.length == elementTypes.length) {
names.zip(elementTypes).map {
- case (name, (t, nullable)) =>
+ case (name, (t, nullable, _)) =>
AttributeReference(name, t, nullable)()
}
} else if (names.isEmpty) {
- elementTypes.zipWithIndex.map {
- // keep the default column names as Hive does _c0, _c1, _cN
- case ((t, nullable), i) => AttributeReference(s"_c$i", t, nullable)()
+ elementTypes.map {
+ case (t, nullable, name) => AttributeReference(name, t, nullable)()
}
} else {
failAnalysis(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index 1a2092c909..894a0730d1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -53,7 +53,7 @@ trait Generator extends Expression {
* The output element data types in structure of Seq[(DataType, Nullable)]
* TODO we probably need to add more information like metadata etc.
*/
- def elementTypes: Seq[(DataType, Boolean)]
+ def elementTypes: Seq[(DataType, Boolean, String)]
/** Should be implemented by child classes to perform specific Generators. */
override def eval(input: InternalRow): TraversableOnce[InternalRow]
@@ -69,7 +69,7 @@ trait Generator extends Expression {
* A generator that produces its output using the provided lambda function.
*/
case class UserDefinedGenerator(
- elementTypes: Seq[(DataType, Boolean)],
+ elementTypes: Seq[(DataType, Boolean, String)],
function: Row => TraversableOnce[InternalRow],
children: Seq[Expression])
extends Generator with CodegenFallback {
@@ -112,9 +112,11 @@ case class Explode(child: Expression) extends UnaryExpression with Generator wit
}
}
- override def elementTypes: Seq[(DataType, Boolean)] = child.dataType match {
- case ArrayType(et, containsNull) => (et, containsNull) :: Nil
- case MapType(kt, vt, valueContainsNull) => (kt, false) :: (vt, valueContainsNull) :: Nil
+ // hive-compatible default alias for explode function ("col" for array, "key", "value" for map)
+ override def elementTypes: Seq[(DataType, Boolean, String)] = child.dataType match {
+ case ArrayType(et, containsNull) => (et, containsNull, "col") :: Nil
+ case MapType(kt, vt, valueContainsNull) =>
+ (kt, false, "key") :: (vt, valueContainsNull, "value") :: Nil
}
override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 53ad3c0266..fc0ab632f9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -1175,7 +1175,8 @@ class DataFrame private[sql](
def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = {
val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType]
- val elementTypes = schema.toAttributes.map { attr => (attr.dataType, attr.nullable) }
+ val elementTypes = schema.toAttributes.map {
+ attr => (attr.dataType, attr.nullable, attr.name) }
val names = schema.toAttributes.map(_.name)
val convert = CatalystTypeConverters.createToCatalystConverter(schema)
@@ -1184,7 +1185,7 @@ class DataFrame private[sql](
val generator = UserDefinedGenerator(elementTypes, rowFunction, input.map(_.expr))
Generate(generator, join = true, outer = false,
- qualifier = None, names.map(UnresolvedAttribute(_)), logicalPlan)
+ qualifier = None, generatorOutput = Nil, logicalPlan)
}
/**
@@ -1203,8 +1204,7 @@ class DataFrame private[sql](
val dataType = ScalaReflection.schemaFor[B].dataType
val attributes = AttributeReference(outputColumn, dataType)() :: Nil
// TODO handle the metadata?
- val elementTypes = attributes.map { attr => (attr.dataType, attr.nullable) }
- val names = attributes.map(_.name)
+ val elementTypes = attributes.map { attr => (attr.dataType, attr.nullable, attr.name) }
def rowFunction(row: Row): TraversableOnce[InternalRow] = {
val convert = CatalystTypeConverters.createToCatalystConverter(dataType)
@@ -1213,7 +1213,7 @@ class DataFrame private[sql](
val generator = UserDefinedGenerator(elementTypes, rowFunction, apply(inputColumn).expr :: Nil)
Generate(generator, join = true, outer = false,
- qualifier = None, names.map(UnresolvedAttribute(_)), logicalPlan)
+ qualifier = None, generatorOutput = Nil, logicalPlan)
}
/////////////////////////////////////////////////////////////////////////////
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 6ed40b0397..2d0d7b8af3 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -661,6 +661,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"join_star",
"lateral_view",
"lateral_view_cp",
+ "lateral_view_noalias",
"lateral_view_ppd",
"leftsemijoin",
"leftsemijoin_mr",
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index 0b5e863506..a9db70119d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -511,7 +511,7 @@ private[hive] case class HiveGenericUDTF(
protected lazy val collector = new UDTFCollector
override lazy val elementTypes = outputInspector.getAllStructFieldRefs.asScala.map {
- field => (inspectorToDataType(field.getFieldObjectInspector), true)
+ field => (inspectorToDataType(field.getFieldObjectInspector), true, field.getFieldName)
}
@transient
diff --git a/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #1-0-ff502d8c06f4b32f57aa45057b7fab0e b/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #1-0-ff502d8c06f4b32f57aa45057b7fab0e
new file mode 100644
index 0000000000..1cf253f92c
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #1-0-ff502d8c06f4b32f57aa45057b7fab0e
@@ -0,0 +1 @@
+238
diff --git a/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #2-0-d6d0def30a7fad5f90fd835361820c30 b/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #2-0-d6d0def30a7fad5f90fd835361820c30
new file mode 100644
index 0000000000..60878ffb77
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-9034 Reflect field names defined in GenericUDTF #2-0-d6d0def30a7fad5f90fd835361820c30
@@ -0,0 +1 @@
+238 val_238
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-0-50131c0ba7b7a6b65c789a5a8497bada b/sql/hive/src/test/resources/golden/lateral_view_noalias-0-50131c0ba7b7a6b65c789a5a8497bada
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-0-50131c0ba7b7a6b65c789a5a8497bada
@@ -0,0 +1 @@
+0
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-1-72509f06e1f7c5d5ccc292f775f8eea7 b/sql/hive/src/test/resources/golden/lateral_view_noalias-1-72509f06e1f7c5d5ccc292f775f8eea7
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-1-72509f06e1f7c5d5ccc292f775f8eea7
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-2-6d5806dd1d2511911a5de1e205523f42 b/sql/hive/src/test/resources/golden/lateral_view_noalias-2-6d5806dd1d2511911a5de1e205523f42
new file mode 100644
index 0000000000..0da0d93886
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-2-6d5806dd1d2511911a5de1e205523f42
@@ -0,0 +1,2 @@
+key1 100
+key2 200
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-3-155b3cc2f5054725a9c2acca3c38c00a b/sql/hive/src/test/resources/golden/lateral_view_noalias-3-155b3cc2f5054725a9c2acca3c38c00a
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-3-155b3cc2f5054725a9c2acca3c38c00a
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-4-3b7045ace234af8e5e86d8ac23ccee56 b/sql/hive/src/test/resources/golden/lateral_view_noalias-4-3b7045ace234af8e5e86d8ac23ccee56
new file mode 100644
index 0000000000..0da0d93886
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-4-3b7045ace234af8e5e86d8ac23ccee56
@@ -0,0 +1,2 @@
+key1 100
+key2 200
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-5-e1eca4e08216897d090259d4fd1e3fe b/sql/hive/src/test/resources/golden/lateral_view_noalias-5-e1eca4e08216897d090259d4fd1e3fe
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-5-e1eca4e08216897d090259d4fd1e3fe
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-6-16d227442dd775615c6ecfceedc6c612 b/sql/hive/src/test/resources/golden/lateral_view_noalias-6-16d227442dd775615c6ecfceedc6c612
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-6-16d227442dd775615c6ecfceedc6c612
diff --git a/sql/hive/src/test/resources/golden/lateral_view_noalias-7-66cb5ab20690dd85b2ed95bbfb9481d3 b/sql/hive/src/test/resources/golden/lateral_view_noalias-7-66cb5ab20690dd85b2ed95bbfb9481d3
new file mode 100644
index 0000000000..4ba46bbda5
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/lateral_view_noalias-7-66cb5ab20690dd85b2ed95bbfb9481d3
@@ -0,0 +1,2 @@
+key1 100 key1 100
+key2 200 key2 200
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index e597d6865f..fc72e3c7dc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -563,6 +563,12 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
createQueryTest("Specify the udtf output",
"SELECT d FROM (SELECT explode(array(1,1)) d FROM src LIMIT 1) t")
+ createQueryTest("SPARK-9034 Reflect field names defined in GenericUDTF #1",
+ "SELECT col FROM (SELECT explode(array(key,value)) FROM src LIMIT 1) t")
+
+ createQueryTest("SPARK-9034 Reflect field names defined in GenericUDTF #2",
+ "SELECT key,value FROM (SELECT explode(map(key,value)) FROM src LIMIT 1) t")
+
test("sampling") {
sql("SELECT * FROM src TABLESAMPLE(0.1 PERCENT) s")
sql("SELECT * FROM src TABLESAMPLE(100 PERCENT) s")