aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-05-01 21:32:43 -0700
committerReynold Xin <rxin@apache.org>2014-05-01 21:32:43 -0700
commita43d9c14f2083d6632f410d74db98476e0e4d986 (patch)
tree45eab35f1f430989606038ab4aaa189c6942b258
parent98b65593bdcfea54010f8c0fdf2a2b840f18c283 (diff)
downloadspark-a43d9c14f2083d6632f410d74db98476e0e4d986.tar.gz
spark-a43d9c14f2083d6632f410d74db98476e0e4d986.tar.bz2
spark-a43d9c14f2083d6632f410d74db98476e0e4d986.zip
[SQL] SPARK-1661 - Fix regex_serde test
The JIRA in question is actually reporting a bug with Shark, but I wanted to make sure Spark SQL did not have similar problems. This fixes a bug in our parsing code that was preventing the test from executing, but it looks like the RegexSerDe is working in Spark SQL. Author: Michael Armbrust <michael@databricks.com> Closes #595 from marmbrus/fixRegexSerdeTest and squashes the following commits: a4dc612 [Michael Armbrust] Add files created by hive to gitignore. efa6402 [Michael Armbrust] Fix Hive serde_regex test.
-rw-r--r--.gitignore5
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala6
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e22
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f60
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c42938
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad10
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee90
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc0
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd53462
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c91292
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f10
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e022
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af770
-rw-r--r--sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b0
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala1
15 files changed, 97 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 857e9feb95..a204456d42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,8 @@ unit-tests.log
/lib/
rat-results.txt
scalastyle.txt
+
+# For Hive
+metastore_db/
+metastore/
+warehouse/
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 1777e96b67..1f688fe111 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -347,7 +347,11 @@ private[hive] object HiveQl {
protected def nodeToPlan(node: Node): LogicalPlan = node match {
// Just fake explain for any of the native commands.
case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText =>
- NoRelation
+ ExplainCommand(NoRelation)
+ // Create tables aren't native commands due to CTAS queries, but we still don't need to
+ // explain them.
+ case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" =>
+ ExplainCommand(NoRelation)
case Token("TOK_EXPLAIN", explainArgs) =>
// Ignore FORMATTED if present.
val Some(query) :: _ :: _ :: Nil =
diff --git a/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e b/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
new file mode 100644
index 0000000000..d00ee7786a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-0-60462d14b99bb445b02800e9fb22760e
@@ -0,0 +1,22 @@
+ABSTRACT SYNTAX TREE:
+ (TOK_CREATETABLE (TOK_TABNAME serde_regex) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL host TOK_STRING) (TOK_TABCOL identity TOK_STRING) (TOK_TABCOL user TOK_STRING) (TOK_TABCOL time TOK_STRING) (TOK_TABCOL request TOK_STRING) (TOK_TABCOL status TOK_STRING) (TOK_TABCOL size TOK_INT) (TOK_TABCOL referer TOK_STRING) (TOK_TABCOL agent TOK_STRING)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?"))))) TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Create Table Operator:
+ Create Table
+ columns: host string, identity string, user string, time string, request string, status string, size int, referer string, agent string
+ if not exists: false
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ # buckets: -1
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+ serde properties:
+ input.regex ([^ ]*) ([^ ]*) ([^ ]*) (-|\[[^\]]*\]) ([^ "]*|"[^"]*") (-|[0-9]*) (-|[0-9]*)(?: ([^ "]*|"[^"]*") ([^ "]*|"[^"]*"))?
+ name: serde_regex
+ isExternal: false
+
+
diff --git a/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6 b/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-1-dea03bd88cbaabcf438b398e23c139f6
diff --git a/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429 b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
new file mode 100644
index 0000000000..3e290231c2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429
@@ -0,0 +1,38 @@
+NULL 0
+NULL 0
+-1234567890.123456789 -1234567890
+-4400 4400
+-1255.49 -1255
+-1.122 -11
+-1.12 -1
+-1.12 -1
+-0.333 0
+-0.33 0
+-0.3 0
+0 0
+0 0
+0.01 0
+0.02 0
+0.1 0
+0.2 0
+0.3 0
+0.33 0
+0.333 0
+0.9999999999999999999999999 1
+1 1
+1 1
+1.12 1
+1.122 1
+2 2
+2 2
+3.14 3
+3.14 3
+3.14 3
+3.14 4
+10 10
+20 20
+100 100
+124 124
+125.2 125
+200 200
+1234567890.12345678 1234567890
diff --git a/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1 b/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-11-b00710a6f7a3e5b92c2e17da54cd9ad1
diff --git a/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9 b/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-2-9d00484beaee46cf72b154a1351aeee9
diff --git a/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc b/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-3-817190d8871b70611483cd2abe2e55dc
diff --git a/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346 b/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
new file mode 100644
index 0000000000..c55f3dd475
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-4-c3e345183543f40a14d2dd742ebd5346
@@ -0,0 +1,2 @@
+127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 NULL NULL
+127.0.0.1 - - [26/May/2009:00:00:00 +0000] "GET /someurl/?track=Blabla(Main) HTTP/1.1" 200 5864 - "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19"
diff --git a/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129 b/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
new file mode 100644
index 0000000000..8bd185bc66
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-5-e2d14a76a87a6014c6d2d3501a0c9129
@@ -0,0 +1,2 @@
+127.0.0.1 2326 200 [10/Oct/2000:13:55:36 -0700]
+127.0.0.1 5864 200 [26/May/2009:00:00:00 +0000]
diff --git a/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1 b/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-6-f818821654f219d1f4e2482951fae4f1
diff --git a/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0 b/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
new file mode 100644
index 0000000000..da61769c65
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-7-4db287576a17c0963219ca63ee0b20e0
@@ -0,0 +1,22 @@
+ABSTRACT SYNTAX TREE:
+ (TOK_CREATETABLE (TOK_TABNAME serde_regex1) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL key TOK_DECIMAL) (TOK_TABCOL value TOK_INT)) (TOK_TABLESERIALIZER (TOK_SERDENAME 'org.apache.hadoop.hive.serde2.RegexSerDe' (TOK_TABLEPROPERTIES (TOK_TABLEPROPLIST (TOK_TABLEPROPERTY "input.regex" "([^ ]*) ([^ ]*)"))))) TOK_TBLTEXTFILE)
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Create Table Operator:
+ Create Table
+ columns: key decimal, value int
+ if not exists: false
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ # buckets: -1
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.RegexSerDe
+ serde properties:
+ input.regex ([^ ]*) ([^ ]*)
+ name: serde_regex1
+ isExternal: false
+
+
diff --git a/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77 b/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-8-c429ee76b751e674992f61a29c95af77
diff --git a/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b b/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/serde_regex-9-f0e8d394ad18dcbd381792fe9bd8894b
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 0bb76f31c3..6c08e63a39 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -568,6 +568,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"select_unquote_and",
"select_unquote_not",
"select_unquote_or",
+ "serde_regex",
"serde_reported_schema",
"set_variable_sub",
"show_describe_func_quotes",