about | summary | refs | log | tree | commit | diff
path: root/sql/hive/src/test/resources/data/scripts
diff options
context:
space:
mode:
author Michael Armbrust <michael@databricks.com> 2014-03-21 15:05:45 -0700
committer Patrick Wendell <pwendell@gmail.com> 2014-03-21 15:05:45 -0700
commit 7e17fe69f9c3dc4cac024ea483f5d5f34ee06203 (patch)
tree bf6235fda03105bb981d64a25819ddb5a49bc19c /sql/hive/src/test/resources/data/scripts
parent e09139d9ca529a8f983a8b3e2a8158c3f3caa523 (diff)
download spark-7e17fe69f9c3dc4cac024ea483f5d5f34ee06203.tar.gz
spark-7e17fe69f9c3dc4cac024ea483f5d5f34ee06203.tar.bz2
spark-7e17fe69f9c3dc4cac024ea483f5d5f34ee06203.zip
Add hive test files to repository. Remove download script.
This PR removes our test dependence on files hosted at Berkeley by checking the test queries and answers into the repository. This should also fix the Maven Jenkins build. I realize this is a *giant* commit, but size-wise it's actually pretty small: we are only looking at ~1.2 MB compressed (~30 MB uncompressed). Given that we already have a ~80 MB file permanently added to the Spark code lineage, I do not think that this will change the developer experience significantly. Furthermore, I think it is good engineering practice to consider such test support files as "code", since changes to them would indicate a change in functionality. These files were only excluded from the initial PR as I wanted the diff to be readable. Author: Michael Armbrust <michael@databricks.com> Closes #199 from marmbrus/hiveTestFiles and squashes the following commits: b9b9b17 [Michael Armbrust] Add hive test files to repository. Remove download script.
Diffstat (limited to 'sql/hive/src/test/resources/data/scripts')
-rw-r--r-- sql/hive/src/test/resources/data/scripts/cat.py 29
-rw-r--r-- sql/hive/src/test/resources/data/scripts/cat_error.py 24
-rw-r--r-- sql/hive/src/test/resources/data/scripts/doubleescapedtab.py 24
-rw-r--r-- sql/hive/src/test/resources/data/scripts/dumpdata_script.py 27
-rwxr-xr-x sql/hive/src/test/resources/data/scripts/error_script 26
-rw-r--r-- sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py 23
-rw-r--r-- sql/hive/src/test/resources/data/scripts/escapednewline.py 23
-rw-r--r-- sql/hive/src/test/resources/data/scripts/escapedtab.py 23
-rwxr-xr-x sql/hive/src/test/resources/data/scripts/input20_script 20
-rw-r--r-- sql/hive/src/test/resources/data/scripts/newline.py 24
-rw-r--r-- sql/hive/src/test/resources/data/scripts/q_test_cleanup.sql 10
-rw-r--r-- sql/hive/src/test/resources/data/scripts/q_test_init.sql 132
-rw-r--r-- sql/hive/src/test/resources/data/scripts/test_init_file.sql 1
13 files changed, 386 insertions, 0 deletions
diff --git a/sql/hive/src/test/resources/data/scripts/cat.py b/sql/hive/src/test/resources/data/scripts/cat.py
new file mode 100644
index 0000000000..2395b2cdeb
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/cat.py
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys, re
+import datetime
+import os
+
+table_name=None
+if os.environ.has_key('hive_streaming_tablename'):
+ table_name=os.environ['hive_streaming_tablename']
+
+for line in sys.stdin:
+ print line
+ print >> sys.stderr, "dummy"
diff --git a/sql/hive/src/test/resources/data/scripts/cat_error.py b/sql/hive/src/test/resources/data/scripts/cat_error.py
new file mode 100644
index 0000000000..9642efec8e
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/cat_error.py
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print line
+
+sys.exit(1)
diff --git a/sql/hive/src/test/resources/data/scripts/doubleescapedtab.py b/sql/hive/src/test/resources/data/scripts/doubleescapedtab.py
new file mode 100644
index 0000000000..d373067bae
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/doubleescapedtab.py
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print "1\\\\\\t2"
+ print "1\\\\\\\\t2"
+
diff --git a/sql/hive/src/test/resources/data/scripts/dumpdata_script.py b/sql/hive/src/test/resources/data/scripts/dumpdata_script.py
new file mode 100644
index 0000000000..c96c9e529b
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/dumpdata_script.py
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for i in xrange(50):
+ for j in xrange(5):
+ for k in xrange(20022):
+ print 20000 * i + k
+
+for line in sys.stdin:
+ pass
diff --git a/sql/hive/src/test/resources/data/scripts/error_script b/sql/hive/src/test/resources/data/scripts/error_script
new file mode 100755
index 0000000000..8d86b62f0f
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/error_script
@@ -0,0 +1,26 @@
+#! /bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exit 1
+ret=0
+while [ "$ret" = "0" ];
+do
+ read -t 1 -a v
+ ret=$?
+done
+
+exit 1
diff --git a/sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py b/sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py
new file mode 100644
index 0000000000..475928a243
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print "1\\\\r2"
+
diff --git a/sql/hive/src/test/resources/data/scripts/escapednewline.py b/sql/hive/src/test/resources/data/scripts/escapednewline.py
new file mode 100644
index 0000000000..0d5751454b
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/escapednewline.py
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print "1\\\\n2"
+
diff --git a/sql/hive/src/test/resources/data/scripts/escapedtab.py b/sql/hive/src/test/resources/data/scripts/escapedtab.py
new file mode 100644
index 0000000000..549c91e444
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/escapedtab.py
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print "1\\\\t2"
+
diff --git a/sql/hive/src/test/resources/data/scripts/input20_script b/sql/hive/src/test/resources/data/scripts/input20_script
new file mode 100755
index 0000000000..e8e41189c1
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/input20_script
@@ -0,0 +1,20 @@
+#! /bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script outputs a row of the following format
+# <# of values for the key> <key>_<key>
+uniq -c | sed "s@^ *@@" | sed "s@ @_@" | sed "s@ @ @"
diff --git a/sql/hive/src/test/resources/data/scripts/newline.py b/sql/hive/src/test/resources/data/scripts/newline.py
new file mode 100644
index 0000000000..6500d900dd
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/newline.py
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import sys
+
+for line in sys.stdin:
+ print "1\\n2"
+ print "1\\r2"
+ print "1\\t2"
diff --git a/sql/hive/src/test/resources/data/scripts/q_test_cleanup.sql b/sql/hive/src/test/resources/data/scripts/q_test_cleanup.sql
new file mode 100644
index 0000000000..31bd7205d8
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/q_test_cleanup.sql
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS src;
+DROP TABLE IF EXISTS src1;
+DROP TABLE IF EXISTS src_json;
+DROP TABLE IF EXISTS src_sequencefile;
+DROP TABLE IF EXISTS src_thrift;
+DROP TABLE IF EXISTS srcbucket;
+DROP TABLE IF EXISTS srcbucket2;
+DROP TABLE IF EXISTS srcpart;
+DROP TABLE IF EXISTS primitives;
+
diff --git a/sql/hive/src/test/resources/data/scripts/q_test_init.sql b/sql/hive/src/test/resources/data/scripts/q_test_init.sql
new file mode 100644
index 0000000000..12afdf3911
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/q_test_init.sql
@@ -0,0 +1,132 @@
+--
+-- Table src
+--
+DROP TABLE IF EXISTS src;
+
+CREATE TABLE src (key STRING, value STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" INTO TABLE src;
+
+--
+-- Table src1
+--
+DROP TABLE IF EXISTS src1;
+
+CREATE TABLE src1 (key STRING, value STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1;
+
+--
+-- Table src_json
+--
+DROP TABLE IF EXISTS src_json;
+
+CREATE TABLE src_json (json STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/json.txt" INTO TABLE src_json;
+
+
+--
+-- Table src_sequencefile
+--
+DROP TABLE IF EXISTS src_sequencefile;
+
+CREATE TABLE src_sequencefile (key STRING, value STRING) STORED AS SEQUENCEFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.seq" INTO TABLE src_sequencefile;
+
+
+--
+-- Table src_thrift
+--
+DROP TABLE IF EXISTS src_thrift;
+
+CREATE TABLE src_thrift
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer'
+WITH SERDEPROPERTIES (
+ 'serialization.class' = 'org.apache.hadoop.hive.serde2.thrift.test.Complex',
+ 'serialization.format' = 'com.facebook.thrift.protocol.TBinaryProtocol')
+STORED AS SEQUENCEFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/complex.seq" INTO TABLE src_thrift;
+
+
+--
+-- Table srcbucket
+--
+DROP TABLE IF EXISTS srcbucket;
+
+CREATE TABLE srcbucket (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket0.txt" INTO TABLE srcbucket;
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket1.txt" INTO TABLE srcbucket;
+
+
+--
+-- Table srcbucket2
+--
+DROP TABLE IF EXISTS srcbucket2;
+
+CREATE TABLE srcbucket2 (key INT, value STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket20.txt" INTO TABLE srcbucket2;
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/srcbucket21.txt" INTO TABLE srcbucket2;
+
+
+--
+-- Table srcpart
+--
+DROP TABLE IF EXISTS srcpart;
+
+CREATE TABLE srcpart (key STRING, value STRING)
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
+OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
+OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="12");
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
+OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11");
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
+OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12");
+
+
+DROP TABLE IF EXISTS primitives;
+CREATE TABLE primitives (
+ id INT,
+ bool_col BOOLEAN,
+ tinyint_col TINYINT,
+ smallint_col SMALLINT,
+ int_col INT,
+ bigint_col BIGINT,
+ float_col FLOAT,
+ double_col DOUBLE,
+ date_string_col STRING,
+ string_col STRING,
+ timestamp_col TIMESTAMP)
+PARTITIONED BY (year INT, month INT)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY ','
+ ESCAPED BY '\\'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090101.txt"
+OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=1);
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090201.txt"
+OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=2);
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090301.txt"
+OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=3);
+
+LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/types/primitives/090401.txt"
+OVERWRITE INTO TABLE primitives PARTITION(year=2009, month=4);
+
diff --git a/sql/hive/src/test/resources/data/scripts/test_init_file.sql b/sql/hive/src/test/resources/data/scripts/test_init_file.sql
new file mode 100644
index 0000000000..776a46be08
--- /dev/null
+++ b/sql/hive/src/test/resources/data/scripts/test_init_file.sql
@@ -0,0 +1 @@
+create table tbl_created_by_init(i int);