From 665e175328130ab3eb0370cdd2a43ed5a7bed1d6 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Wed, 10 Aug 2016 21:26:46 -0700 Subject: [SPARK-17007][SQL] Move test data files into a test-data folder ## What changes were proposed in this pull request? This patch moves all the test data files in sql/core/src/test/resources to sql/core/src/test/resources/test-data, so we don't clutter the top level sql/core/src/test/resources. Also deleted sql/core/src/test/resources/old-repeated.parquet since it is no longer used. The change will make it easier to spot sql-tests directory. ## How was this patch tested? This is a test-only change. Author: petermaxlee Closes #14589 from petermaxlee/SPARK-17007. --- .../org/apache/spark/sql/JavaDataFrameSuite.java | 12 ++++---- sql/core/src/test/resources/bool.csv | 5 --- sql/core/src/test/resources/cars-alternative.csv | 5 --- .../src/test/resources/cars-blank-column-name.csv | 3 -- sql/core/src/test/resources/cars-malformed.csv | 6 ---- sql/core/src/test/resources/cars-null.csv | 6 ---- .../src/test/resources/cars-unbalanced-quotes.csv | 4 --- sql/core/src/test/resources/cars.csv | 7 ----- sql/core/src/test/resources/cars.tsv | 4 --- sql/core/src/test/resources/cars_iso-8859-1.csv | 6 ---- sql/core/src/test/resources/comments.csv | 6 ---- sql/core/src/test/resources/dates.csv | 4 --- .../src/test/resources/dec-in-fixed-len.parquet | Bin 460 -> 0 bytes sql/core/src/test/resources/dec-in-i32.parquet | Bin 420 -> 0 bytes sql/core/src/test/resources/dec-in-i64.parquet | Bin 437 -> 0 bytes sql/core/src/test/resources/decimal.csv | 7 ----- sql/core/src/test/resources/disable_comments.csv | 2 -- sql/core/src/test/resources/empty.csv | 0 .../src/test/resources/nested-array-struct.parquet | Bin 775 -> 0 bytes sql/core/src/test/resources/numbers.csv | 9 ------ .../src/test/resources/old-repeated-int.parquet | Bin 389 -> 0 bytes .../test/resources/old-repeated-message.parquet | Bin 600 -> 0 bytes sql/core/src/test/resources/old-repeated.parquet | Bin 432 -> 0 bytes .../resources/parquet-thrift-compat.snappy.parquet | Bin 10550 -> 0 bytes .../test/resources/proto-repeated-string.parquet | Bin 411 -> 0 bytes .../test/resources/proto-repeated-struct.parquet | Bin 608 -> 0 bytes .../resources/proto-struct-with-array-many.parquet | Bin 802 -> 0 bytes .../test/resources/proto-struct-with-array.parquet | Bin 1576 -> 0 bytes sql/core/src/test/resources/simple_sparse.csv | 5 --- sql/core/src/test/resources/test-data/bool.csv | 5 +++ .../test/resources/test-data/cars-alternative.csv | 5 +++ .../resources/test-data/cars-blank-column-name.csv | 3 ++ .../test/resources/test-data/cars-malformed.csv | 6 ++++ .../src/test/resources/test-data/cars-null.csv | 6 ++++ .../resources/test-data/cars-unbalanced-quotes.csv | 4 +++ sql/core/src/test/resources/test-data/cars.csv | 7 +++++ sql/core/src/test/resources/test-data/cars.tsv | 4 +++ .../test/resources/test-data/cars_iso-8859-1.csv | 6 ++++ sql/core/src/test/resources/test-data/comments.csv | 6 ++++ sql/core/src/test/resources/test-data/dates.csv | 4 +++ .../resources/test-data/dec-in-fixed-len.parquet | Bin 0 -> 460 bytes .../test/resources/test-data/dec-in-i32.parquet | Bin 0 -> 420 bytes .../test/resources/test-data/dec-in-i64.parquet | Bin 0 -> 437 bytes sql/core/src/test/resources/test-data/decimal.csv | 7 +++++ .../test/resources/test-data/disable_comments.csv | 2 ++ sql/core/src/test/resources/test-data/empty.csv | 0 .../test-data/nested-array-struct.parquet | Bin 0 -> 775 bytes sql/core/src/test/resources/test-data/numbers.csv | 9 ++++++ .../resources/test-data/old-repeated-int.parquet | Bin 0 -> 389 bytes .../test-data/old-repeated-message.parquet | Bin 0 -> 600 bytes .../test-data/parquet-thrift-compat.snappy.parquet | Bin 0 -> 10550 bytes .../test-data/proto-repeated-string.parquet | Bin 0 -> 411 bytes .../test-data/proto-repeated-struct.parquet | Bin 0 -> 608 bytes .../test-data/proto-struct-with-array-many.parquet | Bin 0 -> 802 bytes .../test-data/proto-struct-with-array.parquet | Bin 0 -> 1576 bytes .../src/test/resources/test-data/simple_sparse.csv | 5 +++ .../test-data/text-partitioned/year=2014/data.txt | 1 + .../test-data/text-partitioned/year=2015/data.txt | 1 + .../src/test/resources/test-data/text-suite.txt | 4 +++ .../src/test/resources/test-data/text-suite2.txt | 1 + .../test/resources/test-data/unescaped-quotes.csv | 2 ++ .../resources/text-partitioned/year=2014/data.txt | 1 - .../resources/text-partitioned/year=2015/data.txt | 1 - sql/core/src/test/resources/text-suite.txt | 4 --- sql/core/src/test/resources/text-suite2.txt | 1 - sql/core/src/test/resources/unescaped-quotes.csv | 2 -- .../spark/sql/execution/command/DDLSuite.scala | 3 +- .../sql/execution/datasources/csv/CSVSuite.scala | 34 ++++++++++----------- .../datasources/parquet/ParquetIOSuite.scala | 6 ++-- .../ParquetProtobufCompatibilitySuite.scala | 14 ++++----- .../parquet/ParquetThriftCompatibilitySuite.scala | 4 +-- .../sql/execution/datasources/text/TextSuite.scala | 6 ++-- 72 files changed, 128 insertions(+), 127 deletions(-) delete mode 100644 sql/core/src/test/resources/bool.csv delete mode 100644 sql/core/src/test/resources/cars-alternative.csv delete mode 100644 sql/core/src/test/resources/cars-blank-column-name.csv delete mode 100644 sql/core/src/test/resources/cars-malformed.csv delete mode 100644 sql/core/src/test/resources/cars-null.csv delete mode 100644 sql/core/src/test/resources/cars-unbalanced-quotes.csv delete mode 100644 sql/core/src/test/resources/cars.csv delete mode 100644 sql/core/src/test/resources/cars.tsv delete mode 100644 sql/core/src/test/resources/cars_iso-8859-1.csv delete mode 100644 sql/core/src/test/resources/comments.csv delete mode 100644 sql/core/src/test/resources/dates.csv delete mode 100644 sql/core/src/test/resources/dec-in-fixed-len.parquet delete mode 100755 sql/core/src/test/resources/dec-in-i32.parquet delete mode 100755 sql/core/src/test/resources/dec-in-i64.parquet delete mode 100644 sql/core/src/test/resources/decimal.csv delete mode 100644 sql/core/src/test/resources/disable_comments.csv delete mode 100644 sql/core/src/test/resources/empty.csv delete mode 100644 sql/core/src/test/resources/nested-array-struct.parquet delete mode 100644 sql/core/src/test/resources/numbers.csv delete mode 100644 sql/core/src/test/resources/old-repeated-int.parquet delete mode 100644 sql/core/src/test/resources/old-repeated-message.parquet delete mode 100644 sql/core/src/test/resources/old-repeated.parquet delete mode 100644 sql/core/src/test/resources/parquet-thrift-compat.snappy.parquet delete mode 100644 sql/core/src/test/resources/proto-repeated-string.parquet delete mode 100644 sql/core/src/test/resources/proto-repeated-struct.parquet delete mode 100644 sql/core/src/test/resources/proto-struct-with-array-many.parquet delete mode 100644 sql/core/src/test/resources/proto-struct-with-array.parquet delete mode 100644 sql/core/src/test/resources/simple_sparse.csv create mode 100644 sql/core/src/test/resources/test-data/bool.csv create mode 100644 sql/core/src/test/resources/test-data/cars-alternative.csv create mode 100644 sql/core/src/test/resources/test-data/cars-blank-column-name.csv create mode 100644 sql/core/src/test/resources/test-data/cars-malformed.csv create mode 100644 sql/core/src/test/resources/test-data/cars-null.csv create mode 100644 sql/core/src/test/resources/test-data/cars-unbalanced-quotes.csv create mode 100644 sql/core/src/test/resources/test-data/cars.csv create mode 100644 sql/core/src/test/resources/test-data/cars.tsv create mode 100644 sql/core/src/test/resources/test-data/cars_iso-8859-1.csv create mode 100644 sql/core/src/test/resources/test-data/comments.csv create mode 100644 sql/core/src/test/resources/test-data/dates.csv create mode 100644 sql/core/src/test/resources/test-data/dec-in-fixed-len.parquet create mode 100755 sql/core/src/test/resources/test-data/dec-in-i32.parquet create mode 100755 sql/core/src/test/resources/test-data/dec-in-i64.parquet create mode 100644 sql/core/src/test/resources/test-data/decimal.csv create mode 100644 sql/core/src/test/resources/test-data/disable_comments.csv create mode 100644 sql/core/src/test/resources/test-data/empty.csv create mode 100644 sql/core/src/test/resources/test-data/nested-array-struct.parquet create mode 100644 sql/core/src/test/resources/test-data/numbers.csv create mode 100644 sql/core/src/test/resources/test-data/old-repeated-int.parquet create mode 100644 sql/core/src/test/resources/test-data/old-repeated-message.parquet create mode 100644 sql/core/src/test/resources/test-data/parquet-thrift-compat.snappy.parquet create mode 100644 sql/core/src/test/resources/test-data/proto-repeated-string.parquet create mode 100644 sql/core/src/test/resources/test-data/proto-repeated-struct.parquet create mode 100644 sql/core/src/test/resources/test-data/proto-struct-with-array-many.parquet create mode 100644 sql/core/src/test/resources/test-data/proto-struct-with-array.parquet create mode 100644 sql/core/src/test/resources/test-data/simple_sparse.csv create mode 100644 sql/core/src/test/resources/test-data/text-partitioned/year=2014/data.txt create mode 100644 sql/core/src/test/resources/test-data/text-partitioned/year=2015/data.txt create mode 100644 sql/core/src/test/resources/test-data/text-suite.txt create mode 100644 sql/core/src/test/resources/test-data/text-suite2.txt create mode 100644 sql/core/src/test/resources/test-data/unescaped-quotes.csv delete mode 100644 sql/core/src/test/resources/text-partitioned/year=2014/data.txt delete mode 100644 sql/core/src/test/resources/text-partitioned/year=2015/data.txt delete mode 100644 sql/core/src/test/resources/text-suite.txt delete mode 100644 sql/core/src/test/resources/text-suite2.txt delete mode 100644 sql/core/src/test/resources/unescaped-quotes.csv (limited to 'sql/core/src/test') diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java index 318b53cdbb..c44fc3d393 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java @@ -327,23 +327,23 @@ public class JavaDataFrameSuite { @Test public void testGenericLoad() { - Dataset df1 = spark.read().format("text").load(getResource("text-suite.txt")); + Dataset df1 = spark.read().format("text").load(getResource("test-data/text-suite.txt")); Assert.assertEquals(4L, df1.count()); Dataset df2 = spark.read().format("text").load( - getResource("text-suite.txt"), - getResource("text-suite2.txt")); + getResource("test-data/text-suite.txt"), + getResource("test-data/text-suite2.txt")); Assert.assertEquals(5L, df2.count()); } @Test public void testTextLoad() { - Dataset ds1 = spark.read().textFile(getResource("text-suite.txt")); + Dataset ds1 = spark.read().textFile(getResource("test-data/text-suite.txt")); Assert.assertEquals(4L, ds1.count()); Dataset ds2 = spark.read().textFile( - getResource("text-suite.txt"), - getResource("text-suite2.txt")); + getResource("test-data/text-suite.txt"), + getResource("test-data/text-suite2.txt")); Assert.assertEquals(5L, ds2.count()); } diff --git a/sql/core/src/test/resources/bool.csv b/sql/core/src/test/resources/bool.csv deleted file mode 100644 index 94b2d49506..0000000000 --- a/sql/core/src/test/resources/bool.csv +++ /dev/null @@ -1,5 +0,0 @@ -bool -"True" -"False" - -"true" diff --git a/sql/core/src/test/resources/cars-alternative.csv b/sql/core/src/test/resources/cars-alternative.csv deleted file mode 100644 index 646f7c456c..0000000000 --- a/sql/core/src/test/resources/cars-alternative.csv +++ /dev/null @@ -1,5 +0,0 @@ -year|make|model|comment|blank -'2012'|'Tesla'|'S'| 'No comment'| - -1997|Ford|E350|'Go get one now they are going fast'| -2015|Chevy|Volt diff --git a/sql/core/src/test/resources/cars-blank-column-name.csv b/sql/core/src/test/resources/cars-blank-column-name.csv deleted file mode 100644 index 0b804b1614..0000000000 --- a/sql/core/src/test/resources/cars-blank-column-name.csv +++ /dev/null @@ -1,3 +0,0 @@ -"",,make,customer,comment -2012,"Tesla","S","bill","blank" -2013,"Tesla","S","c","something" diff --git a/sql/core/src/test/resources/cars-malformed.csv b/sql/core/src/test/resources/cars-malformed.csv deleted file mode 100644 index cfa378c01f..0000000000 --- a/sql/core/src/test/resources/cars-malformed.csv +++ /dev/null @@ -1,6 +0,0 @@ -~ All the rows here are malformed having tokens more than the schema (header). -year,make,model,comment,blank -"2012","Tesla","S","No comment",,null,null - -1997,Ford,E350,"Go get one now they are going fast",,null,null -2015,Chevy,,,, diff --git a/sql/core/src/test/resources/cars-null.csv b/sql/core/src/test/resources/cars-null.csv deleted file mode 100644 index 130c0b40bb..0000000000 --- a/sql/core/src/test/resources/cars-null.csv +++ /dev/null @@ -1,6 +0,0 @@ -year,make,model,comment,blank -"2012","Tesla","S",null, - -1997,Ford,E350,"Go get one now they are going fast", -null,Chevy,Volt - diff --git a/sql/core/src/test/resources/cars-unbalanced-quotes.csv b/sql/core/src/test/resources/cars-unbalanced-quotes.csv deleted file mode 100644 index 5ea39fcbfa..0000000000 --- a/sql/core/src/test/resources/cars-unbalanced-quotes.csv +++ /dev/null @@ -1,4 +0,0 @@ -year,make,model,comment,blank -"2012,Tesla,S,No comment -1997,Ford,E350,Go get one now they are going fast" -"2015,"Chevy",Volt, diff --git a/sql/core/src/test/resources/cars.csv b/sql/core/src/test/resources/cars.csv deleted file mode 100644 index 40ded573ad..0000000000 --- a/sql/core/src/test/resources/cars.csv +++ /dev/null @@ -1,7 +0,0 @@ - -year,make,model,comment,blank -"2012","Tesla","S","No comment", - -1997,Ford,E350,"Go get one now they are going fast", -2015,Chevy,Volt - diff --git a/sql/core/src/test/resources/cars.tsv b/sql/core/src/test/resources/cars.tsv deleted file mode 100644 index a7bfa9a91f..0000000000 --- a/sql/core/src/test/resources/cars.tsv +++ /dev/null @@ -1,4 +0,0 @@ -year make model price comment blank -2012 Tesla S "80,000.65" -1997 Ford E350 35,000 "Go get one now they are going fast" -2015 Chevy Volt 5,000.10 diff --git a/sql/core/src/test/resources/cars_iso-8859-1.csv b/sql/core/src/test/resources/cars_iso-8859-1.csv deleted file mode 100644 index c51b6c5901..0000000000 --- a/sql/core/src/test/resources/cars_iso-8859-1.csv +++ /dev/null @@ -1,6 +0,0 @@ -yearmakemodelcommentblank -"2012""Tesla""S""No comment" - -1997FordE350"Go get one now they are oing fast" -2015ChevyVolt - diff --git a/sql/core/src/test/resources/comments.csv b/sql/core/src/test/resources/comments.csv deleted file mode 100644 index 6275be7285..0000000000 --- a/sql/core/src/test/resources/comments.csv +++ /dev/null @@ -1,6 +0,0 @@ -~ Version 1.0 -~ Using a non-standard comment char to test CSV parser defaults are overridden -1,2,3,4,5.01,2015-08-20 15:57:00 -6,7,8,9,0,2015-08-21 16:58:01 -~0,9,8,7,6,2015-08-22 17:59:02 -1,2,3,4,5,2015-08-23 18:00:42 diff --git a/sql/core/src/test/resources/dates.csv b/sql/core/src/test/resources/dates.csv deleted file mode 100644 index 9ee99c31b3..0000000000 --- a/sql/core/src/test/resources/dates.csv +++ /dev/null @@ -1,4 +0,0 @@ -date -26/08/2015 18:00 -27/10/2014 18:30 -28/01/2016 20:00 diff --git a/sql/core/src/test/resources/dec-in-fixed-len.parquet b/sql/core/src/test/resources/dec-in-fixed-len.parquet deleted file mode 100644 index 6ad37d5639..0000000000 Binary files a/sql/core/src/test/resources/dec-in-fixed-len.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/dec-in-i32.parquet b/sql/core/src/test/resources/dec-in-i32.parquet deleted file mode 100755 index bb5d4af8dd..0000000000 Binary files a/sql/core/src/test/resources/dec-in-i32.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/dec-in-i64.parquet b/sql/core/src/test/resources/dec-in-i64.parquet deleted file mode 100755 index e07c4a0ad9..0000000000 Binary files a/sql/core/src/test/resources/dec-in-i64.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/decimal.csv b/sql/core/src/test/resources/decimal.csv deleted file mode 100644 index 870f6aaf1b..0000000000 --- a/sql/core/src/test/resources/decimal.csv +++ /dev/null @@ -1,7 +0,0 @@ -~ decimal field has integer, integer and decimal values. The last value cannot fit to a long -~ long field has integer, long and integer values. -~ double field has double, double and decimal values. -decimal,long,double -1,1,0.1 -1,9223372036854775807,1.0 -92233720368547758070,1,92233720368547758070 diff --git a/sql/core/src/test/resources/disable_comments.csv b/sql/core/src/test/resources/disable_comments.csv deleted file mode 100644 index 304d406e4d..0000000000 --- a/sql/core/src/test/resources/disable_comments.csv +++ /dev/null @@ -1,2 +0,0 @@ -#1,2,3 -4,5,6 diff --git a/sql/core/src/test/resources/empty.csv b/sql/core/src/test/resources/empty.csv deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sql/core/src/test/resources/nested-array-struct.parquet b/sql/core/src/test/resources/nested-array-struct.parquet deleted file mode 100644 index 41a43fa35d..0000000000 Binary files a/sql/core/src/test/resources/nested-array-struct.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/numbers.csv b/sql/core/src/test/resources/numbers.csv deleted file mode 100644 index af8feac784..0000000000 --- a/sql/core/src/test/resources/numbers.csv +++ /dev/null @@ -1,9 +0,0 @@ -int,long,float,double -8,1000000,1.042,23848545.0374 ---,34232323,98.343,184721.23987223 -34,--,98.343,184721.23987223 -34,43323123,--,184721.23987223 -34,43323123,223823.9484,-- -34,43323123,223823.NAN,NAN -34,43323123,223823.INF,INF -34,43323123,223823.-INF,-INF diff --git a/sql/core/src/test/resources/old-repeated-int.parquet b/sql/core/src/test/resources/old-repeated-int.parquet deleted file mode 100644 index 520922f73e..0000000000 Binary files a/sql/core/src/test/resources/old-repeated-int.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/old-repeated-message.parquet b/sql/core/src/test/resources/old-repeated-message.parquet deleted file mode 100644 index 548db99162..0000000000 Binary files a/sql/core/src/test/resources/old-repeated-message.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/old-repeated.parquet b/sql/core/src/test/resources/old-repeated.parquet deleted file mode 100644 index 213f1a9029..0000000000 Binary files a/sql/core/src/test/resources/old-repeated.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/parquet-thrift-compat.snappy.parquet b/sql/core/src/test/resources/parquet-thrift-compat.snappy.parquet deleted file mode 100644 index 837e4876ee..0000000000 Binary files a/sql/core/src/test/resources/parquet-thrift-compat.snappy.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/proto-repeated-string.parquet b/sql/core/src/test/resources/proto-repeated-string.parquet deleted file mode 100644 index 8a7eea601d..0000000000 Binary files a/sql/core/src/test/resources/proto-repeated-string.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/proto-repeated-struct.parquet b/sql/core/src/test/resources/proto-repeated-struct.parquet deleted file mode 100644 index c29eee35c3..0000000000 Binary files a/sql/core/src/test/resources/proto-repeated-struct.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/proto-struct-with-array-many.parquet b/sql/core/src/test/resources/proto-struct-with-array-many.parquet deleted file mode 100644 index ff9809675f..0000000000 Binary files a/sql/core/src/test/resources/proto-struct-with-array-many.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/proto-struct-with-array.parquet b/sql/core/src/test/resources/proto-struct-with-array.parquet deleted file mode 100644 index 325a8370ad..0000000000 Binary files a/sql/core/src/test/resources/proto-struct-with-array.parquet and /dev/null differ diff --git a/sql/core/src/test/resources/simple_sparse.csv b/sql/core/src/test/resources/simple_sparse.csv deleted file mode 100644 index 02d29cabf9..0000000000 --- a/sql/core/src/test/resources/simple_sparse.csv +++ /dev/null @@ -1,5 +0,0 @@ -A,B,C,D -1,,, -,1,, -,,1, -,,,1 diff --git a/sql/core/src/test/resources/test-data/bool.csv b/sql/core/src/test/resources/test-data/bool.csv new file mode 100644 index 0000000000..94b2d49506 --- /dev/null +++ b/sql/core/src/test/resources/test-data/bool.csv @@ -0,0 +1,5 @@ +bool +"True" +"False" + +"true" diff --git a/sql/core/src/test/resources/test-data/cars-alternative.csv b/sql/core/src/test/resources/test-data/cars-alternative.csv new file mode 100644 index 0000000000..646f7c456c --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-alternative.csv @@ -0,0 +1,5 @@ +year|make|model|comment|blank +'2012'|'Tesla'|'S'| 'No comment'| + +1997|Ford|E350|'Go get one now they are going fast'| +2015|Chevy|Volt diff --git a/sql/core/src/test/resources/test-data/cars-blank-column-name.csv b/sql/core/src/test/resources/test-data/cars-blank-column-name.csv new file mode 100644 index 0000000000..0b804b1614 --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-blank-column-name.csv @@ -0,0 +1,3 @@ +"",,make,customer,comment +2012,"Tesla","S","bill","blank" +2013,"Tesla","S","c","something" diff --git a/sql/core/src/test/resources/test-data/cars-malformed.csv b/sql/core/src/test/resources/test-data/cars-malformed.csv new file mode 100644 index 0000000000..cfa378c01f --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-malformed.csv @@ -0,0 +1,6 @@ +~ All the rows here are malformed having tokens more than the schema (header). +year,make,model,comment,blank +"2012","Tesla","S","No comment",,null,null + +1997,Ford,E350,"Go get one now they are going fast",,null,null +2015,Chevy,,,, diff --git a/sql/core/src/test/resources/test-data/cars-null.csv b/sql/core/src/test/resources/test-data/cars-null.csv new file mode 100644 index 0000000000..130c0b40bb --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-null.csv @@ -0,0 +1,6 @@ +year,make,model,comment,blank +"2012","Tesla","S",null, + +1997,Ford,E350,"Go get one now they are going fast", +null,Chevy,Volt + diff --git a/sql/core/src/test/resources/test-data/cars-unbalanced-quotes.csv b/sql/core/src/test/resources/test-data/cars-unbalanced-quotes.csv new file mode 100644 index 0000000000..5ea39fcbfa --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-unbalanced-quotes.csv @@ -0,0 +1,4 @@ +year,make,model,comment,blank +"2012,Tesla,S,No comment +1997,Ford,E350,Go get one now they are going fast" +"2015,"Chevy",Volt, diff --git a/sql/core/src/test/resources/test-data/cars.csv b/sql/core/src/test/resources/test-data/cars.csv new file mode 100644 index 0000000000..40ded573ad --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars.csv @@ -0,0 +1,7 @@ + +year,make,model,comment,blank +"2012","Tesla","S","No comment", + +1997,Ford,E350,"Go get one now they are going fast", +2015,Chevy,Volt + diff --git a/sql/core/src/test/resources/test-data/cars.tsv b/sql/core/src/test/resources/test-data/cars.tsv new file mode 100644 index 0000000000..a7bfa9a91f --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars.tsv @@ -0,0 +1,4 @@ +year make model price comment blank +2012 Tesla S "80,000.65" +1997 Ford E350 35,000 "Go get one now they are going fast" +2015 Chevy Volt 5,000.10 diff --git a/sql/core/src/test/resources/test-data/cars_iso-8859-1.csv b/sql/core/src/test/resources/test-data/cars_iso-8859-1.csv new file mode 100644 index 0000000000..c51b6c5901 --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars_iso-8859-1.csv @@ -0,0 +1,6 @@ +yearmakemodelcommentblank +"2012""Tesla""S""No comment" + +1997FordE350"Go get one now they are oing fast" +2015ChevyVolt + diff --git a/sql/core/src/test/resources/test-data/comments.csv b/sql/core/src/test/resources/test-data/comments.csv new file mode 100644 index 0000000000..6275be7285 --- /dev/null +++ b/sql/core/src/test/resources/test-data/comments.csv @@ -0,0 +1,6 @@ +~ Version 1.0 +~ Using a non-standard comment char to test CSV parser defaults are overridden +1,2,3,4,5.01,2015-08-20 15:57:00 +6,7,8,9,0,2015-08-21 16:58:01 +~0,9,8,7,6,2015-08-22 17:59:02 +1,2,3,4,5,2015-08-23 18:00:42 diff --git a/sql/core/src/test/resources/test-data/dates.csv b/sql/core/src/test/resources/test-data/dates.csv new file mode 100644 index 0000000000..9ee99c31b3 --- /dev/null +++ b/sql/core/src/test/resources/test-data/dates.csv @@ -0,0 +1,4 @@ +date +26/08/2015 18:00 +27/10/2014 18:30 +28/01/2016 20:00 diff --git a/sql/core/src/test/resources/test-data/dec-in-fixed-len.parquet b/sql/core/src/test/resources/test-data/dec-in-fixed-len.parquet new file mode 100644 index 0000000000..6ad37d5639 Binary files /dev/null and b/sql/core/src/test/resources/test-data/dec-in-fixed-len.parquet differ diff --git a/sql/core/src/test/resources/test-data/dec-in-i32.parquet b/sql/core/src/test/resources/test-data/dec-in-i32.parquet new file mode 100755 index 0000000000..bb5d4af8dd Binary files /dev/null and b/sql/core/src/test/resources/test-data/dec-in-i32.parquet differ diff --git a/sql/core/src/test/resources/test-data/dec-in-i64.parquet b/sql/core/src/test/resources/test-data/dec-in-i64.parquet new file mode 100755 index 0000000000..e07c4a0ad9 Binary files /dev/null and b/sql/core/src/test/resources/test-data/dec-in-i64.parquet differ diff --git a/sql/core/src/test/resources/test-data/decimal.csv b/sql/core/src/test/resources/test-data/decimal.csv new file mode 100644 index 0000000000..870f6aaf1b --- /dev/null +++ b/sql/core/src/test/resources/test-data/decimal.csv @@ -0,0 +1,7 @@ +~ decimal field has integer, integer and decimal values. The last value cannot fit to a long +~ long field has integer, long and integer values. +~ double field has double, double and decimal values. +decimal,long,double +1,1,0.1 +1,9223372036854775807,1.0 +92233720368547758070,1,92233720368547758070 diff --git a/sql/core/src/test/resources/test-data/disable_comments.csv b/sql/core/src/test/resources/test-data/disable_comments.csv new file mode 100644 index 0000000000..304d406e4d --- /dev/null +++ b/sql/core/src/test/resources/test-data/disable_comments.csv @@ -0,0 +1,2 @@ +#1,2,3 +4,5,6 diff --git a/sql/core/src/test/resources/test-data/empty.csv b/sql/core/src/test/resources/test-data/empty.csv new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sql/core/src/test/resources/test-data/nested-array-struct.parquet b/sql/core/src/test/resources/test-data/nested-array-struct.parquet new file mode 100644 index 0000000000..41a43fa35d Binary files /dev/null and b/sql/core/src/test/resources/test-data/nested-array-struct.parquet differ diff --git a/sql/core/src/test/resources/test-data/numbers.csv b/sql/core/src/test/resources/test-data/numbers.csv new file mode 100644 index 0000000000..af8feac784 --- /dev/null +++ b/sql/core/src/test/resources/test-data/numbers.csv @@ -0,0 +1,9 @@ +int,long,float,double +8,1000000,1.042,23848545.0374 +--,34232323,98.343,184721.23987223 +34,--,98.343,184721.23987223 +34,43323123,--,184721.23987223 +34,43323123,223823.9484,-- +34,43323123,223823.NAN,NAN +34,43323123,223823.INF,INF +34,43323123,223823.-INF,-INF diff --git a/sql/core/src/test/resources/test-data/old-repeated-int.parquet b/sql/core/src/test/resources/test-data/old-repeated-int.parquet new file mode 100644 index 0000000000..520922f73e Binary files /dev/null and b/sql/core/src/test/resources/test-data/old-repeated-int.parquet differ diff --git a/sql/core/src/test/resources/test-data/old-repeated-message.parquet b/sql/core/src/test/resources/test-data/old-repeated-message.parquet new file mode 100644 index 0000000000..548db99162 Binary files /dev/null and b/sql/core/src/test/resources/test-data/old-repeated-message.parquet differ diff --git a/sql/core/src/test/resources/test-data/parquet-thrift-compat.snappy.parquet b/sql/core/src/test/resources/test-data/parquet-thrift-compat.snappy.parquet new file mode 100644 index 0000000000..837e4876ee Binary files /dev/null and b/sql/core/src/test/resources/test-data/parquet-thrift-compat.snappy.parquet differ diff --git a/sql/core/src/test/resources/test-data/proto-repeated-string.parquet b/sql/core/src/test/resources/test-data/proto-repeated-string.parquet new file mode 100644 index 0000000000..8a7eea601d Binary files /dev/null and b/sql/core/src/test/resources/test-data/proto-repeated-string.parquet differ diff --git a/sql/core/src/test/resources/test-data/proto-repeated-struct.parquet b/sql/core/src/test/resources/test-data/proto-repeated-struct.parquet new file mode 100644 index 0000000000..c29eee35c3 Binary files /dev/null and b/sql/core/src/test/resources/test-data/proto-repeated-struct.parquet differ diff --git a/sql/core/src/test/resources/test-data/proto-struct-with-array-many.parquet b/sql/core/src/test/resources/test-data/proto-struct-with-array-many.parquet new file mode 100644 index 0000000000..ff9809675f Binary files /dev/null and b/sql/core/src/test/resources/test-data/proto-struct-with-array-many.parquet differ diff --git a/sql/core/src/test/resources/test-data/proto-struct-with-array.parquet b/sql/core/src/test/resources/test-data/proto-struct-with-array.parquet new file mode 100644 index 0000000000..325a8370ad Binary files /dev/null and b/sql/core/src/test/resources/test-data/proto-struct-with-array.parquet differ diff --git a/sql/core/src/test/resources/test-data/simple_sparse.csv b/sql/core/src/test/resources/test-data/simple_sparse.csv new file mode 100644 index 0000000000..02d29cabf9 --- /dev/null +++ b/sql/core/src/test/resources/test-data/simple_sparse.csv @@ -0,0 +1,5 @@ +A,B,C,D +1,,, +,1,, +,,1, +,,,1 diff --git a/sql/core/src/test/resources/test-data/text-partitioned/year=2014/data.txt b/sql/core/src/test/resources/test-data/text-partitioned/year=2014/data.txt new file mode 100644 index 0000000000..e2719428bb --- /dev/null +++ b/sql/core/src/test/resources/test-data/text-partitioned/year=2014/data.txt @@ -0,0 +1 @@ +2014-test diff --git a/sql/core/src/test/resources/test-data/text-partitioned/year=2015/data.txt b/sql/core/src/test/resources/test-data/text-partitioned/year=2015/data.txt new file mode 100644 index 0000000000..b8c03daa8c --- /dev/null +++ b/sql/core/src/test/resources/test-data/text-partitioned/year=2015/data.txt @@ -0,0 +1 @@ +2015-test diff --git a/sql/core/src/test/resources/test-data/text-suite.txt b/sql/core/src/test/resources/test-data/text-suite.txt new file mode 100644 index 0000000000..e8fd967197 --- /dev/null +++ b/sql/core/src/test/resources/test-data/text-suite.txt @@ -0,0 +1,4 @@ +This is a test file for the text data source +1+1 +数据砖头 +"doh" diff --git a/sql/core/src/test/resources/test-data/text-suite2.txt b/sql/core/src/test/resources/test-data/text-suite2.txt new file mode 100644 index 0000000000..f9d498c804 --- /dev/null +++ b/sql/core/src/test/resources/test-data/text-suite2.txt @@ -0,0 +1 @@ +This is another file for testing multi path loading. diff --git a/sql/core/src/test/resources/test-data/unescaped-quotes.csv b/sql/core/src/test/resources/test-data/unescaped-quotes.csv new file mode 100644 index 0000000000..7c68055575 --- /dev/null +++ b/sql/core/src/test/resources/test-data/unescaped-quotes.csv @@ -0,0 +1,2 @@ +"a"b,ccc,ddd +ab,cc"c,ddd" diff --git a/sql/core/src/test/resources/text-partitioned/year=2014/data.txt b/sql/core/src/test/resources/text-partitioned/year=2014/data.txt deleted file mode 100644 index e2719428bb..0000000000 --- a/sql/core/src/test/resources/text-partitioned/year=2014/data.txt +++ /dev/null @@ -1 +0,0 @@ -2014-test diff --git a/sql/core/src/test/resources/text-partitioned/year=2015/data.txt b/sql/core/src/test/resources/text-partitioned/year=2015/data.txt deleted file mode 100644 index b8c03daa8c..0000000000 --- a/sql/core/src/test/resources/text-partitioned/year=2015/data.txt +++ /dev/null @@ -1 +0,0 @@ -2015-test diff --git a/sql/core/src/test/resources/text-suite.txt b/sql/core/src/test/resources/text-suite.txt deleted file mode 100644 index e8fd967197..0000000000 --- a/sql/core/src/test/resources/text-suite.txt +++ /dev/null @@ -1,4 +0,0 @@ -This is a test file for the text data source -1+1 -数据砖头 -"doh" diff --git a/sql/core/src/test/resources/text-suite2.txt b/sql/core/src/test/resources/text-suite2.txt deleted file mode 100644 index f9d498c804..0000000000 --- a/sql/core/src/test/resources/text-suite2.txt +++ /dev/null @@ -1 +0,0 @@ -This is another file for testing multi path loading. diff --git a/sql/core/src/test/resources/unescaped-quotes.csv b/sql/core/src/test/resources/unescaped-quotes.csv deleted file mode 100644 index 7c68055575..0000000000 --- a/sql/core/src/test/resources/unescaped-quotes.csv +++ /dev/null @@ -1,2 +0,0 @@ -"a"b,ccc,ddd -ab,cc"c,ddd" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index e14e84e0a7..ce1f7c5082 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -677,7 +677,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } test("create temporary view using") { - val csvFile = Thread.currentThread().getContextClassLoader.getResource("cars.csv").toString() + val csvFile = + Thread.currentThread().getContextClassLoader.getResource("test-data/cars.csv").toString withView("testview") { sql(s"CREATE OR REPLACE TEMPORARY VIEW testview (c1: String, c2: String) USING " + "org.apache.spark.sql.execution.datasources.csv.CSVFileFormat " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 311f1fa8d2..8cd76ddf20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -33,23 +33,23 @@ import org.apache.spark.sql.types._ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { import testImplicits._ - private val carsFile = "cars.csv" - private val carsMalformedFile = "cars-malformed.csv" - private val carsFile8859 = "cars_iso-8859-1.csv" - private val carsTsvFile = "cars.tsv" - private val carsAltFile = "cars-alternative.csv" - private val carsUnbalancedQuotesFile = "cars-unbalanced-quotes.csv" - private val carsNullFile = "cars-null.csv" - private val carsBlankColName = "cars-blank-column-name.csv" - private val emptyFile = "empty.csv" - private val commentsFile = "comments.csv" - private val disableCommentsFile = "disable_comments.csv" - private val boolFile = "bool.csv" - private val decimalFile = "decimal.csv" - private val simpleSparseFile = "simple_sparse.csv" - private val numbersFile = "numbers.csv" - private val datesFile = "dates.csv" - private val unescapedQuotesFile = "unescaped-quotes.csv" + private val carsFile = "test-data/cars.csv" + private val carsMalformedFile = "test-data/cars-malformed.csv" + private val carsFile8859 = "test-data/cars_iso-8859-1.csv" + private val carsTsvFile = "test-data/cars.tsv" + private val carsAltFile = "test-data/cars-alternative.csv" + private val carsUnbalancedQuotesFile = "test-data/cars-unbalanced-quotes.csv" + private val carsNullFile = "test-data/cars-null.csv" + private val carsBlankColName = "test-data/cars-blank-column-name.csv" + private val emptyFile = "test-data/empty.csv" + private val commentsFile = "test-data/comments.csv" + private val disableCommentsFile = "test-data/disable_comments.csv" + private val boolFile = "test-data/bool.csv" + private val decimalFile = "test-data/decimal.csv" + private val simpleSparseFile = "test-data/simple_sparse.csv" + private val numbersFile = "test-data/numbers.csv" + private val datesFile = "test-data/dates.csv" + private val unescapedQuotesFile = "test-data/unescaped-quotes.csv" private def testFile(fileName: String): String = { Thread.currentThread().getContextClassLoader.getResource(fileName).toString diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 0f74094699..4aa046bd91 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -568,7 +568,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized) { checkAnswer( // Decimal column in this file is encoded using plain dictionary - readResourceParquetFile("dec-in-i32.parquet"), + readResourceParquetFile("test-data/dec-in-i32.parquet"), spark.range(1 << 4).select('id % 10 cast DecimalType(5, 2) as 'i32_dec)) } } @@ -579,7 +579,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized) { checkAnswer( // Decimal column in this file is encoded using plain dictionary - readResourceParquetFile("dec-in-i64.parquet"), + readResourceParquetFile("test-data/dec-in-i64.parquet"), spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'i64_dec)) } } @@ -590,7 +590,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized) { checkAnswer( // Decimal column in this file is encoded using plain dictionary - readResourceParquetFile("dec-in-fixed-len.parquet"), + readResourceParquetFile("test-data/dec-in-fixed-len.parquet"), spark.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'fixed_len_dec)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala index 98333e58ca..fa88019298 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetProtobufCompatibilitySuite.scala @@ -22,12 +22,12 @@ import org.apache.spark.sql.test.SharedSQLContext class ParquetProtobufCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext { test("unannotated array of primitive type") { - checkAnswer(readResourceParquetFile("old-repeated-int.parquet"), Row(Seq(1, 2, 3))) + checkAnswer(readResourceParquetFile("test-data/old-repeated-int.parquet"), Row(Seq(1, 2, 3))) } test("unannotated array of struct") { checkAnswer( - readResourceParquetFile("old-repeated-message.parquet"), + readResourceParquetFile("test-data/old-repeated-message.parquet"), Row( Seq( Row("First inner", null, null), @@ -35,14 +35,14 @@ class ParquetProtobufCompatibilitySuite extends ParquetCompatibilityTest with Sh Row(null, null, "Third inner")))) checkAnswer( - readResourceParquetFile("proto-repeated-struct.parquet"), + readResourceParquetFile("test-data/proto-repeated-struct.parquet"), Row( Seq( Row("0 - 1", "0 - 2", "0 - 3"), Row("1 - 1", "1 - 2", "1 - 3")))) checkAnswer( - readResourceParquetFile("proto-struct-with-array-many.parquet"), + readResourceParquetFile("test-data/proto-struct-with-array-many.parquet"), Seq( Row( Seq( @@ -60,13 +60,13 @@ class ParquetProtobufCompatibilitySuite extends ParquetCompatibilityTest with Sh test("struct with unannotated array") { checkAnswer( - readResourceParquetFile("proto-struct-with-array.parquet"), + readResourceParquetFile("test-data/proto-struct-with-array.parquet"), Row(10, 9, Seq.empty, null, Row(9), Seq(Row(9), Row(10)))) } test("unannotated array of struct with unannotated array") { checkAnswer( - readResourceParquetFile("nested-array-struct.parquet"), + readResourceParquetFile("test-data/nested-array-struct.parquet"), Seq( Row(2, Seq(Row(1, Seq(Row(3))))), Row(5, Seq(Row(4, Seq(Row(6))))), @@ -75,7 +75,7 @@ class ParquetProtobufCompatibilitySuite extends ParquetCompatibilityTest with Sh test("unannotated array of string") { checkAnswer( - readResourceParquetFile("proto-repeated-string.parquet"), + readResourceParquetFile("test-data/proto-repeated-string.parquet"), Seq( Row(Seq("hello", "world")), Row(Seq("good", "bye")), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala index ff5706999a..4157a5b46d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetThriftCompatibilitySuite.scala @@ -23,8 +23,8 @@ import org.apache.spark.sql.test.SharedSQLContext class ParquetThriftCompatibilitySuite extends ParquetCompatibilityTest with SharedSQLContext { import ParquetCompatibilityTest._ - private val parquetFilePath = - Thread.currentThread().getContextClassLoader.getResource("parquet-thrift-compat.snappy.parquet") + private val parquetFilePath = Thread.currentThread().getContextClassLoader.getResource( + "test-data/parquet-thrift-compat.snappy.parquet") test("Read Parquet file generated by parquet-thrift") { logInfo( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 71d3da9158..d11c2acb81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -66,7 +66,7 @@ class TextSuite extends QueryTest with SharedSQLContext { test("reading partitioned data using read.textFile()") { val partitionedData = Thread.currentThread().getContextClassLoader - .getResource("text-partitioned").toString + .getResource("test-data/text-partitioned").toString val ds = spark.read.textFile(partitionedData) val data = ds.collect() @@ -76,7 +76,7 @@ class TextSuite extends QueryTest with SharedSQLContext { test("support for partitioned reading using read.text()") { val partitionedData = Thread.currentThread().getContextClassLoader - .getResource("text-partitioned").toString + .getResource("test-data/text-partitioned").toString val df = spark.read.text(partitionedData) val data = df.filter("year = '2015'").select("value").collect() @@ -155,7 +155,7 @@ class TextSuite extends QueryTest with SharedSQLContext { } private def testFile: String = { - Thread.currentThread().getContextClassLoader.getResource("text-suite.txt").toString + Thread.currentThread().getContextClassLoader.getResource("test-data/text-suite.txt").toString } /** Verifies data and schema. */ -- cgit v1.2.3