diff options
author | q00251598 <qiyadong@huawei.com> | 2015-03-02 10:13:11 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-03-02 10:13:11 -0800 |
commit | 9ce12aaf283a2793e719bdc956dd858922636e8d (patch) | |
tree | 41bfd77744af24af670c99f5f2809772e8fc5861 /sql/core | |
parent | 95ac68bf127b5370c13d6bc15adbda78228829cc (diff) | |
download | spark-9ce12aaf283a2793e719bdc956dd858922636e8d.tar.gz spark-9ce12aaf283a2793e719bdc956dd858922636e8d.tar.bz2 spark-9ce12aaf283a2793e719bdc956dd858922636e8d.zip |
[SPARK-5741][SQL] Support the path contains comma in HiveContext
When run ```select * from nzhang_part where hr = 'file,';```, it throws exception ```java.lang.IllegalArgumentException: Can not create a Path from an empty string```
. Because the path of hdfs contains comma, and FileInputFormat.setInputPaths will split path by comma.
### SQL
```
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
create table nzhang_part like srcpart;
insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select key, value, hr from srcpart where ds='2008-04-08';
insert overwrite table nzhang_part partition (ds='2010-08-15', hr=11) select key, value from srcpart where ds='2008-04-08';
insert overwrite table nzhang_part partition (ds='2010-08-15', hr)
select * from (
select key, value, hr from srcpart where ds='2008-04-08'
union all
select '1' as key, '1' as value, 'file,' as hr from src limit 1) s;
select * from nzhang_part where hr = 'file,';
```
### Error Log
```
15/02/10 14:33:16 ERROR SparkSQLDriver: Failed in [select * from nzhang_part where hr = 'file,']
java.lang.IllegalArgumentException: Can not create a Path from an empty string
at org.apache.hadoop.fs.Path.checkPathArg(Path.java:127)
at org.apache.hadoop.fs.Path.<init>(Path.java:135)
at org.apache.hadoop.util.StringUtils.stringToPath(StringUtils.java:241)
at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:400)
at org.apache.spark.sql.hive.HadoopTableReader$.initializeLocalJobConfFunc(TableReader.scala:251)
at org.apache.spark.sql.hive.HadoopTableReader$$anonfun$11.apply(TableReader.scala:229)
at org.apache.spark.sql.hive.HadoopTableReader$$anonfun$11.apply(TableReader.scala:229)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:172)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:172)
at scala.Option.map(Option.scala:145)
at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:172)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:196)
Author: q00251598 <qiyadong@huawei.com>
Closes #4532 from watermen/SPARK-5741 and squashes the following commits:
9758ab1 [q00251598] fix bug
1db1a1c [q00251598] use setInputPaths(Job job, Path... inputPaths)
b788a72 [q00251598] change FileInputFormat.setInputPaths to jobConf.set and add test suite
Diffstat (limited to 'sql/core')
0 files changed, 0 insertions, 0 deletions