diff options
author | Yuming Wang <wgyumg@gmail.com> | 2017-02-28 10:13:42 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2017-02-28 10:13:42 +0000 |
commit | 9b8eca65dcf68129470ead39362ce870ffb0bb1d (patch) | |
tree | 282c7af7443b31416ff3f9821615f18635de916b /python/pyspark | |
parent | a350bc16d36c58b48ac01f0258678ffcdb77e793 (diff) | |
download | spark-9b8eca65dcf68129470ead39362ce870ffb0bb1d.tar.gz spark-9b8eca65dcf68129470ead39362ce870ffb0bb1d.tar.bz2 spark-9b8eca65dcf68129470ead39362ce870ffb0bb1d.zip |
[SPARK-19660][CORE][SQL] Replace the configuration property names that are deprecated in Hadoop 2.6
## What changes were proposed in this pull request?
Replace all deprecated Hadoop configuration property names with their current equivalents, according to [DeprecatedProperties](https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-common/DeprecatedProperties.html),
except for the occurrences at:
https://github.com/apache/spark/blob/v2.1.0/python/pyspark/sql/tests.py#L1533
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala#L987
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala#L45
https://github.com/apache/spark/blob/v2.1.0/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L614
## How was this patch tested?
Existing tests
Author: Yuming Wang <wgyumg@gmail.com>
Closes #16990 from wangyum/HadoopDeprecatedProperties.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/tests.py | 47 |
1 files changed, 24 insertions, 23 deletions
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index e908b1e739..a2aead7e6b 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -1347,7 +1347,7 @@ class InputFormatTests(ReusedPySparkTestCase): self.assertEqual(ints, ei) hellopath = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt") - oldconf = {"mapred.input.dir": hellopath} + oldconf = {"mapreduce.input.fileinputformat.inputdir": hellopath} hello = self.sc.hadoopRDD("org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.io.LongWritable", "org.apache.hadoop.io.Text", @@ -1366,7 +1366,7 @@ class InputFormatTests(ReusedPySparkTestCase): self.assertEqual(ints, ei) hellopath = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt") - newconf = {"mapred.input.dir": hellopath} + newconf = {"mapreduce.input.fileinputformat.inputdir": hellopath} hello = self.sc.newAPIHadoopRDD("org.apache.hadoop.mapreduce.lib.input.TextInputFormat", "org.apache.hadoop.io.LongWritable", "org.apache.hadoop.io.Text", @@ -1515,12 +1515,12 @@ class OutputFormatTests(ReusedPySparkTestCase): conf = { "mapred.output.format.class": "org.apache.hadoop.mapred.SequenceFileOutputFormat", - "mapred.output.key.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.value.class": "org.apache.hadoop.io.MapWritable", - "mapred.output.dir": basepath + "/olddataset/" + "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.job.output.value.class": "org.apache.hadoop.io.MapWritable", + "mapreduce.output.fileoutputformat.outputdir": basepath + "/olddataset/" } self.sc.parallelize(dict_data).saveAsHadoopDataset(conf) - input_conf = {"mapred.input.dir": basepath + "/olddataset/"} + input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/olddataset/"} result = self.sc.hadoopRDD( "org.apache.hadoop.mapred.SequenceFileInputFormat", "org.apache.hadoop.io.IntWritable", @@ -1547,14 +1547,14 @@ class OutputFormatTests(ReusedPySparkTestCase): 
self.assertEqual(result, data) conf = { - "mapreduce.outputformat.class": + "mapreduce.job.outputformat.class": "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", - "mapred.output.key.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.value.class": "org.apache.hadoop.io.Text", - "mapred.output.dir": basepath + "/newdataset/" + "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.job.output.value.class": "org.apache.hadoop.io.Text", + "mapreduce.output.fileoutputformat.outputdir": basepath + "/newdataset/" } self.sc.parallelize(data).saveAsNewAPIHadoopDataset(conf) - input_conf = {"mapred.input.dir": basepath + "/newdataset/"} + input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/newdataset/"} new_dataset = sorted(self.sc.newAPIHadoopRDD( "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "org.apache.hadoop.io.IntWritable", @@ -1584,16 +1584,16 @@ class OutputFormatTests(ReusedPySparkTestCase): self.assertEqual(result, array_data) conf = { - "mapreduce.outputformat.class": + "mapreduce.job.outputformat.class": "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", - "mapred.output.key.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.value.class": "org.apache.spark.api.python.DoubleArrayWritable", - "mapred.output.dir": basepath + "/newdataset/" + "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.job.output.value.class": "org.apache.spark.api.python.DoubleArrayWritable", + "mapreduce.output.fileoutputformat.outputdir": basepath + "/newdataset/" } self.sc.parallelize(array_data).saveAsNewAPIHadoopDataset( conf, valueConverter="org.apache.spark.api.python.DoubleArrayToWritableConverter") - input_conf = {"mapred.input.dir": basepath + "/newdataset/"} + input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/newdataset/"} new_dataset = sorted(self.sc.newAPIHadoopRDD( 
"org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "org.apache.hadoop.io.IntWritable", @@ -1663,18 +1663,19 @@ class OutputFormatTests(ReusedPySparkTestCase): conf4 = { "mapred.output.format.class": "org.apache.hadoop.mapred.SequenceFileOutputFormat", - "mapred.output.key.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.value.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.dir": basepath + "/reserialize/dataset"} + "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.job.output.value.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.output.fileoutputformat.outputdir": basepath + "/reserialize/dataset"} rdd.saveAsHadoopDataset(conf4) result4 = sorted(self.sc.sequenceFile(basepath + "/reserialize/dataset").collect()) self.assertEqual(result4, data) - conf5 = {"mapreduce.outputformat.class": + conf5 = {"mapreduce.job.outputformat.class": "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", - "mapred.output.key.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.value.class": "org.apache.hadoop.io.IntWritable", - "mapred.output.dir": basepath + "/reserialize/newdataset"} + "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.job.output.value.class": "org.apache.hadoop.io.IntWritable", + "mapreduce.output.fileoutputformat.outputdir": basepath + "/reserialize/newdataset" + } rdd.saveAsNewAPIHadoopDataset(conf5) result5 = sorted(self.sc.sequenceFile(basepath + "/reserialize/newdataset").collect()) self.assertEqual(result5, data) |