about summary refs log tree commit diff
path: root/examples/src/main/python/hbase_inputformat.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/hbase_inputformat.py')
-rw-r--r-- examples/src/main/python/hbase_inputformat.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/examples/src/main/python/hbase_inputformat.py b/examples/src/main/python/hbase_inputformat.py
index befacee0de..3b16010f1c 100644
--- a/examples/src/main/python/hbase_inputformat.py
+++ b/examples/src/main/python/hbase_inputformat.py
@@ -51,7 +51,8 @@ if __name__ == "__main__":
Usage: hbase_inputformat <host> <table>
Run with example jar:
- ./bin/spark-submit --driver-class-path /path/to/example/jar /path/to/examples/hbase_inputformat.py <host> <table>
+ ./bin/spark-submit --driver-class-path /path/to/example/jar \
+ /path/to/examples/hbase_inputformat.py <host> <table>
Assumes you have some data in HBase already, running on <host>, in <table>
"""
exit(-1)
@@ -61,12 +62,15 @@ if __name__ == "__main__":
sc = SparkContext(appName="HBaseInputFormat")
conf = {"hbase.zookeeper.quorum": host, "hbase.mapreduce.inputtable": table}
+ keyConv = "org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter"
+ valueConv = "org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter"
+
hbase_rdd = sc.newAPIHadoopRDD(
"org.apache.hadoop.hbase.mapreduce.TableInputFormat",
"org.apache.hadoop.hbase.io.ImmutableBytesWritable",
"org.apache.hadoop.hbase.client.Result",
- keyConverter="org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter",
- valueConverter="org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter",
+ keyConverter=keyConv,
+ valueConverter=valueConv,
conf=conf)
output = hbase_rdd.collect()
for (k, v) in output: