aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRohit Rai <rohit@tuplejump.com>2013-06-13 14:05:46 +0530
committerRohit Rai <rohit@tuplejump.com>2013-06-13 14:05:46 +0530
commitb5b12823faf62766d880e497c90b44b21f5a433a (patch)
treed7b897dff3c31f69e82cdd177aca48f49e23cc17
parentb104c7f5c7e2b173fe1b10035efbc00e43df13ec (diff)
downloadspark-b5b12823faf62766d880e497c90b44b21f5a433a.tar.gz
spark-b5b12823faf62766d880e497c90b44b21f5a433a.tar.bz2
spark-b5b12823faf62766d880e497c90b44b21f5a433a.zip
Fixing the style as per feedback
-rw-r--r--examples/src/main/scala/spark/examples/CassandraTest.scala72
1 files changed, 37 insertions, 35 deletions
diff --git a/examples/src/main/scala/spark/examples/CassandraTest.scala b/examples/src/main/scala/spark/examples/CassandraTest.scala
index 2cc62b9fe9..0fe1833e83 100644
--- a/examples/src/main/scala/spark/examples/CassandraTest.scala
+++ b/examples/src/main/scala/spark/examples/CassandraTest.scala
@@ -1,9 +1,11 @@
package spark.examples
import org.apache.hadoop.mapreduce.Job
-import org.apache.cassandra.hadoop.{ColumnFamilyOutputFormat, ConfigHelper, ColumnFamilyInputFormat}
+import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat
+import org.apache.cassandra.hadoop.ConfigHelper
+import org.apache.cassandra.hadoop.ColumnFamilyInputFormat
import org.apache.cassandra.thrift._
-import spark.{RDD, SparkContext}
+import spark.SparkContext
import spark.SparkContext._
import java.nio.ByteBuffer
import java.util.SortedMap
@@ -12,9 +14,9 @@ import org.apache.cassandra.utils.ByteBufferUtil
import scala.collection.JavaConversions._
-
/*
- * This example demonstrates using Spark with Cassandra with the New Hadoop API and Cassandra support for Hadoop.
+ * This example demonstrates using Spark with Cassandra with the New Hadoop API and Cassandra
+ * support for Hadoop.
*
* To run this example, run this file with the following command params -
* <spark_master> <cassandra_node> <cassandra_port>
@@ -26,32 +28,31 @@ import scala.collection.JavaConversions._
* 1. You have already created a keyspace called casDemo and it has a column family named Words
* 2. There are column family has a column named "para" which has test content.
*
- * You can create the content by running the following script at the bottom of this file with cassandra-cli.
+ * You can create the content by running the following script at the bottom of this file with
+ * cassandra-cli.
*
*/
object CassandraTest {
+
def main(args: Array[String]) {
- //Get a SparkContext
+ // Get a SparkContext
val sc = new SparkContext(args(0), "casDemo")
- //Build the job configuration with ConfigHelper provided by Cassandra
+ // Build the job configuration with ConfigHelper provided by Cassandra
val job = new Job()
job.setInputFormatClass(classOf[ColumnFamilyInputFormat])
- ConfigHelper.setInputInitialAddress(job.getConfiguration(), args(1))
-
- ConfigHelper.setInputRpcPort(job.getConfiguration(), args(2))
-
- ConfigHelper.setOutputInitialAddress(job.getConfiguration(), args(1))
-
- ConfigHelper.setOutputRpcPort(job.getConfiguration(), args(2))
+ val host: String = args(1)
+ val port: String = args(2)
+ ConfigHelper.setInputInitialAddress(job.getConfiguration(), host)
+ ConfigHelper.setInputRpcPort(job.getConfiguration(), port)
+ ConfigHelper.setOutputInitialAddress(job.getConfiguration(), host)
+ ConfigHelper.setOutputRpcPort(job.getConfiguration(), port)
ConfigHelper.setInputColumnFamily(job.getConfiguration(), "casDemo", "Words")
-
ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "casDemo", "WordCount")
-
val predicate = new SlicePredicate()
val sliceRange = new SliceRange()
sliceRange.setStart(Array.empty[Byte])
@@ -60,11 +61,11 @@ object CassandraTest {
ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate)
ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner")
-
ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner")
- //Make a new Hadoop RDD
- val casRdd = sc.newAPIHadoopRDD(job.getConfiguration(),
+ // Make a new Hadoop RDD
+ val casRdd = sc.newAPIHadoopRDD(
+ job.getConfiguration(),
classOf[ColumnFamilyInputFormat],
classOf[ByteBuffer],
classOf[SortedMap[ByteBuffer, IColumn]])
@@ -76,7 +77,7 @@ object CassandraTest {
}
}
- //Lets get the word count in paras
+ // Lets get the word count in paras
val counts = paraRdd.flatMap(p => p.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
counts.collect().foreach {
@@ -95,20 +96,17 @@ object CassandraTest {
colCount.setValue(ByteBufferUtil.bytes(count.toLong))
colCount.setTimestamp(System.currentTimeMillis)
-
val outputkey = ByteBufferUtil.bytes(word + "-COUNT-" + System.currentTimeMillis)
val mutations: java.util.List[Mutation] = new Mutation() :: new Mutation() :: Nil
mutations.get(0).setColumn_or_supercolumn(new ColumnOrSuperColumn())
mutations.get(0).column_or_supercolumn.setColumn(colWord)
-
mutations.get(1).setColumn_or_supercolumn(new ColumnOrSuperColumn())
mutations.get(1).column_or_supercolumn.setColumn(colCount)
(outputkey, mutations)
}
}.saveAsNewAPIHadoopFile("casDemo", classOf[ByteBuffer], classOf[List[Mutation]],
classOf[ColumnFamilyOutputFormat], job.getConfiguration)
-
}
}
@@ -117,16 +115,20 @@ create keyspace casDemo;
use casDemo;
create column family WordCount with comparator = UTF8Type;
-update column family WordCount with column_metadata = [{column_name: word, validation_class: UTF8Type}, {column_name: wcount, validation_class: LongType}];
+update column family WordCount with column_metadata =
+ [{column_name: word, validation_class: UTF8Type},
+ {column_name: wcount, validation_class: LongType}];
create column family Words with comparator = UTF8Type;
-update column family Words with column_metadata = [{column_name: book, validation_class: UTF8Type}, {column_name: para, validation_class: UTF8Type}];
+update column family Words with column_metadata =
+ [{column_name: book, validation_class: UTF8Type},
+ {column_name: para, validation_class: UTF8Type}];
assume Words keys as utf8;
set Words['3musk001']['book'] = 'The Three Musketeers';
-set Words['3musk001']['para'] = 'On the first Monday of the month of April, 1625, the market town of
- Meung, in which the author of ROMANCE OF THE ROSE was born, appeared to
+set Words['3musk001']['para'] = 'On the first Monday of the month of April, 1625, the market
+ town of Meung, in which the author of ROMANCE OF THE ROSE was born, appeared to
be in as perfect a state of revolution as if the Huguenots had just made
a second La Rochelle of it. Many citizens, seeing the women flying
toward the High Street, leaving their children crying at the open doors,
@@ -136,8 +138,8 @@ set Words['3musk001']['para'] = 'On the first Monday of the month of April, 1625
every minute, a compact group, vociferous and full of curiosity.';
set Words['3musk002']['book'] = 'The Three Musketeers';
-set Words['3musk002']['para'] = 'In those times panics were common, and few days passed without some city
- or other registering in its archives an event of this kind. There were
+set Words['3musk002']['para'] = 'In those times panics were common, and few days passed without
+ some city or other registering in its archives an event of this kind. There were
nobles, who made war against each other; there was the king, who made
war against the cardinal; there was Spain, which made war against the
king. Then, in addition to these concealed or public, secret or open
@@ -152,8 +154,8 @@ set Words['3musk002']['para'] = 'In those times panics were common, and few days
cause of the hubbub was apparent to all';
set Words['3musk003']['book'] = 'The Three Musketeers';
-set Words['3musk003']['para'] = 'You ought, I say, then, to husband the means you have, however large
- the sum may be; but you ought also to endeavor to perfect yourself in
+set Words['3musk003']['para'] = 'You ought, I say, then, to husband the means you have, however
+ large the sum may be; but you ought also to endeavor to perfect yourself in
the exercises becoming a gentleman. I will write a letter today to the
Director of the Royal Academy, and tomorrow he will admit you without
any expense to yourself. Do not refuse this little service. Our
@@ -165,8 +167,8 @@ set Words['3musk003']['para'] = 'You ought, I say, then, to husband the means yo
set Words['thelostworld001']['book'] = 'The Lost World';
-set Words['thelostworld001']['para'] = 'She sat with that proud, delicate profile of hers outlined against the
- red curtain. How beautiful she was! And yet how aloof! We had been
+set Words['thelostworld001']['para'] = 'She sat with that proud, delicate profile of hers outlined
+ against the red curtain. How beautiful she was! And yet how aloof! We had been
friends, quite good friends; but never could I get beyond the same
comradeship which I might have established with one of my
fellow-reporters upon the Gazette,--perfectly frank, perfectly kindly,
@@ -180,8 +182,8 @@ set Words['thelostworld001']['para'] = 'She sat with that proud, delicate profil
as that--or had inherited it in that race memory which we call instinct.';
set Words['thelostworld002']['book'] = 'The Lost World';
-set Words['thelostworld002']['para'] = 'I always liked McArdle, the crabbed, old, round-backed, red-headed news
- editor, and I rather hoped that he liked me. Of course, Beaumont was
+set Words['thelostworld002']['para'] = 'I always liked McArdle, the crabbed, old, round-backed,
+ red-headed news editor, and I rather hoped that he liked me. Of course, Beaumont was
the real boss; but he lived in the rarefied atmosphere of some Olympian
height from which he could distinguish nothing smaller than an
international crisis or a split in the Cabinet. Sometimes we saw him