From f4482225c405b9cfe078deac74e4c28e2dcc97c3 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Fri, 12 Aug 2016 10:00:58 +0100
Subject: [MINOR][DOC] Fix style in examples across documentation

## What changes were proposed in this pull request?

This PR fixes the documentation as below:

  - Python examples use 4-space indentation, and Java and Scala examples use 2-space indentation (see https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide).

  - Avoid excessive parentheses and curly braces for anonymous functions (see https://github.com/databricks/scala-style-guide#anonymous).

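As a quick, self-contained illustration of both rules (the object and value names below are invented for this description and do not appear in the patch):

```scala
object IndentAndBraceStyle {
  def main(args: Array[String]): Unit = {
    val words = Seq("spark", "graphx", "streaming")

    // Preferred: curly braces around a multi-line anonymous function, 2-space indentation
    val lengths = words.map { word =>
      word.length
    }

    // Avoided: redundant parentheses plus curly braces wrapping the same function:
    // words.map(word => { word.length })

    lengths.foreach(println(_))
  }
}
```

The diff below applies the same two conventions to the existing Scala, Java, and Python snippets in the docs.
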
## How was this patch tested?

N/A

Author: hyukjinkwon

Closes #14593 from HyukjinKwon/minor-documentation.
---
 docs/graphx-programming-guide.md    |  8 +++----
 docs/programming-guide.md           |  4 ++--
 docs/spark-standalone.md            |  6 ++---
 docs/streaming-custom-receivers.md  | 48 ++++++++++++++++++-------------------
 docs/streaming-programming-guide.md | 28 +++++++++++-----------
 5 files changed, 47 insertions(+), 47 deletions(-)
(limited to 'docs')

diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index bf4b968eb8..6f738f0599 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -421,15 +421,15 @@ val graph = Graph(users, relationships, defaultUser)
 // Notice that there is a user 0 (for which we have no information) connected to users
 // 4 (peter) and 5 (franklin).
 graph.triplets.map(
-    triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
-  ).collect.foreach(println(_))
+  triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
+).collect.foreach(println(_))
 // Remove missing vertices as well as the edges to connected to them
 val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
 // The valid subgraph will disconnect users 4 and 5 by removing user 0
 validGraph.vertices.collect.foreach(println(_))
 validGraph.triplets.map(
-    triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
-  ).collect.foreach(println(_))
+  triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
+).collect.foreach(println(_))
 {% endhighlight %}
 
 > Note in the above example only the vertex predicate is provided. The `subgraph` operator defaults
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index f82832905e..40287d7702 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -1516,8 +1516,8 @@ data.map(x -> { accum.add(x); return f(x); });
 {% highlight python %}
 accum = sc.accumulator(0)
 def g(x):
-  accum.add(x)
-  return f(x)
+    accum.add(x)
+    return f(x)
 data.map(g)
 # Here, accum is still 0 because no actions have caused the `map` to be computed.
 {% endhighlight %}
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 5ae63fe4e6..1097f1fabe 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -298,9 +298,9 @@ application at a time. You can cap the number of cores by setting `spark.cores.m
 
 {% highlight scala %}
 val conf = new SparkConf()
-             .setMaster(...)
-             .setAppName(...)
-             .set("spark.cores.max", "10")
+  .setMaster(...)
+  .setAppName(...)
+  .set("spark.cores.max", "10")
 val sc = new SparkContext(conf)
 {% endhighlight %}
 
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md
index 479140f519..fae5901e8d 100644
--- a/docs/streaming-custom-receivers.md
+++ b/docs/streaming-custom-receivers.md
@@ -59,8 +59,8 @@ class CustomReceiver(host: String, port: Int)
   }
 
   def onStop() {
-   // There is nothing much to do as the thread calling receive()
-   // is designed to stop by itself if isStopped() returns false
+    // There is nothing much to do as the thread calling receive()
+    // is designed to stop by itself if isStopped() returns false
   }
 
   /** Create a socket connection and receive data until receiver is stopped */
@@ -68,29 +68,29 @@ class CustomReceiver(host: String, port: Int)
     var socket: Socket = null
     var userInput: String = null
     try {
-     // Connect to host:port
-     socket = new Socket(host, port)
-
-     // Until stopped or connection broken continue reading
-     val reader = new BufferedReader(
-       new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
-     userInput = reader.readLine()
-     while(!isStopped && userInput != null) {
-       store(userInput)
-       userInput = reader.readLine()
-     }
-     reader.close()
-     socket.close()
-
-     // Restart in an attempt to connect again when server is active again
-     restart("Trying to connect again")
+      // Connect to host:port
+      socket = new Socket(host, port)
+
+      // Until stopped or connection broken continue reading
+      val reader = new BufferedReader(
+        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
+      userInput = reader.readLine()
+      while(!isStopped && userInput != null) {
+        store(userInput)
+        userInput = reader.readLine()
+      }
+      reader.close()
+      socket.close()
+
+      // Restart in an attempt to connect again when server is active again
+      restart("Trying to connect again")
     } catch {
-     case e: java.net.ConnectException =>
-       // restart if could not connect to server
-       restart("Error connecting to " + host + ":" + port, e)
-     case t: Throwable =>
-       // restart if there is any other error
-       restart("Error receiving data", t)
+      case e: java.net.ConnectException =>
+        // restart if could not connect to server
+        restart("Error connecting to " + host + ":" + port, e)
+      case t: Throwable =>
+        // restart if there is any other error
+        restart("Error receiving data", t)
     }
   }
 }
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 3d40b2c313..aef62ea900 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -863,7 +863,7 @@ Java code, take a look at the example
 {% highlight python %}
 def updateFunction(newValues, runningCount):
     if runningCount is None:
-       runningCount = 0
+        runningCount = 0
     return sum(newValues, runningCount)  # add the new values with the previous running count to get the new count
 {% endhighlight %}
 
@@ -903,10 +903,10 @@ spam information (maybe generated with Spark as well) and then filtering based o
 
 {% highlight scala %}
 val spamInfoRDD = ssc.sparkContext.newAPIHadoopRDD(...) // RDD containing spam information
 
-val cleanedDStream = wordCounts.transform(rdd => {
+val cleanedDStream = wordCounts.transform { rdd =>
   rdd.join(spamInfoRDD).filter(...) // join data stream with spam information to do data cleaning
   ...
-})
+}
 {% endhighlight %}
 
@@ -1142,12 +1142,12 @@ val joinedStream = windowedStream.transform { rdd => rdd.join(dataset) }
 JavaPairRDD<String, String> dataset = ...
 JavaPairDStream<String, String> windowedStream = stream.window(Durations.seconds(20));
 JavaPairDStream<String, String> joinedStream = windowedStream.transform(
-    new Function<JavaRDD<Tuple2<String, String>>, JavaRDD<Tuple2<String, String>>>() {
-        @Override
-        public JavaRDD<Tuple2<String, String>> call(JavaRDD<Tuple2<String, String>> rdd) {
-            return rdd.join(dataset);
-        }
+  new Function<JavaRDD<Tuple2<String, String>>, JavaRDD<Tuple2<String, String>>>() {
+    @Override
+    public JavaRDD<Tuple2<String, String>> call(JavaRDD<Tuple2<String, String>> rdd) {
+      return rdd.join(dataset);
     }
+  }
 );
 {% endhighlight %}
 
@@ -1611,7 +1611,7 @@ words.foreachRDD(
 
         // Do word count on table using SQL and print it
         DataFrame wordCountsDataFrame =
-            spark.sql("select word, count(*) as total from words group by word");
+          spark.sql("select word, count(*) as total from words group by word");
         wordCountsDataFrame.show();
         return null;
       }
@@ -1759,11 +1759,11 @@ This behavior is made simple by using `StreamingContext.getOrCreate`. This is us
 {% highlight scala %}
 // Function to create and setup a new StreamingContext
 def functionToCreateContext(): StreamingContext = {
-    val ssc = new StreamingContext(...)   // new context
-    val lines = ssc.socketTextStream(...) // create DStreams
-    ...
-    ssc.checkpoint(checkpointDirectory)   // set checkpoint directory
-    ssc
+  val ssc = new StreamingContext(...)   // new context
+  val lines = ssc.socketTextStream(...) // create DStreams
+  ...
+  ssc.checkpoint(checkpointDirectory)   // set checkpoint directory
+  ssc
 }
 
 // Get StreamingContext from checkpoint data or create a new one
--
cgit v1.2.3