aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala 4
1 files changed, 3 insertions, 1 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
index 11399a7633..08a93595a2 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
@@ -173,7 +173,9 @@ object LDAExample {
stopwordFile: String): (RDD[(Long, Vector)], Array[String], Long) = {
// Get dataset of document texts
- // One document per line in each text file.
+ // One document per line in each text file. If the input consists of many small files,
+ // this can result in a large number of small partitions, which can degrade performance.
+ // In this case, consider using coalesce() to create fewer, larger partitions.
val textRDD: RDD[String] = sc.textFile(paths.mkString(","))
// Split text into words