aboutsummaryrefslogtreecommitdiff
path: root/core/src/test/java
diff options
context:
space:
mode:
authorXusen Yin <yinxusen@gmail.com>2014-04-04 11:12:47 -0700
committerMatei Zaharia <matei@databricks.com>2014-04-04 11:12:47 -0700
commitf1fa617023d30d8cdc5acef0274bad8cc3e89cea (patch)
tree5eaf3f485c66a74ea260afbc3a0b941ad1621579 /core/src/test/java
parent01cf4c402b9fda59680e56112bfaa2b748416d0e (diff)
downloadspark-f1fa617023d30d8cdc5acef0274bad8cc3e89cea.tar.gz
spark-f1fa617023d30d8cdc5acef0274bad8cc3e89cea.tar.bz2
spark-f1fa617023d30d8cdc5acef0274bad8cc3e89cea.zip
[SPARK-1133] Add whole text files reader in MLlib
Here is a pointer to the former [PR164](https://github.com/apache/spark/pull/164). I have added this pull request for the JIRA issue [SPARK-1133](https://spark-project.atlassian.net/browse/SPARK-1133), which brings a new whole-files reader API to MLlib. Author: Xusen Yin <yinxusen@gmail.com> Closes #252 from yinxusen/whole-files-input and squashes the following commits: 7191be6 [Xusen Yin] refine comments 0af3faf [Xusen Yin] add JavaAPI test 01745ee [Xusen Yin] fix deletion error cc97dca [Xusen Yin] move whole text file API to Spark core d792cee [Xusen Yin] remove the typo character "+" 6bdf2c2 [Xusen Yin] test for small local file system block size a1f1e7e [Xusen Yin] add two extra spaces 28cb0fe [Xusen Yin] add whole text files reader
Diffstat (limited to 'core/src/test/java')
-rw-r--r--core/src/test/java/org/apache/spark/JavaAPISuite.java30
1 files changed, 27 insertions, 3 deletions
diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java
index c6b65c7348..2372f2d992 100644
--- a/core/src/test/java/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -17,9 +17,7 @@
package org.apache.spark;
-import java.io.File;
-import java.io.IOException;
-import java.io.Serializable;
+import java.io.*;
import java.util.*;
import scala.Tuple2;
@@ -600,6 +598,32 @@ public class JavaAPISuite implements Serializable {
}
@Test
+ public void wholeTextFiles() throws IOException {
+ byte[] content1 = "spark is easy to use.\n".getBytes();
+ byte[] content2 = "spark is also easy to use.\n".getBytes();
+
+ File tempDir = Files.createTempDir();
+ String tempDirName = tempDir.getAbsolutePath();
+ DataOutputStream ds = new DataOutputStream(new FileOutputStream(tempDirName + "/part-00000"));
+ ds.write(content1);
+ ds.close();
+ ds = new DataOutputStream(new FileOutputStream(tempDirName + "/part-00001"));
+ ds.write(content2);
+ ds.close();
+
+ HashMap<String, String> container = new HashMap<String, String>();
+ container.put(tempDirName+"/part-00000", new Text(content1).toString());
+ container.put(tempDirName+"/part-00001", new Text(content2).toString());
+
+ JavaPairRDD<String, String> readRDD = sc.wholeTextFiles(tempDirName);
+ List<Tuple2<String, String>> result = readRDD.collect();
+
+ for (Tuple2<String, String> res : result) {
+ Assert.assertEquals(res._2(), container.get(res._1()));
+ }
+ }
+
+ @Test
public void textFilesCompressed() throws IOException {
File tempDir = Files.createTempDir();
String outputDir = new File(tempDir, "output").getAbsolutePath();