aboutsummaryrefslogtreecommitdiff
path: root/streaming/src/main/java
diff options
context:
space:
mode:
authorTathagata Das <tathagata.das1565@gmail.com>2015-04-29 13:06:11 -0700
committerTathagata Das <tathagata.das1565@gmail.com>2015-04-29 13:06:11 -0700
commit1868bd40dcce23990b98748b0239bd00452b1ca5 (patch)
treeb350c7f739a52e322f967194814aa433025ecbb4 /streaming/src/main/java
parentc0c0ba6d2a11febc8e874c437c4f676dd36ae059 (diff)
downloadspark-1868bd40dcce23990b98748b0239bd00452b1ca5.tar.gz
spark-1868bd40dcce23990b98748b0239bd00452b1ca5.tar.bz2
spark-1868bd40dcce23990b98748b0239bd00452b1ca5.zip
[SPARK-7056] [STREAMING] Make the Write Ahead Log pluggable
Users may want the WAL data to be written to non-HDFS data storage systems. To allow that, we have to make the WAL pluggable. The following design doc outlines the plan. https://docs.google.com/a/databricks.com/document/d/1A2XaOLRFzvIZSi18i_luNw5Rmm9j2j4AigktXxIYxmY/edit?usp=sharing Things to add. * Unit tests for WriteAheadLogUtils Author: Tathagata Das <tathagata.das1565@gmail.com> Closes #5645 from tdas/wal-pluggable and squashes the following commits: 2c431fd [Tathagata Das] Minor fixes. c2bc7384 [Tathagata Das] More changes based on PR comments. 569a416 [Tathagata Das] fixed long line bde26b1 [Tathagata Das] Renamed segment to record handle everywhere b65e155 [Tathagata Das] More changes based on PR comments. d7cd15b [Tathagata Das] Fixed test 1a32a4b [Tathagata Das] Fixed test e0d19fb [Tathagata Das] Fixed defaults 9310cbf [Tathagata Das] style fix. 86abcb1 [Tathagata Das] Refactored WriteAheadLogUtils, and consolidated all WAL related configuration into it. 84ce469 [Tathagata Das] Added unit test and fixed compilation error. bce5e75 [Tathagata Das] Fixed long lines. 837c4f5 [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into wal-pluggable 754fbf8 [Tathagata Das] Added license and docs. 09bc6fe [Tathagata Das] Merge remote-tracking branch 'apache-github/master' into wal-pluggable 7dd2d4b [Tathagata Das] Added pluggable WriteAheadLog interface, and refactored all code along with it
Diffstat (limited to 'streaming/src/main/java')
-rw-r--r--streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java60
-rw-r--r--streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLogRecordHandle.java30
2 files changed, 90 insertions, 0 deletions
diff --git a/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
new file mode 100644
index 0000000000..8c0fdfa9c7
--- /dev/null
+++ b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLog.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.util;
+
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+/**
+ * This abstract class represents a write ahead log (aka journal) that is used by Spark Streaming
+ * to save the received data (by receivers) and associated metadata to a reliable storage, so that
+ * they can be recovered after driver failures. See the Spark documentation for more information
+ * on how to plug in your own custom implementation of a write ahead log.
+ */
+@org.apache.spark.annotation.DeveloperApi
+public abstract class WriteAheadLog {
+ /**
+ * Write the record to the log and return a record handle, which contains all the information
+ * necessary to read back the written record. The time is used to the index the record,
+ * such that it can be cleaned later. Note that implementations of this abstract class must
+ * ensure that the written data is durable and readable (using the record handle) by the
+ * time this function returns.
+ */
+ abstract public WriteAheadLogRecordHandle write(ByteBuffer record, long time);
+
+ /**
+ * Read a written record based on the given record handle.
+ */
+ abstract public ByteBuffer read(WriteAheadLogRecordHandle handle);
+
+ /**
+ * Read and return an iterator of all the records that have been written but not yet cleaned up.
+ */
+ abstract public Iterator<ByteBuffer> readAll();
+
+ /**
+ * Clean all the records that are older than the threshold time. It can wait for
+ * the completion of the deletion.
+ */
+ abstract public void clean(long threshTime, boolean waitForCompletion);
+
+ /**
+ * Close this log and release any resources.
+ */
+ abstract public void close();
+}
diff --git a/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLogRecordHandle.java b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLogRecordHandle.java
new file mode 100644
index 0000000000..02324189b7
--- /dev/null
+++ b/streaming/src/main/java/org/apache/spark/streaming/util/WriteAheadLogRecordHandle.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.streaming.util;
+
+/**
+ * This abstract class represents a handle that refers to a record written in a
+ * {@link org.apache.spark.streaming.util.WriteAheadLog WriteAheadLog}.
+ * It must contain all the information necessary for the record to be read and returned by
+ * an implemenation of the WriteAheadLog class.
+ *
+ * @see org.apache.spark.streaming.util.WriteAheadLog
+ */
+@org.apache.spark.annotation.DeveloperApi
+public abstract class WriteAheadLogRecordHandle implements java.io.Serializable {
+}