Spark 1490 Add kerberos support to the HistoryServer

Here I've added the ability for the History server to log in from a kerberos keytab file so that the history server can be run as a super user and stay up for a long period of time while reading the history files from HDFS. Author: Thomas Graves <tgraves@apache.org> Closes #513 from tgravescs/SPARK-1490 and squashes the following commits: e204a99 [Thomas Graves] remove extra logging 5418daa [Thomas Graves] fix typo in config 0076b99 [Thomas Graves] Update docs 4d76545 [Thomas Graves] SPARK-1490 Add kerberos support to the HistoryServer
author: Thomas Graves <tgraves@apache.org> 2014-04-24 11:15:12 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-04-24 11:16:30 -0700
commit: bd375094a1480b0ff9c16ab8ddd2dba8731506df (patch)
tree: 0d56288f8906d25c3e10f86648469951417ce76f /core
parent: 78a49b2532d4751257654dfe55a564bcd10701b3 (diff)
download: spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.tar.gz
spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.tar.bz2
spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.zip
2 files changed, 20 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 9bdbfb33bf..498fcc520a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -75,6 +75,10 @@ class SparkHadoopUtil {
 
   def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null }
 
+  def loginUserFromKeytab(principalName: String, keytabFilename: String) { 
+    UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
+  }
+
 }
 
 object SparkHadoopUtil {
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index b8f56234d3..d7a3246bcf 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
 import org.apache.hadoop.fs.{FileStatus, Path}
 
 import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.{WebUI, SparkUI}
 import org.apache.spark.ui.JettyUtils._
@@ -257,6 +258,7 @@ object HistoryServer {
   val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
 
   def main(argStrings: Array[String]) {
+    initSecurity()
     val args = new HistoryServerArguments(argStrings)
     val securityManager = new SecurityManager(conf)
     val server = new HistoryServer(args.logDir, securityManager, conf)
@@ -266,6 +268,20 @@ object HistoryServer {
     while(true) { Thread.sleep(Int.MaxValue) }
     server.stop()
   }
+
+  def initSecurity() {
+    // If we are accessing HDFS and it has security enabled (Kerberos), we have to login
+    // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration.
+    // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically
+    // occur from the keytab.
+    if (conf.getBoolean("spark.history.kerberos.enabled", false)) {
+      // if you have enabled kerberos the following 2 params must be set
+      val principalName = conf.get("spark.history.kerberos.principal")
+      val keytabFilename = conf.get("spark.history.kerberos.keytab")
+      SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename)
+    }
+  }
+
 }
author	Thomas Graves <tgraves@apache.org>	2014-04-24 11:15:12 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-04-24 11:16:30 -0700
commit	bd375094a1480b0ff9c16ab8ddd2dba8731506df (patch)
tree	0d56288f8906d25c3e10f86648469951417ce76f /core
parent	78a49b2532d4751257654dfe55a564bcd10701b3 (diff)
download	spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.tar.gz spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.tar.bz2 spark-bd375094a1480b0ff9c16ab8ddd2dba8731506df.zip