From bd375094a1480b0ff9c16ab8ddd2dba8731506df Mon Sep 17 00:00:00 2001 From: Thomas Graves <tgraves@apache.org> Date: Thu, 24 Apr 2014 11:15:12 -0700 Subject: [PATCH] Spark 1490 Add kerberos support to the HistoryServer Here I've added the ability for the History server to login from a kerberos keytab file so that the history server can be run as a super user and stay up for a long period of time while reading the history files from HDFS. Author: Thomas Graves <tgraves@apache.org> Closes #513 from tgravescs/SPARK-1490 and squashes the following commits: e204a99 [Thomas Graves] remove extra logging 5418daa [Thomas Graves] fix typo in config 0076b99 [Thomas Graves] Update docs 4d76545 [Thomas Graves] SPARK-1490 Add kerberos support to the HistoryServer --- .../apache/spark/deploy/SparkHadoopUtil.scala | 4 ++++ .../spark/deploy/history/HistoryServer.scala | 16 +++++++++++++ docs/monitoring.md | 24 +++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 9bdbfb33bf..498fcc520a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -75,6 +75,10 @@ class SparkHadoopUtil { def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null } + def loginUserFromKeytab(principalName: String, keytabFilename: String) { + UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename) + } + } object SparkHadoopUtil { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index b8f56234d3..d7a3246bcf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import 
org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.{Logging, SecurityManager, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.scheduler._ import org.apache.spark.ui.{WebUI, SparkUI} import org.apache.spark.ui.JettyUtils._ @@ -257,6 +258,7 @@ object HistoryServer { val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR def main(argStrings: Array[String]) { + initSecurity() val args = new HistoryServerArguments(argStrings) val securityManager = new SecurityManager(conf) val server = new HistoryServer(args.logDir, securityManager, conf) @@ -266,6 +268,20 @@ object HistoryServer { while(true) { Thread.sleep(Int.MaxValue) } server.stop() } + + def initSecurity() { + // If we are accessing HDFS and it has security enabled (Kerberos), we have to login + // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration. + // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically + // occur from the keytab. + if (conf.getBoolean("spark.history.kerberos.enabled", false)) { + // if you have enabled kerberos the following 2 params must be set + val principalName = conf.get("spark.history.kerberos.principal") + val keytabFilename = conf.get("spark.history.kerberos.keytab") + SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename) + } + } + } diff --git a/docs/monitoring.md b/docs/monitoring.md index 144be3daf1..347a9b1f1a 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -91,6 +91,30 @@ represents an application's event logs. This creates a web interface at The port to which the web interface of the history server binds. </td> </tr> + <tr> + <td>spark.history.kerberos.enabled</td> + <td>false</td> + <td> + Indicates whether the history server should use kerberos to login. This is useful + if the history server is accessing HDFS files on a secure Hadoop cluster. 
If this is + true, it uses the configs <code>spark.history.kerberos.principal</code> and + <code>spark.history.kerberos.keytab</code>. + </td> + </tr> + <tr> + <td>spark.history.kerberos.principal</td> + <td>(none)</td> + <td> + Kerberos principal name for the History Server. + </td> + </tr> + <tr> + <td>spark.history.kerberos.keytab</td> + <td>(none)</td> + <td> + Location of the kerberos keytab file for the History Server. + </td> + </tr> </table> Note that in all of these UIs, the tables are sortable by clicking their headers, -- GitLab