Shawn's fix for some iprop bugs, with some tweaks
authorKen Raeburn <raeburn@mit.edu>
Mon, 1 Dec 2008 06:48:54 +0000 (06:48 +0000)
committerKen Raeburn <raeburn@mit.edu>
Mon, 1 Dec 2008 06:48:54 +0000 (06:48 +0000)
Adds an alarm while waiting for kprop connection or authentication in
iprop mode; on timeout, close down the active file descriptor to force
us to bail out and return to the iprop main loop (which may try a full
resync again next time around).

git-svn-id: svn://anonsvn.mit.edu/krb5/trunk@21204 dc483132-0cff-0310-8789-dd5450dbe970

src/slave/kpropd.c

index 81c490383b20155b763aa38e7443f32f91792514..bee5db0b15c3ee112c786bb069bec2b14175d22e 100644 (file)
@@ -98,10 +98,16 @@ extern int daemon(int, int);
 #endif
 
 #define SYSLOG_CLASS LOG_DAEMON
+#define INITIAL_TIMER 10
 
 char *def_realm = NULL;
 int runonce = 0;
 
+/*
+ * Global fd to close upon alarm time-out.
+ */
+volatile int gfd = -1;
+
 /*
  * This struct simulates the use of _kadm5_server_handle_t
  *
@@ -243,6 +249,14 @@ main(argc, argv)
     exit(ret);
 }
 
+void resync_alarm(int sn)
+{
+    close (gfd);
+    if (debug)
+       fprintf(stderr, _("resync_alarm: closing fd: %d\n"), gfd);
+    gfd = -1;
+}
+
 int do_standalone(iprop_role iproprole)
 {
        struct  sockaddr_in     my_sin, frominet;
@@ -250,6 +264,12 @@ int do_standalone(iprop_role iproprole)
        int     finet, s;
        GETPEERNAME_ARG3_TYPE fromlen;
        int     ret;
+       /*
+        * Timer for accept/read calls, in case of network type errors.
+        */
+       int backoff_timer = INITIAL_TIMER;
+
+retry:
 
        finet = socket(AF_INET, SOCK_STREAM, 0);
        if (finet < 0) {
@@ -281,13 +301,30 @@ int do_standalone(iprop_role iproprole)
            if (setsockopt(finet, SOL_SOCKET, SO_REUSEADDR,
                           (char *)&on, sizeof(on)) < 0)
                com_err(progname, errno,
-                       _("in setsockopt(SO_REUSEADDR)"));
+                       _("while setting socket option (SO_REUSEADDR)"));
            linger.l_onoff = 1;
            linger.l_linger = 2;
            if (setsockopt(finet, SOL_SOCKET, SO_LINGER,
                           (void *)&linger, sizeof(linger)) < 0)
                com_err(progname, errno,
-                       _("in setsockopt(SO_LINGER)"));
+                       _("while setting socket option (SO_LINGER)"));
+           /*
+            * We also want to set a timer so that the slave is not waiting
+            * until infinity for an update from the master.
+            */
+           gfd = finet;
+           signal(SIGALRM, resync_alarm);
+           if (debug) {
+               fprintf(stderr, "do_standalone: setting resync alarm to %d\n",
+                       backoff_timer);
+           }
+           if (alarm(backoff_timer) != 0) {
+               if (debug) {
+                   fprintf(stderr,
+                           _("%s: alarm already set\n"), progname);
+               }
+           }
+           backoff_timer *= 2;
        }
        if ((ret = bind(finet, (struct sockaddr *) &my_sin, sizeof(my_sin))) < 0) {
            if (debug) {
@@ -331,11 +368,30 @@ int do_standalone(iprop_role iproprole)
                s = accept(finet, (struct sockaddr *) &frominet, &fromlen);
 
                if (s < 0) {
-                       if (errno != EINTR)
-                               com_err(progname, errno,
-                                       "from accept system call");
-                       continue;
+                   int e = errno;
+                   if (e != EINTR) {
+                       com_err(progname, e,
+                               _("while accepting connection"));
+                       if (e != EBADF)
+                           backoff_timer = INITIAL_TIMER;
+                   }
+                   /*
+                    * If we got EBADF, an alarm signal handler closed
+                    * the file descriptor on us.
+                    */
+                   if (e != EBADF)
+                       close(finet);
+                   /*
+                    * An alarm could have been set and the fd closed, we
+                    * should retry in case of transient network error for
+                    * up to a couple of minutes.
+                    */
+                   if (backoff_timer > 120)
+                       return EINTR;
+                   goto retry;
                }
+               alarm(0);
+               gfd = -1;
                if (debug && iproprole != IPROP_SLAVE)
                        child_pid = 0;
                else
@@ -351,10 +407,18 @@ int do_standalone(iprop_role iproprole)
                        close(s);
                        _exit(0);
                default:
+                   /*
+                    * Errors should not be considered fatal in the
+                    * iprop case as we could have transient type
+                    * errors, such as network outage, etc.  Sleeping
+                    * 3s for 2s linger interval.
+                    */
                    if (wait(&status) < 0) {
                        com_err(progname, errno,
                                _("while waiting to receive database"));
-                       exit(1);
+                       if (iproprole != IPROP_SLAVE)
+                           exit(1);
+                       sleep(3);
                    }
 
                    close(s);
@@ -384,6 +448,23 @@ void doit(fd)
        krb5_enctype etype;
        int database_fd;
 
+       if (kpropd_context->kdblog_context &&
+           kpropd_context->kdblog_context->iproprole == IPROP_SLAVE) {
+           /*
+            * We also want to set a timer so that the slave is not waiting
+            * until infinity for an update from the master.
+            */
+           if (debug)
+               fprintf(stderr, "doit: setting resync alarm to 5s\n");
+           signal(SIGALRM, resync_alarm);
+           gfd = fd;
+           if (alarm(5) != 0) {
+               if (debug) {
+                   fprintf(stderr,
+                           _("%s: alarm already set\n"), progname);
+               }
+           }
+       }
        fromlen = sizeof (from);
        if (getpeername(fd, (struct sockaddr *) &from, &fromlen) < 0) {
 #ifdef ENOTSOCK
@@ -423,6 +504,11 @@ void doit(fd)
         */
        kerberos_authenticate(kpropd_context, fd, &client, &etype, from);
 
+       /*
+        * Turn off alarm upon successful authentication from master.
+        */
+       alarm(0);
+
        if (!authorized_principal(kpropd_context, client, etype)) {
                char    *name;
 
@@ -724,10 +810,6 @@ reinit:
                                 * the full dump
                                 */
                                ret = do_standalone(log_ctx->iproprole);
-                               if (ret)
-                                       syslog(LOG_WARNING,
-                                           _("kpropd: Full resync, "
-                                           "invalid return."));
                                if (debug) {
                                        if (ret)
                                                fprintf(stderr,
@@ -738,7 +820,12 @@ reinit:
                                                    _("Full resync "
                                                    "was successful\n"));
                                }
-                               frdone = 1;
+                               if (ret) {
+                                   syslog(LOG_WARNING,
+                                          _("kpropd: Full resync, invalid return."));
+                                   frdone = 0;
+                               } else
+                                   frdone = 1;
                                break;
 
                        case UPDATE_BUSY:
@@ -1472,7 +1559,7 @@ recv_error(context, inbuf)
        } else if (error->error) {
                com_err(progname, 
                        (krb5_error_code) error->error + ERROR_TABLE_BASE_krb5,
-                       "signalled from server");
+                       "signaled from server");
                if (error->text.data)
                        fprintf(stderr,
                                "Error text from client: %s\n",