git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
bozo: avoid canceling the sigkill timer for hung processes
author Michael Meffie <mmeffie@sinenomine.net>
Wed, 1 Aug 2012 15:42:34 +0000 (11:42 -0400)
committer Paul Smeddle <paul.smeddle@gmail.com>
Wed, 5 Dec 2012 16:51:48 +0000 (08:51 -0800)
A SIGKILL signal is sent to fileserver processes when the timeout for
shutting down the processes of the fs/dafs bnode is exceeded.
(Currently 30 minutes for the fileserver, 1 minute for the other
server processes.)

If the bnode goal is set to run before this timeout expires, the
timer is incorrectly stopped, and a wedged process is never killed.
Fix this by not canceling the timer when a fs/dafs process has been
signaled to shut down, regardless of the current goal.

Reviewed-on: http://gerrit.openafs.org/7920
Reviewed-by: Derrick Brashear <shadow@dementix.org>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
(cherry picked from commit 09f5a1e6053e6db3df581543875512d8cff259ae)

Change-Id: I0d5fabed13e597d2571033468688457c38b49283
Reviewed-on: http://gerrit.openafs.org/8583
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Ken Dreyer <ktdreyer@ktdreyer.com>
Reviewed-by: Paul Smeddle <paul.smeddle@gmail.com>
src/bozo/fsbnodeops.c

index 285786179139bb5972f5d651877853108ef19c92..b3700353ac603cc0381ea2191a5a22b8c2fa3468 100644 (file)
@@ -784,7 +784,12 @@ fs_procexit(struct bnode *bn, struct bnode_proc *aproc)
 static void
 SetNeedsClock(struct fsbnode *ab)
 {
-    if (ab->b.goal == 1 && ab->fileRunning && ab->volRunning
+    if ((ab->fileSDW && !ab->fileKillSent) || (ab->volSDW && !ab->volKillSent)
+       || (ab->scanSDW && !ab->scanKillSent) || (ab->salSDW && !ab->salKillSent)
+       || (ab->salsrvSDW && !ab->salsrvKillSent)) {
+       /* SIGQUIT sent, will send SIGKILL if process does not exit */
+       ab->needsClock = 1;
+    } else if (ab->b.goal == 1 && ab->fileRunning && ab->volRunning
        && (!ab->scancmd || ab->scanRunning)
        && (!ab->salsrvcmd || ab->salsrvRunning))
        ab->needsClock = 0;     /* running normally */