git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
STABLE140-windows-busy-vs-offline-20051006
author Jeffrey Altman <jaltman@secure-endpoints.com>
Fri, 7 Oct 2005 03:25:09 +0000 (03:25 +0000)
committer Jeffrey Altman <jaltman@secure-endpoints.com>
Fri, 7 Oct 2005 03:25:09 +0000 (03:25 +0000)
Discovered a failure in the state machine.  There was no method of
distinguishing between all servers being Down (which is handled by the
background thread) and all volumes being offline (perhaps due to a move).

(cherry picked from commit 465273f096b1a1720ca07638537fd4c65d7d1d8d)

src/WINNT/afsd/cm.h
src/WINNT/afsd/cm_conn.c

index 91f3efef27dfd295322784d90c22daabea5ace46..eb5a6c736877b5e4e2e119b42efb542fa869089d 100644 (file)
@@ -251,4 +251,6 @@ int RXAFS_Lookup (struct rx_connection *,
 #define CM_ERROR_TIDIPC                 (CM_ERROR_BASE+44)
 #define CM_ERROR_TOO_MANY_SYMLINKS      (CM_ERROR_BASE+45)
 #define CM_ERROR_PATH_NOT_COVERED       (CM_ERROR_BASE+46)
+/* 47 and 48 are reserved for the byte range lock support */
+#define CM_ERROR_ALLDOWN                (CM_ERROR_BASE+49)
 #endif /*  __CM_H_ENV__ */
index ccb31d1e0e849500a7703a4015745a88894706ff..d33a8e32d57489f0f6cf36e2e38d53d44df56c7b 100644 (file)
@@ -233,13 +233,54 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         }
     }
 
-    else if (errorCode == CM_ERROR_ALLOFFLINE) {
-       osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
+    else if (errorCode == CM_ERROR_ALLDOWN) {
+       osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN.");
        /* Servers marked DOWN will be restored by the background daemon
         * thread as they become available.
         */
     }
 
+    else if (errorCode == CM_ERROR_ALLOFFLINE) {
+        if (timeLeft > 7) {
+            osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
+            thrd_Sleep(5000);
+            
+           if (fidp) { /* Not a VLDB call */
+               if (!serversp) {
+                   code = cm_GetServerList(fidp, userp, reqp, &serverspp);
+                   if (code == 0) {
+                       serversp = *serverspp;
+                       free_svr_list = 1;
+                   }
+               }
+               if (serversp) {
+                   lock_ObtainWrite(&cm_serverLock);
+                   for (tsrp = serversp; tsrp; tsrp=tsrp->next)
+                       tsrp->status = not_busy;
+                   lock_ReleaseWrite(&cm_serverLock);
+                   if (free_svr_list) {
+                       cm_FreeServerList(&serversp);
+                       *serverspp = serversp;
+                   }
+                   retry = 1;
+               }
+
+                cm_ForceUpdateVolume(fidp, userp, reqp);
+           } else { /* VLDB call */
+               if (serversp) {
+                   lock_ObtainWrite(&cm_serverLock);
+                   for (tsrp = serversp; tsrp; tsrp=tsrp->next)
+                       tsrp->status = not_busy;
+                   lock_ReleaseWrite(&cm_serverLock);
+                   if (free_svr_list) {
+                       cm_FreeServerList(&serversp);
+                       *serverspp = serversp;
+                   }
+               }
+           }   
+        }
+    }
+
     /* if all servers are busy, mark them non-busy and start over */
     else if (errorCode == CM_ERROR_ALLBUSY) {
        osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY.");
@@ -465,12 +506,17 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
     cm_serverRef_t *tsrp;
     cm_server_t *tsp;
     long firstError = 0;
-    int someBusy = 0, someOffline = 0, allBusy = 1, allDown = 1;
+    int someBusy = 0, someOffline = 0, allOffline = 1, allBusy = 1, allDown = 1;
     long timeUsed, timeLeft, hardTimeLeft;
 #ifdef DJGPP
     struct timeval now;
 #endif /* DJGPP */        
 
+    if (serversp == NULL) {
+       osi_Log1(afsd_logp, "cm_ConnByMServers returning 0x%x", CM_ERROR_NOSUCHVOLUME);
+       return CM_ERROR_NOSUCHVOLUME;
+    }
+
     *connpp = NULL;
 
 #ifndef DJGPP
@@ -490,13 +536,15 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
         cm_GetServerNoLock(tsp);
         lock_ReleaseWrite(&cm_serverLock);
         if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+           allDown = 0;
             if (tsrp->status == busy) {
-                allDown = 0;
+               allOffline = 0;
                 someBusy = 1;
             } else if (tsrp->status == offline) {
-                someOffline = 1;
+               allBusy = 0;
+               someOffline = 1;
             } else {
-                allDown = 0;
+               allOffline = 0;
                 allBusy = 0;
                 code = cm_ConnByServer(tsp, usersp, connpp);
                 if (code == 0) {        /* cm_CBS only returns 0 */
@@ -523,15 +571,15 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
         lock_ObtainWrite(&cm_serverLock);
         cm_PutServerNoLock(tsp);
     }   
-
     lock_ReleaseWrite(&cm_serverLock);
+
     if (firstError == 0) {
-        if (serversp == NULL)
-            firstError = CM_ERROR_NOSUCHVOLUME;
-        else if (allDown) 
-            firstError = CM_ERROR_ALLOFFLINE;
+        if (allDown) 
+            firstError = CM_ERROR_ALLDOWN;
         else if (allBusy) 
             firstError = CM_ERROR_ALLBUSY;
+       else if (allOffline || (someBusy && someOffline))
+           firstError = CM_ERROR_ALLOFFLINE;
         else {
             osi_Log0(afsd_logp, "cm_ConnByMServers returning impossible error TIMEDOUT");
             firstError = CM_ERROR_TIMEDOUT;