git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
STABLE12-winnt-avoid-nosuchvolume-for-down-servers-20030619
author Ryan Lantzer <lantzer@umr.edu>
Tue, 1 Jul 2003 20:51:15 +0000 (20:51 +0000)
committer Derrick Brashear <shadow@dementia.org>
Tue, 1 Jul 2003 20:51:15 +0000 (20:51 +0000)
FIXES 1568

If a server went down, a volume could be marked "no such volume" and
hence disappear until the next fs checkv.

(cherry picked from commit 43011a3a0013aa17a941f8ac0abf87f3efe7df7b)

src/WINNT/afsd/cm.h
src/WINNT/afsd/cm_conn.c

index fbf6af631f1c9ccd2b6a0e2c396bf79e6aff3a60..600199761a5ff11c4fefbf3914100969683bcef9 100644 (file)
@@ -245,5 +245,6 @@ int RXAFS_Lookup (struct rx_connection *,
 #define CM_ERROR_BADNTFILENAME         (CM_ERROR_BASE+37)
 #define CM_ERROR_BUFFERTOOSMALL                (CM_ERROR_BASE+38)
 #define CM_ERROR_RENAME_IDENTICAL      (CM_ERROR_BASE+39)
+#define CM_ERROR_ALLOFFLINE             (CM_ERROR_BASE+40)
 
 #endif /*  __CM_H_ENV__ */
index f38d6f6f8f72189c931e2cd437cd36d6933c73fb..6ebdf65221f75f12659b3974745256623985998d 100644 (file)
@@ -134,6 +134,15 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
        if (reqp->flags & CM_REQ_NORETRY)
                goto out;
 
+       /* if all servers are offline, mark them non-busy and start over */
+       if (errorCode == CM_ERROR_ALLOFFLINE) {
+           osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
+           thrd_Sleep(5000);
+           /* cm_ForceUpdateVolume marks all servers as non_busy */
+           cm_ForceUpdateVolume(fidp, userp, reqp);
+           retry = 1;
+       }
+
        /* if all servers are busy, mark them non-busy and start over */
        if (errorCode == CM_ERROR_ALLBUSY) {
                cm_GetServerList(fidp, userp, reqp, &serversp);
@@ -164,23 +173,37 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                long oldSum, newSum;
                int same;
 
-               /* Back off to allow move to complete */
-               thrd_Sleep(2000);
+               /* Log server being offline for this volume */
+               osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d
+marked offline for a volume",
+                        ((serverp->addr.sin_addr.s_addr & 0xff)),
+                        ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
+                        ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
+                        ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
+               /* Create Event Log message */ 
+               {
+                   HANDLE h;
+                   char *ptbuf[1];
+                   char s[100];
+                   h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
+                   sprintf(s, "cm_Analyze: Server %d.%d.%d.%d reported volume %d as missing.",
+                           ((serverp->addr.sin_addr.s_addr & 0xff)),
+                           ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
+                           ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
+                           ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24),
+                           fidp->volume);
+                   ptbuf[0] = s;
+                   ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
+                               1, 0, ptbuf, NULL);
+                   DeregisterEventSource(h);
+               }
 
-               /* Update the volume location and see if it changed */
-               cm_GetServerList(fidp, userp, reqp, &serversp);
-               oldSum = cm_ChecksumServerList(serversp);
-               cm_ForceUpdateVolume(fidp, userp, reqp);
+               /* Mark server offline for this volume */
                cm_GetServerList(fidp, userp, reqp, &serversp);
-               newSum = cm_ChecksumServerList(serversp);
-               same = (oldSum == newSum);
 
-               /* mark servers as appropriate */
                for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
                        if (tsrp->server == serverp)
                                tsrp->status = offline;
-                       else if (!same)
-                               tsrp->status = not_busy;
                }
                retry = 1;
        }
@@ -312,8 +335,11 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
        lock_ReleaseWrite(&cm_serverLock);
        if (firstError == 0) {
                if (someBusy) firstError = CM_ERROR_ALLBUSY;
-               else if (someOffline) firstError = CM_ERROR_NOSUCHVOLUME;
-               else firstError = CM_ERROR_TIMEDOUT;
+               else if (someOffline) firstError = CM_ERROR_ALLOFFLINE;
+               else if (serversp) firstError = CM_ERROR_TIMEDOUT;
+               /* Only return CM_ERROR_NOSUCHVOLUME if there are no
+                  servers for this volume */
+               else firstError = CM_ERROR_NOSUCHVOLUME;
        }
        osi_Log1(afsd_logp, "cm_ConnByMServers returning %x", firstError);
         return firstError;