From 465273f096b1a1720ca07638537fd4c65d7d1d8d Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 7 Oct 2005 03:09:48 +0000 Subject: [PATCH] windows-busy-vs-offline-20051006 Discovered a failure in the state machine. There was no method of distinguishing between all servers being Down (which is handled by the background thread) and all volumes being offline (perhaps due to a move). --- src/WINNT/afsd/cm.h | 1 + src/WINNT/afsd/cm_conn.c | 70 +++++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/src/WINNT/afsd/cm.h b/src/WINNT/afsd/cm.h index b551b31b0..28aa54539 100644 --- a/src/WINNT/afsd/cm.h +++ b/src/WINNT/afsd/cm.h @@ -253,4 +253,5 @@ int RXAFS_Lookup (struct rx_connection *, #define CM_ERROR_PATH_NOT_COVERED (CM_ERROR_BASE+46) #define CM_ERROR_LOCK_CONFLICT (CM_ERROR_BASE+47) #define CM_ERROR_SHARING_VIOLATION (CM_ERROR_BASE+48) +#define CM_ERROR_ALLDOWN (CM_ERROR_BASE+49) #endif /* __CM_H_ENV__ */ diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 16c1ac4df..c7cc7eebe 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -233,13 +233,54 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, } } - else if (errorCode == CM_ERROR_ALLOFFLINE) { - osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE."); + else if (errorCode == CM_ERROR_ALLDOWN) { + osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN."); /* Servers marked DOWN will be restored by the background daemon * thread as they become available. */ } + else if (errorCode == CM_ERROR_ALLOFFLINE) { + if (timeLeft > 7) { + osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE."); + thrd_Sleep(5000); + + if (fidp) { /* Not a VLDB call */ + if (!serversp) { + code = cm_GetServerList(fidp, userp, reqp, &serverspp); + if (code == 0) { + serversp = *serverspp; + free_svr_list = 1; + } + } + if (serversp) { + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) + tsrp->status = not_busy; + lock_ReleaseWrite(&cm_serverLock); + if (free_svr_list) { + cm_FreeServerList(&serversp); + *serverspp = serversp; + } + retry = 1; + } + + cm_ForceUpdateVolume(fidp, userp, reqp); + } else { /* VLDB call */ + if (serversp) { + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) + tsrp->status = not_busy; + lock_ReleaseWrite(&cm_serverLock); + if (free_svr_list) { + cm_FreeServerList(&serversp); + *serverspp = serversp; + } + } + } + } + } + /* if all servers are busy, mark them non-busy and start over */ else if (errorCode == CM_ERROR_ALLBUSY) { osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY."); @@ -467,12 +508,17 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, cm_serverRef_t *tsrp; cm_server_t *tsp; long firstError = 0; - int someBusy = 0, someOffline = 0, allBusy = 1, allDown = 1; + int someBusy = 0, someOffline = 0, allOffline = 1, allBusy = 1, allDown = 1; long timeUsed, timeLeft, hardTimeLeft; #ifdef DJGPP struct timeval now; #endif /* DJGPP */ + if (serversp == NULL) { + osi_Log1(afsd_logp, "cm_ConnByMServers returning 0x%x", CM_ERROR_NOSUCHVOLUME); + return CM_ERROR_NOSUCHVOLUME; + } + *connpp = NULL; #ifndef DJGPP @@ -492,13 +538,15 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, cm_GetServerNoLock(tsp); lock_ReleaseWrite(&cm_serverLock); if (!(tsp->flags & CM_SERVERFLAG_DOWN)) { + allDown = 0; if (tsrp->status == busy) { - allDown = 0; + allOffline = 0; someBusy = 1; } else if (tsrp->status == offline) { - someOffline = 1; + allBusy = 0; + someOffline = 1; } else { - allDown = 0; + allOffline = 0; allBusy = 0; code = cm_ConnByServer(tsp, usersp, connpp); if (code == 0) { /* cm_CBS only returns 0 */ @@ -525,15 +573,15 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, lock_ObtainWrite(&cm_serverLock); cm_PutServerNoLock(tsp); } - lock_ReleaseWrite(&cm_serverLock); + if (firstError == 0) { - if (serversp == NULL) - firstError = CM_ERROR_NOSUCHVOLUME; - else if (allDown) - firstError = CM_ERROR_ALLOFFLINE; + if (allDown) + firstError = CM_ERROR_ALLDOWN; else if (allBusy) firstError = CM_ERROR_ALLBUSY; + else if (allOffline || (someBusy && someOffline)) + firstError = CM_ERROR_ALLOFFLINE; else { osi_Log0(afsd_logp, "cm_ConnByMServers returning impossible error TIMEDOUT"); firstError = CM_ERROR_TIMEDOUT; -- 2.39.5