From: Jeffrey Altman Date: Wed, 18 Jan 2012 00:46:30 +0000 (-0500) Subject: Windows: failover and retry for VBUSY X-Git-Tag: upstream/1.6.1.pre4^2~10 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=a7bcd00051aead840f4d85a21f0ef36b6b9819d0;p=packages%2Fo%2Fopenafs.git Windows: failover and retry for VBUSY When a file server returns the VBUSY error for an RPC, the cache manager records the 'srv_busy' state in the cm_serverRef_t structure binding that file server to the active cm_volume_t object. The 'srv_busy' state was never cleared, which prevented the volume from being accessed. Clear the 'srv_busy' flag whenever cm_Analyze() receives a CM_ERROR_ALLBUSY error, which means that all replicas have been tried, or whenever the error is not VBUSY or VRESTARTING. FIXES 130537 Reviewed-on: http://gerrit.openafs.org/6563 Reviewed-by: Derrick Brashear Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman (cherry picked from commit 9056d09887c84a480e0a9ee3457a9469fbb97064) Change-Id: Ifd8204a3bba83a893188e96f85b1ad3ba078fe49 Reviewed-on: http://gerrit.openafs.org/6831 Tested-by: BuildBot Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman --- diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 257efbae3..896834a9f 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -216,6 +216,39 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, return (*serversppp ? 
0 : CM_ERROR_NOSUCHVOLUME); } +void +cm_SetServerBusyStatus(cm_serverRef_t *serversp, cm_server_t *serverp) +{ + cm_serverRef_t *tsrp; + + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; + if (tsrp->server == serverp && tsrp->status == srv_not_busy) { + tsrp->status = srv_busy; + break; + } + } + lock_ReleaseWrite(&cm_serverLock); +} + +void +cm_ResetServerBusyStatus(cm_serverRef_t *serversp) +{ + cm_serverRef_t *tsrp; + + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; + if (tsrp->status == srv_busy) { + tsrp->status = srv_not_busy; + } + } + lock_ReleaseWrite(&cm_serverLock); +} + /* * Analyze the error return from an RPC. Determine whether or not to retry, * and if we're going to retry, determine whether failover is appropriate, @@ -372,6 +405,20 @@ cm_Analyze(cm_conn_t *connp, format = "All servers are offline when accessing cell %s volume %d."; LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume); + if (!serversp) { + code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); + if (code == 0) { + serversp = *serverspp; + free_svr_list = 1; + } + } + cm_ResetServerBusyStatus(serversp); + if (free_svr_list) { + cm_FreeServerList(serverspp, 0); + free_svr_list = 0; + serversp = NULL; + } + code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp); @@ -409,47 +456,30 @@ cm_Analyze(cm_conn_t *connp, format = "All servers are busy when accessing cell %s volume %d."; LogEvent(EVENTLOG_WARNING_TYPE, msgID, cellp->name, fidp->volume); + if (!serversp) { + code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); + if (code == 0) { + serversp = *serverspp; + free_svr_list = 1; + } + } + cm_ResetServerBusyStatus(serversp); + if (free_svr_list) { + cm_FreeServerList(serverspp, 0); + free_svr_list = 0; + serversp = NULL; + } + code = 
cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp); if (code == 0) { if (timeLeft > 7) { thrd_Sleep(5000); - statep = cm_VolumeStateByID(volp, fidp->volume); - if (statep->state != vl_offline && - statep->state != vl_busy && - statep->state != vl_unknown) { - retry = 1; - } else { - if (!serversp) { - code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); - if (code == 0) { - serversp = *serverspp; - free_svr_list = 1; - } - } - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->status == srv_deleted) - continue; - if (tsrp->status == srv_busy) { - tsrp->status = srv_not_busy; - } - } - lock_ReleaseWrite(&cm_serverLock); - if (free_svr_list) { - cm_FreeServerList(serverspp, 0); - serversp = NULL; - free_svr_list = 0; - } - - cm_UpdateVolumeStatus(volp, fidp->volume); - retry = 1; - } - } else { - cm_UpdateVolumeStatus(volp, fidp->volume); + retry = 1; } + cm_UpdateVolumeStatus(volp, fidp->volume); lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); @@ -463,15 +493,7 @@ cm_Analyze(cm_conn_t *connp, thrd_Sleep(5000); if (serversp) { - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->status == srv_deleted) - continue; - if (tsrp->status == srv_busy) { - tsrp->status = srv_not_busy; - } - } - lock_ReleaseWrite(&cm_serverLock); + cm_ResetServerBusyStatus(serversp); retry = 1; } } @@ -511,32 +533,23 @@ cm_Analyze(cm_conn_t *connp, LogEvent(EVENTLOG_WARNING_TYPE, msgID, addr, fidp->volume, cellp->name); } - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->status == srv_deleted) - continue; - if (tsrp->server == serverp && tsrp->status == srv_not_busy) { - tsrp->status = srv_busy; - if (fidp) { /* File Server query */ - lock_ReleaseWrite(&cm_serverLock); - code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, - CM_GETVOL_FLAG_NO_LRU_UPDATE, - &volp); - if (code == 0) - statep = 
cm_VolumeStateByID(volp, fidp->volume); - lock_ObtainWrite(&cm_serverLock); - } - break; - } - } - lock_ReleaseWrite(&cm_serverLock); + cm_SetServerBusyStatus(serversp, serverp); - if (statep) { - cm_UpdateVolumeStatus(volp, statep->ID); - lock_ObtainRead(&cm_volumeLock); - cm_PutVolume(volp); - lock_ReleaseRead(&cm_volumeLock); - volp = NULL; + if (fidp) { /* File Server query */ + code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); + if (code == 0) { + statep = cm_VolumeStateByID(volp, fidp->volume); + + if (statep) + cm_UpdateVolumeStatus(volp, statep->ID); + + lock_ObtainRead(&cm_volumeLock); + cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; + } } if (free_svr_list) { @@ -1165,6 +1178,14 @@ cm_Analyze(cm_conn_t *connp, reqp->flags &= ~CM_REQ_VOLUME_UPDATED; } + if ( serversp && + errorCode != VBUSY && + errorCode != VRESTARTING && + errorCode != CM_ERROR_ALLBUSY) + { + cm_ResetServerBusyStatus(serversp); + } + /* retry until we fail to find a connection */ return retry; }