From: Jeffrey Altman Date: Fri, 18 Apr 2008 17:46:07 +0000 (+0000) Subject: DEVEL15-windows-volume-status-20080418 X-Git-Tag: openafs-devel-1_5_35~11 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=a03cad17d8c84860340b32785163d358657766f4;p=packages%2Fo%2Fopenafs.git DEVEL15-windows-volume-status-20080418 LICENSE MIT A problem was discovered with cm_Analyze when serverRef instances are marked offline. cm_CheckOfflineVolume() was not resetting the serverRef state. cm_Analyze was also waiting in sleep calls when it was it is clear that waiting would not result in a change of state. cm_CheckOfflineVolume() was updated to always reset the serverRef state and indicate that the volume should be considered online for the purpose of retrying when the serverRef state is reset to srv_not_busy. The problems identified in the MIT dev.mit.edu stress test environment are resolved by these changes. (cherry picked from commit c3eb805fc0309efca059a26b946eb4350417867e) --- diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index baca5e385..d31aa4f2a 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -133,7 +133,9 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, *serversppp = cm_GetVolServers(volp, fidp->volume); + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); return 0; } @@ -272,7 +274,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, else if (errorCode == CM_ERROR_ALLDOWN) { osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLDOWN."); /* Servers marked DOWN will be restored by the background daemon - * thread as they become available. + * thread as they become available. The volume status is + * updated as the server state changes. */ } @@ -281,43 +284,45 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, /* Volume instances marked offline will be restored by the * background daemon thread as they become available */ - if (timeLeft > 7 && fidp) { - thrd_Sleep(5000); - + if (fidp) { code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, - CM_GETVOL_FLAG_NO_LRU_UPDATE, - &volp); + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); if (code == 0) { - statep = cm_VolumeStateByID(volp, fidp->volume); - if (statep->state != vl_offline && statep->state != vl_unknown) { - retry = 1; - } else { - if (cm_CheckOfflineVolume(volp, statep->ID)) + if (timeLeft > 7) { + thrd_Sleep(5000); + + /* cm_CheckOfflineVolume() resets the serverRef state */ + if (cm_CheckOfflineVolume(volp, fidp->volume)) retry = 1; + } else { + cm_UpdateVolumeStatus(volp, fidp->volume); } - + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; } - } + } } else if (errorCode == CM_ERROR_ALLBUSY) { /* Volumes that are busy cannot be determined to be non-busy * without actually attempting to access them. */ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY."); - if (timeLeft > 7) { - thrd_Sleep(5000); - - if (fidp) { /* File Server query */ - code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, - CM_GETVOL_FLAG_NO_LRU_UPDATE, - &volp); - if (code == 0) { + if (fidp) { /* File Server query */ + code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); + if (code == 0) { + if (timeLeft > 7) { + thrd_Sleep(5000); + statep = cm_VolumeStateByID(volp, fidp->volume); if (statep->state != vl_offline && - statep->state != vl_busy && - statep->state != vl_unknown) { + statep->state != vl_busy && + statep->state != vl_unknown) { retry = 1; } else { if (!serversp) { @@ -331,7 +336,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, for (tsrp = serversp; tsrp; tsrp=tsrp->next) { if (tsrp->status == srv_busy) { tsrp->status = srv_not_busy; - } + } } lock_ReleaseWrite(&cm_serverLock); if (free_svr_list) { @@ -339,13 +344,22 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, *serverspp = serversp; } - cm_UpdateVolumeStatus(volp, statep->ID); + cm_UpdateVolumeStatus(volp, fidp->volume); retry = 1; } - - cm_PutVolume(volp); + } else { + cm_UpdateVolumeStatus(volp, fidp->volume); } - } else { /* VL Server query */ + + lock_ObtainRead(&cm_volumeLock); + cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; + } + } else { /* VL Server query */ + if (timeLeft > 7) { + thrd_Sleep(5000); + if (serversp) { lock_ObtainWrite(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { @@ -387,7 +401,10 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (statep) { cm_UpdateVolumeStatus(volp, statep->ID); + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; } if (free_svr_list) { @@ -494,7 +511,10 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (statep) { cm_UpdateVolumeStatus(volp, statep->ID); + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; } if (free_svr_list) { diff --git a/src/WINNT/afsd/cm_volume.c b/src/WINNT/afsd/cm_volume.c index 4bdd89b6d..7df7a0390 100644 --- a/src/WINNT/afsd/cm_volume.c +++ b/src/WINNT/afsd/cm_volume.c @@ -698,9 +698,11 @@ long cm_FindVolumeByID(cm_cell_t *cellp, afs_uint32 volumeID, cm_user_t *userp, cm_AdjustVolumeLRU(volp); lock_ReleaseWrite(&cm_volumeLock); } - } else + } else { + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); - + lock_ReleaseRead(&cm_volumeLock); + } return code; } @@ -863,9 +865,11 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep, cm_AdjustVolumeLRU(volp); lock_ReleaseWrite(&cm_volumeLock); } - } else + } else { + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); - + lock_ReleaseRead(&cm_volumeLock); + } return code; } @@ -928,6 +932,9 @@ void cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp) lock_ReleaseRead(&cm_volumeLock); + if (!volp) + return; + /* update it */ cm_data.mountRootGen = time(NULL); lock_ObtainWrite(&volp->rw); @@ -949,7 +956,9 @@ void cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp) #endif lock_ReleaseWrite(&volp->rw); + lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); } /* find the appropriate servers from a volume */ @@ -1079,110 +1088,122 @@ cm_CheckOfflineVolume(cm_volume_t *volp, afs_uint32 volID) } if (volp->vol[RWVOL].ID != 0 && (!volID || volID == volp->vol[RWVOL].ID) && - volp->vol[RWVOL].serversp && - (volp->vol[RWVOL].state == vl_busy || volp->vol[RWVOL].state == vl_offline || volp->vol[RWVOL].state == vl_unknown)) { - cm_InitReq(&req); - + volp->vol[RWVOL].serversp) { + for (serversp = volp->vol[RWVOL].serversp; serversp; serversp = serversp->next) { - if (serversp->status == srv_busy || serversp->status == srv_offline) + if (serversp->status == srv_busy || serversp->status == srv_offline) { serversp->status = srv_not_busy; + online = 1; + } } - lock_ReleaseWrite(&volp->rw); - do { - code = cm_ConnFromVolume(volp, volp->vol[RWVOL].ID, cm_rootUserp, &req, &connp); - if (code) - continue; + if (volp->vol[RWVOL].state == vl_busy || volp->vol[RWVOL].state == vl_offline || volp->vol[RWVOL].state == vl_unknown) { + cm_InitReq(&req); - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->vol[RWVOL].ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); + lock_ReleaseWrite(&volp->rw); + do { + code = cm_ConnFromVolume(volp, volp->vol[RWVOL].ID, cm_rootUserp, &req, &connp); + if (code) + continue; - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->vol[RWVOL].ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); - lock_ObtainWrite(&volp->rw); - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->vol[RWVOL].ID, volp->vol[RWVOL].state, vl_online); - volp->vol[RWVOL].state = vl_online; - online = 1; - } else if (code == CM_ERROR_NOACCESS) { - cm_VolumeStatusNotification(volp, volp->vol[RWVOL].ID, volp->vol[RWVOL].state, vl_unknown); - volp->vol[RWVOL].state = vl_unknown; - online = 1; + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + lock_ObtainWrite(&volp->rw); + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->vol[RWVOL].ID, volp->vol[RWVOL].state, vl_online); + volp->vol[RWVOL].state = vl_online; + online = 1; + } else if (code == CM_ERROR_NOACCESS) { + cm_VolumeStatusNotification(volp, volp->vol[RWVOL].ID, volp->vol[RWVOL].state, vl_unknown); + volp->vol[RWVOL].state = vl_unknown; + online = 1; + } } } if (volp->vol[ROVOL].ID != 0 && (!volID || volID == volp->vol[ROVOL].ID) && - volp->vol[ROVOL].serversp && - (volp->vol[ROVOL].state == vl_busy || volp->vol[ROVOL].state == vl_offline || volp->vol[ROVOL].state == vl_unknown)) { - cm_InitReq(&req); + volp->vol[ROVOL].serversp) { for (serversp = volp->vol[ROVOL].serversp; serversp; serversp = serversp->next) { - if (serversp->status == srv_busy || serversp->status == srv_offline) + if (serversp->status == srv_busy || serversp->status == srv_offline) { serversp->status = srv_not_busy; + online = 1; + } } - lock_ReleaseWrite(&volp->rw); - do { - code = cm_ConnFromVolume(volp, volp->vol[ROVOL].ID, cm_rootUserp, &req, &connp); - if (code) - continue; + if (volp->vol[ROVOL].state == vl_busy || volp->vol[ROVOL].state == vl_offline || volp->vol[ROVOL].state == vl_unknown) { + cm_InitReq(&req); - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->vol[ROVOL].ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); + lock_ReleaseWrite(&volp->rw); + do { + code = cm_ConnFromVolume(volp, volp->vol[ROVOL].ID, cm_rootUserp, &req, &connp); + if (code) + continue; - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->vol[ROVOL].ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); - lock_ObtainWrite(&volp->rw); - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->vol[ROVOL].ID, volp->vol[ROVOL].state, vl_online); - volp->vol[ROVOL].state = vl_online; - online = 1; - } else if (code == CM_ERROR_NOACCESS) { - cm_VolumeStatusNotification(volp, volp->vol[ROVOL].ID, volp->vol[ROVOL].state, vl_unknown); - volp->vol[ROVOL].state = vl_unknown; - online = 1; + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + lock_ObtainWrite(&volp->rw); + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->vol[ROVOL].ID, volp->vol[ROVOL].state, vl_online); + volp->vol[ROVOL].state = vl_online; + online = 1; + } else if (code == CM_ERROR_NOACCESS) { + cm_VolumeStatusNotification(volp, volp->vol[ROVOL].ID, volp->vol[ROVOL].state, vl_unknown); + volp->vol[ROVOL].state = vl_unknown; + online = 1; + } } } if (volp->vol[BACKVOL].ID != 0 && (!volID || volID == volp->vol[BACKVOL].ID) && - volp->vol[BACKVOL].serversp && - (volp->vol[BACKVOL].state == vl_busy || volp->vol[BACKVOL].state == vl_offline || volp->vol[BACKVOL].state == vl_unknown)) { - cm_InitReq(&req); - + volp->vol[BACKVOL].serversp) { + for (serversp = volp->vol[BACKVOL].serversp; serversp; serversp = serversp->next) { - if (serversp->status == srv_busy || serversp->status == srv_offline) + if (serversp->status == srv_busy || serversp->status == srv_offline) { serversp->status = srv_not_busy; + online = 1; + } } - lock_ReleaseWrite(&volp->rw); - do { - code = cm_ConnFromVolume(volp, volp->vol[BACKVOL].ID, cm_rootUserp, &req, &connp); - if (code) - continue; + if (volp->vol[BACKVOL].state == vl_busy || volp->vol[BACKVOL].state == vl_offline || volp->vol[BACKVOL].state == vl_unknown) { + cm_InitReq(&req); - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->vol[BACKVOL].ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); + lock_ReleaseWrite(&volp->rw); + do { + code = cm_ConnFromVolume(volp, volp->vol[BACKVOL].ID, cm_rootUserp, &req, &connp); + if (code) + continue; - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->vol[BACKVOL].ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); - lock_ObtainWrite(&volp->rw); - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->vol[BACKVOL].ID, volp->vol[BACKVOL].state, vl_online); - volp->vol[BACKVOL].state = vl_online; - online = 1; - } else if (code == CM_ERROR_NOACCESS) { - cm_VolumeStatusNotification(volp, volp->vol[BACKVOL].ID, volp->vol[BACKVOL].state, vl_unknown); - volp->vol[BACKVOL].state = vl_unknown; - online = 1; + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + lock_ObtainWrite(&volp->rw); + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->vol[BACKVOL].ID, volp->vol[BACKVOL].state, vl_online); + volp->vol[BACKVOL].state = vl_online; + online = 1; + } else if (code == CM_ERROR_NOACCESS) { + cm_VolumeStatusNotification(volp, volp->vol[BACKVOL].ID, volp->vol[BACKVOL].state, vl_unknown); + volp->vol[BACKVOL].state = vl_unknown; + online = 1; + } } }