From: Jeffrey Altman Date: Sat, 13 Sep 2008 05:20:48 +0000 (+0000) Subject: DEVEL15-windows-vnovol-20080912 X-Git-Tag: openafs-devel-1_5_53~34 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=00e8c7e1c61b314e48ae753b606c7b29dc4c882c;p=packages%2Fo%2Fopenafs.git DEVEL15-windows-vnovol-20080912 LICENSE MIT The cm_serverRef_t list reference counts were undercounting and prematurely freeing the server lists for volumes that experienced VNOVOL and VMOVED errors. cm_Analyze() must release the server list before forcibly updating the volume location info. Otherwise, the list that gets freed is the old one concatenated with the new one. Add more trace messages. (cherry picked from commit 1456a67c5ca024c523e0fc3edcba720780d4be9e) --- diff --git a/src/WINNT/afsd/cm_callback.c b/src/WINNT/afsd/cm_callback.c index 899c9b173..1ce5b001d 100644 --- a/src/WINNT/afsd/cm_callback.c +++ b/src/WINNT/afsd/cm_callback.c @@ -1867,6 +1867,8 @@ long cm_CBServersUp(cm_scache_t *scp, time_t * downTime) return 1; for (found = 0,tsrp = statep->serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsrp->server == scp->cbServerp) found = 1; if (tsrp->server->downTime > *downTime) diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 81b17ec86..a09c41fff 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -232,6 +232,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (cellp == NULL && serversp) { struct cm_serverRef * refp; for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) { + if (refp->status == srv_deleted) + continue; if ( refp->server ) cellp = refp->server->cellp; } @@ -335,6 +337,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, } lock_ObtainWrite(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsrp->status == srv_busy) { tsrp->status = srv_not_busy; } @@ -342,7 +346,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, lock_ReleaseWrite(&cm_serverLock); if (free_svr_list) { cm_FreeServerList(&serversp, 0); - *serverspp = serversp; + *serverspp = serversp = NULL; + free_svr_list = 0; } cm_UpdateVolumeStatus(volp, fidp->volume); @@ -364,6 +369,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (serversp) { lock_ObtainWrite(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsrp->status == srv_busy) { tsrp->status = srv_not_busy; } @@ -386,6 +393,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, } lock_ObtainWrite(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsrp->server == serverp && tsrp->status == srv_not_busy) { tsrp->status = srv_busy; if (fidp) { /* File Server query */ @@ -412,7 +421,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (free_svr_list) { cm_FreeServerList(&serversp, 0); - *serverspp = serversp; + *serverspp = serversp = NULL; + free_svr_list = 0; } retry = 1; } @@ -467,13 +477,15 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (!serversp && fidp) { code = cm_GetServerList(fidp, userp, reqp, &serverspp); if (code == 0) { - serversp = *serverspp; + serversp = *serverspp = NULL; free_svr_list = 1; } } lock_ObtainWrite(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsrp->server == serverp) { /* REDIRECT */ if (errorCode == VMOVED || errorCode == VNOVOL) { @@ -483,38 +495,46 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, } else { tsrp->status = srv_offline; } - - if (fidp) { /* File Server query */ - lock_ReleaseWrite(&cm_serverLock); - code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, - CM_GETVOL_FLAG_NO_LRU_UPDATE, - &volp); - if (code == 0) - cm_VolumeStateByID(volp, fidp->volume); - lock_ObtainWrite(&cm_serverLock); - } } } lock_ReleaseWrite(&cm_serverLock); - if (fidp && (errorCode == VMOVED || errorCode == VNOVOL)) { - code = cm_ForceUpdateVolume(fidp, userp, reqp); - if (code) - timeLeft = 0; /* prevent a retry on failure */ + /* Free the server list before cm_ForceUpdateVolume is called */ + if (free_svr_list) { + cm_FreeServerList(&serversp, 0); + *serverspp = serversp = NULL; + free_svr_list = 0; } - if (statep) { - cm_UpdateVolumeStatus(volp, statep->ID); - lock_ObtainRead(&cm_volumeLock); - cm_PutVolume(volp); - lock_ReleaseRead(&cm_volumeLock); - volp = NULL; - } + if (fidp) { /* File Server query */ + code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); + if (code == 0) + statep = cm_VolumeStateByID(volp, fidp->volume); + + if (errorCode == VMOVED || errorCode == VNOVOL) { + code = cm_ForceUpdateVolume(fidp, userp, reqp); + if (code) + timeLeft = 0; /* prevent a retry on failure */ + osi_Log3(afsd_logp, "cm_Analyze called cm_ForceUpdateVolume cell %u vol %u code 0x%x", + fidp->cell, fidp->volume, code); + } - if (free_svr_list) { - cm_FreeServerList(&serversp, 0); - *serverspp = serversp; + if (statep) { + cm_UpdateVolumeStatus(volp, statep->ID); + osi_Log3(afsd_logp, "cm_Analyze NewVolState cell %u vol %u state %u", + fidp->cell, fidp->volume, statep->state); + } + + if (volp) { + lock_ObtainRead(&cm_volumeLock); + cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; + } } + if ( timeLeft > 2 ) retry = 1; } else if ( errorCode == VNOVNODE ) { @@ -798,6 +818,9 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, lock_ObtainRead(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; + tsp = tsrp->server; if (reqp->tokenIdleErrorServp) { /* @@ -813,7 +836,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, if (tsp) { cm_GetServerNoLock(tsp); lock_ReleaseRead(&cm_serverLock); - if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) { + if (!(tsp->flags & CM_SERVERFLAG_DOWN)) { allDown = 0; if (tsrp->status == srv_busy) { allOffline = 0; @@ -1042,8 +1065,10 @@ long cm_ServerAvailable(struct cm_fid *fidp, struct cm_user *userp) lock_ObtainRead(&cm_serverLock); for (tsrp = *serverspp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; tsp = tsrp->server; - if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) { + if (!(tsp->flags & CM_SERVERFLAG_DOWN)) { allDown = 0; if (tsrp->status == srv_busy) { allOffline = 0; diff --git a/src/WINNT/afsd/cm_ioctl.c b/src/WINNT/afsd/cm_ioctl.c index 0bda7f65b..46dcf80f5 100644 --- a/src/WINNT/afsd/cm_ioctl.c +++ b/src/WINNT/afsd/cm_ioctl.c @@ -3049,10 +3049,12 @@ cm_CheckServersStatus(cm_serverRef_t *serversp) lock_ObtainRead(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (tsp = tsrp->server) { cm_GetServerNoLock(tsp); lock_ReleaseRead(&cm_serverLock); - if ((tsrp->status == srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) { + if (!(tsp->flags & CM_SERVERFLAG_DOWN)) { allDown = 0; if (tsrp->status == srv_busy) { allOffline = 0; diff --git a/src/WINNT/afsd/cm_server.c b/src/WINNT/afsd/cm_server.c index eba1fd246..a2748d5f1 100644 --- a/src/WINNT/afsd/cm_server.c +++ b/src/WINNT/afsd/cm_server.c @@ -1034,6 +1034,8 @@ LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp) lock_ObtainRead(&cm_serverLock); for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; if (first) first = 0; else @@ -1259,6 +1261,9 @@ void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags) cm_serverRef_t **nextp = 0; cm_serverRef_t * next = 0; + if (*list == NULL) + return; + lock_ObtainWrite(&cm_serverLock); while (*current) diff --git a/src/WINNT/afsd/cm_volume.c b/src/WINNT/afsd/cm_volume.c index 5af199377..d0dd1d601 100644 --- a/src/WINNT/afsd/cm_volume.c +++ b/src/WINNT/afsd/cm_volume.c @@ -227,7 +227,9 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * cm_UpdateCell(cellp, 0); /* now we have volume structure locked and held; make RPC to fill it */ - osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, volp->namep); + osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,volp->namep)); do { struct rx_connection * rxconnp; @@ -254,10 +256,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * code = cm_MapVLRPCError(code, reqp); if ( code ) osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", - volp->cellp->name, volp->namep, code); + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,volp->namep), code); else osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", - volp->cellp->name, volp->namep); + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,volp->namep)); } /* We can end up here with code == CM_ERROR_NOSUCHVOLUME if the base volume name @@ -271,7 +275,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * snprintf(name, VL_MAXNAMELEN, "%s.readonly", volp->namep); /* now we have volume structure locked and held; make RPC to fill it */ - osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, + osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", + osi_LogSaveString(afsd_logp,volp->cellp->name), osi_LogSaveString(afsd_logp,name)); do { struct rx_connection * rxconnp; @@ -299,10 +304,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * code = cm_MapVLRPCError(code, reqp); if ( code ) osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", - volp->cellp->name, osi_LogSaveString(afsd_logp,name), code); + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,name), code); else osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", - volp->cellp->name, osi_LogSaveString(afsd_logp,name)); + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,name)); } lock_ObtainWrite(&volp->rw); @@ -427,7 +434,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * name[len - 9] = '\0'; } - osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", volp->namep, name); + osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", + osi_LogSaveString(afsd_logp,volp->namep), osi_LogSaveString(afsd_logp,name)); if (volp->flags & CM_VOLUMEFLAG_IN_HASH) cm_RemoveVolumeFromNameHashTable(volp); @@ -558,6 +566,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * cm_RandomizeServer(&volp->vol[ROVOL].serversp); } + rwNewstate = rwServers_alldown ? vl_alldown : vl_online; roNewstate = roServers_alldown ? vl_alldown : vl_online; bkNewstate = bkServers_alldown ? vl_alldown : vl_online; @@ -604,7 +613,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * volp->flags &= ~CM_VOLUMEFLAG_UPDATING_VL; osi_Log4(afsd_logp, "cm_UpdateVolumeLocation done, waking others name %s:%s flags 0x%x code 0x%x", - volp->cellp->name, volp->namep, volp->flags, code); + osi_LogSaveString(afsd_logp,volp->cellp->name), + osi_LogSaveString(afsd_logp,volp->namep), volp->flags, code); osi_Wakeup((LONG_PTR) &volp->flags); return code; @@ -846,6 +856,7 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep, cm_VolumeStatusNotification(volp, volp->vol[volType].ID, volp->vol[volType].state, vl_unknown); volp->vol[volType].ID = 0; cm_SetFid(&volp->vol[volType].dotdotFid, 0, 0, 0, 0); + cm_FreeServerList(&volp->vol[volType].serversp, CM_FREESERVERLIST_DELETE); } } else { volp = &cm_data.volumeBaseAddress[cm_data.currentVolumes++]; @@ -999,7 +1010,7 @@ long cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp) cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume) { cm_serverRef_t **serverspp; - cm_serverRef_t *current;; + cm_serverRef_t *current; lock_ObtainWrite(&cm_serverLock); @@ -1012,7 +1023,11 @@ cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume) else osi_panic("bad volume ID in cm_GetVolServers", __FILE__, __LINE__); - for (current = *serverspp; current; current = current->next) + /* + * Increment the refCount on deleted items as well. + * They will be freed by cm_FreeServerList when they get to zero + */ + for (current = *serverspp; current; current = current->next) current->refCount++; lock_ReleaseWrite(&cm_serverLock); @@ -1120,12 +1135,14 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32 alldown = 1; alldeleted = 1; for (serversp = statep->serversp; serversp; serversp = serversp->next) { - if (serversp->status != srv_deleted) { - alldeleted = 0; - *onlinep = 1; - alldown = 0; - } - if (serversp->status == srv_busy || serversp->status == srv_offline) + if (serversp->status == srv_deleted) + continue; + + alldeleted = 0; + *onlinep = 1; + alldown = 0; + + if (serversp->status == srv_busy || serversp->status == srv_offline) serversp->status = srv_not_busy; } @@ -1240,6 +1257,8 @@ cm_UpdateVolumeStatusInt(cm_volume_t *volp, struct cm_vol_state *statep) lock_ObtainWrite(&cm_serverLock); for (tsrp = statep->serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_deleted) + continue; tsp = tsrp->server; if (tsp) { cm_GetServerNoLock(tsp);