From: Jeffrey Altman Date: Wed, 24 Sep 2008 23:42:53 +0000 (+0000) Subject: DEVEL15-windows-vlserver-updates-20080924 X-Git-Tag: openafs-devel-1_5_53~24 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=1ef154f7f3cee9b41c12629ca1dadf8e1de06bd3;p=packages%2Fo%2Fopenafs.git DEVEL15-windows-vlserver-updates-20080924 LICENSE MIT if a vlserver returns a ubik error, skip the server and retry (cherry picked from commit dd05bd3ba7be90a6348b144eb1c0a6ac1c5f43ba) --- diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index a09c41fff..f89efbe4f 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -141,12 +141,12 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, if (code) return code; - *serversppp = cm_GetVolServers(volp, fidp->volume); + *serversppp = cm_GetVolServers(volp, fidp->volume, userp, reqp); lock_ObtainRead(&cm_volumeLock); cm_PutVolume(volp); lock_ReleaseRead(&cm_volumeLock); - return 0; + return (*serversppp ? 0 : CM_ERROR_NOSUCHVOLUME); } /* @@ -635,6 +635,19 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, reqp->tokenError = errorCode; retry = 1; } + } else if (errorCode >= ERROR_TABLE_BASE_U && errorCode < ERROR_TABLE_BASE_U + 256) { + /* + * We received a ubik error. It's possible that the server we are + * communicating with has a corrupted database or is partitioned + * from the rest of the servers and another server might be able + * to answer our query. Therefore, we will retry the request + * and force the use of another server. 
+ */ + if (serverp) { + reqp->tokenIdleErrorServp = serverp; + reqp->tokenError = errorCode; + retry = 1; + } } else if (errorCode == VICECONNBAD || errorCode == VICETOKENDEAD) { cm_ForceNewConnections(serverp); if ( timeLeft > 2 ) @@ -1126,7 +1139,7 @@ long cm_ConnFromVolume(struct cm_volume *volp, unsigned long volid, struct cm_us *connpp = NULL; - serverspp = cm_GetVolServers(volp, volid); + serverspp = cm_GetVolServers(volp, volid, userp, reqp); code = cm_ConnByMServers(*serverspp, userp, reqp, connpp); cm_FreeServerList(serverspp, 0); diff --git a/src/WINNT/afsd/cm_ioctl.c b/src/WINNT/afsd/cm_ioctl.c index 46dcf80f5..41f7fd1e2 100644 --- a/src/WINNT/afsd/cm_ioctl.c +++ b/src/WINNT/afsd/cm_ioctl.c @@ -908,7 +908,7 @@ cm_IoctlGetOwner(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *sc afs_int32 cm_IoctlWhereIs(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *scp, cm_req_t *reqp) { - afs_int32 code; + afs_int32 code = 0; cm_cell_t *cellp; cm_volume_t *tvp; cm_serverRef_t **tsrpp, *current; @@ -952,16 +952,19 @@ cm_IoctlWhereIs(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *scp cp = ioctlp->outDatap; - tsrpp = cm_GetVolServers(tvp, volume); - lock_ObtainRead(&cm_serverLock); - for (current = *tsrpp; current; current = current->next) { - tsp = current->server; - memcpy(cp, (char *)&tsp->addr.sin_addr.s_addr, sizeof(long)); - cp += sizeof(long); + tsrpp = cm_GetVolServers(tvp, volume, userp, reqp); + if (tsrpp == NULL) { + code = CM_ERROR_NOSUCHVOLUME; + } else { + lock_ObtainRead(&cm_serverLock); + for (current = *tsrpp; current; current = current->next) { + tsp = current->server; + memcpy(cp, (char *)&tsp->addr.sin_addr.s_addr, sizeof(long)); + cp += sizeof(long); + } + lock_ReleaseRead(&cm_serverLock); + cm_FreeServerList(tsrpp, 0); } - lock_ReleaseRead(&cm_serverLock); - cm_FreeServerList(tsrpp, 0); - /* still room for terminating NULL, add it on */ volume = 0; /* reuse vbl */ memcpy(cp, (char *)&volume, sizeof(long)); @@ 
-970,7 +973,7 @@ cm_IoctlWhereIs(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *scp ioctlp->outDatap = cp; cm_PutVolume(tvp); } - return 0; + return code; } /* @@ -3023,6 +3026,8 @@ cm_IoctlMemoryDump(struct cm_ioctl *ioctlp, struct cm_user *userp) cm_DumpSCache(hLogFile, cookie, 1); cm_DumpBufHashTable(hLogFile, cookie, 1); smb_DumpVCP(hLogFile, cookie, 1); + rx_DumpCalls(hLogFile, cookie); + rx_DumpPackets(hLogFile, cookie); CloseHandle(hLogFile); diff --git a/src/WINNT/afsd/cm_volume.c b/src/WINNT/afsd/cm_volume.c index 5a34d0c32..7d23c0a89 100644 --- a/src/WINNT/afsd/cm_volume.c +++ b/src/WINNT/afsd/cm_volume.c @@ -1009,11 +1009,13 @@ long cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp) } /* find the appropriate servers from a volume */ -cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume) +cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume, cm_user_t *userp, cm_req_t *reqp) { cm_serverRef_t **serverspp; cm_serverRef_t *current; + int firstTry = 1; + start: lock_ObtainWrite(&cm_serverLock); if (volume == volp->vol[RWVOL].ID) @@ -1022,9 +1024,21 @@ cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume) serverspp = &volp->vol[ROVOL].serversp; else if (volume == volp->vol[BACKVOL].ID) serverspp = &volp->vol[BACKVOL].serversp; - else - osi_panic("bad volume ID in cm_GetVolServers", __FILE__, __LINE__); - + else { + lock_ReleaseWrite(&cm_serverLock); + if (firstTry) { + afs_int32 code; + firstTry = 0; + lock_ObtainWrite(&volp->rw); + volp->flags |= CM_VOLUMEFLAG_RESET; + code = cm_UpdateVolumeLocation(volp->cellp, userp, reqp, volp); + lock_ReleaseWrite(&volp->rw); + if (code == 0) + goto start; + } + return NULL; + } + /* * Increment the refCount on deleted items as well. 
* They will be freed by cm_FreeServerList when they get to zero diff --git a/src/WINNT/afsd/cm_volume.h b/src/WINNT/afsd/cm_volume.h index 9a2763926..b32d72e0c 100644 --- a/src/WINNT/afsd/cm_volume.h +++ b/src/WINNT/afsd/cm_volume.h @@ -92,7 +92,8 @@ extern long cm_GetROVolumeID(cm_volume_t *volp); extern long cm_ForceUpdateVolume(struct cm_fid *fidp, cm_user_t *userp, cm_req_t *reqp); -extern cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume); +extern cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume, + cm_user_t *userp, cm_req_t *reqp); extern void cm_ChangeRankVolume(cm_server_t *tsp);