From 68dc637db6d99a48d7be0556916a8cc084843286 Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 25 Nov 2011 09:28:18 -0500 Subject: [PATCH] Windows: improved idle dead time handling RX_CALL_IDLE has been treated the same as RX_CALL_DEAD which is a fatal error that results in the server being marked down. This is not the appropriate behavior for an idle dead timeout error which should not result in servers being marked down. Idle dead timeouts are locally generated and are an indication that the server: a. is severely overloaded and cannot process all incoming requests in a timely fashion. b. has a partition whose underlying disk (or iSCSI, etc) is failing and all I/O requests on that device are blocking. c. has a large number of threads blocking on a single vnode and cannot process requests for other vnodes as a result. d. is malicious. RX_CALL_IDLE is distinct from RX_CALL_DEAD in that idle dead timeout handling should permit timely failover to replicas when they exist but in the non-replica case should not be triggered until the hard dead timeout. If the request cannot be retried, it should fail with an I/O error. The client should not retry a request to the same server as a result of an idle dead timeout. In addition, RX_CALL_IDLE indicates that the client has abandoned the call but the server has not. Therefore, the client cannot determine whether or not the RPC will eventually succeed and it must discard any status information it has about the object of the RPC if the RPC could have altered the object state upon success. This patchset splits the RX_CALL_DEAD processing in cm_Analyze() to clarify that only RX_CALL_DEAD errors result in the server being marked down. Since Rx idle dead timeout processing is per connection and idle dead timeouts must differ depending upon whether or not replica sites exist, cm_ConnBy*() are extended to select a connection based upon whether or not replica sites exist. 
A separate connection object is used for RPCs to replicated objects as compared to RPCs to non-replicated objects (volumes or vldb). For non-replica connections the idle dead timeout is set to the hard dead timeout. For replica connections the idle dead timeout is set to the configured idle dead timeout. Idle dead timeout events and whether or not a retry was triggered are logged to the Windows Event Log. cm_Analyze() is given a new 'storeOp' parameter which is non-zero when the executed RPC could modify the data on the file server. Reviewed-on: http://gerrit.openafs.org/6118 Reviewed-by: Jeffrey Altman Tested-by: BuildBot (cherry picked from commit f768fb95f3eb3815d6225e074c43341ed2ad5347) Change-Id: If7194292be0fc2350af9f26c397bd3a1e840abdc Reviewed-on: http://gerrit.openafs.org/6830 Tested-by: BuildBot Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman --- src/WINNT/afsd/afsd_eventlog.c | 1 + src/WINNT/afsd/afsd_eventmessages.mc | 8 + src/WINNT/afsd/cm_callback.c | 6 +- src/WINNT/afsd/cm_conn.c | 270 ++++++++++++++++++++++----- src/WINNT/afsd/cm_conn.h | 19 +- src/WINNT/afsd/cm_dcache.c | 8 +- src/WINNT/afsd/cm_ioctl.c | 8 +- src/WINNT/afsd/cm_server.c | 6 +- src/WINNT/afsd/cm_utils.c | 3 +- src/WINNT/afsd/cm_vnodeops.c | 27 ++- src/WINNT/afsd/cm_volume.c | 18 +- src/WINNT/afsd/cm_volume.h | 2 + 12 files changed, 284 insertions(+), 92 deletions(-) diff --git a/src/WINNT/afsd/afsd_eventlog.c b/src/WINNT/afsd/afsd_eventlog.c index 22f203d83..a5c8a7d95 100644 --- a/src/WINNT/afsd/afsd_eventlog.c +++ b/src/WINNT/afsd/afsd_eventlog.c @@ -252,6 +252,7 @@ LogEvent(WORD wEventType, DWORD dwEventID, ...) 
case MSG_ALL_SERVERS_BUSY: case MSG_ALL_SERVERS_OFFLINE: case MSG_ALL_SERVERS_DOWN: + case MSG_RX_IDLE_DEAD_TIMEOUT: wNumArgs = 2; lpArgs[0] = va_arg(listArgs, LPTSTR); StringCbPrintf(lpStrings[1],STRLEN,"%d",va_arg(listArgs,afs_int32)); diff --git a/src/WINNT/afsd/afsd_eventmessages.mc b/src/WINNT/afsd/afsd_eventmessages.mc index 31e2a0f76..f6ce08bca 100644 --- a/src/WINNT/afsd/afsd_eventmessages.mc +++ b/src/WINNT/afsd/afsd_eventmessages.mc @@ -446,4 +446,12 @@ Language=English OpenAFS Shutdown Complete. . +MessageId= +Severity=Warning +Facility=System +SymbolicName=MSG_RX_IDLE_DEAD_TIMEOUT +Language=English +Idle dead timeout when communicating with server %1. +. + ;#endif /* __AFSD_EVENTMESSAGES_H_ 1 */ diff --git a/src/WINNT/afsd/cm_callback.c b/src/WINNT/afsd/cm_callback.c index e4fd3a366..e3aaceac8 100644 --- a/src/WINNT/afsd/cm_callback.c +++ b/src/WINNT/afsd/cm_callback.c @@ -1824,7 +1824,7 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp, &afsStatus, &callback, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &sfid, &volSync, NULL, + } while (cm_Analyze(connp, userp, reqp, &sfid, 0, &volSync, NULL, &cbr, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -2033,7 +2033,7 @@ cm_GiveUpAllCallbacks(cm_server_t *tsp, afs_int32 markDown) if ((tsp->type == CM_SERVER_FILE) && !(tsp->flags & CM_SERVERFLAG_DOWN)) { - code = cm_ConnByServer(tsp, cm_rootUserp, &connp); + code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp); if (code == 0) { rxconnp = cm_GetRxConn(connp); rx_SetConnDeadTime(rxconnp, 10); @@ -2128,7 +2128,7 @@ cm_GiveUpAllCallbacksAllServersMulti(afs_int32 markDown) lock_ReleaseRead(&cm_serverLock); serversp[nconns] = tsp; - code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]); + code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]); if (code) { lock_ObtainRead(&cm_serverLock); cm_PutServerNoLock(tsp); diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 
453d3463e..257efbae3 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -27,6 +27,7 @@ DWORD RDRtimeout = CM_CONN_DEFAULTRDRTIMEOUT; unsigned short ConnDeadtimeout = CM_CONN_CONNDEADTIME; unsigned short HardDeadtimeout = CM_CONN_HARDDEADTIME; unsigned short IdleDeadtimeout = CM_CONN_IDLEDEADTIME; +unsigned short ReplicaIdleDeadtimeout = CM_CONN_IDLEDEADTIME_REP; unsigned short NatPingInterval = CM_CONN_NATPINGINTERVAL; #define LANMAN_WKS_PARAM_KEY "SYSTEM\\CurrentControlSet\\Services\\lanmanworkstation\\parameters" @@ -124,6 +125,13 @@ void cm_InitConn(void) afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout); } dummyLen = sizeof(DWORD); + code = RegQueryValueEx(parmKey, "ReplicaIdleDeadTimeout", NULL, NULL, + (BYTE *) &dwValue, &dummyLen); + if (code == ERROR_SUCCESS) { + ReplicaIdleDeadtimeout = (unsigned short)dwValue; + afsi_log("ReplicaIdleDeadTimeout is %d", ReplicaIdleDeadtimeout); + } + dummyLen = sizeof(DWORD); code = RegQueryValueEx(parmKey, "NatPingInterval", NULL, NULL, (BYTE *) &dwValue, &dummyLen); if (code == ERROR_SUCCESS) { @@ -139,23 +147,34 @@ void cm_InitConn(void) * the smb redirector session timeout (RDRtimeout). * * The UNIX cache manager uses 120 seconds for the hard dead - * timeout and 50 seconds for the connection and idle timeouts. + * timeout and 1200 seconds for the connection and idle timeouts. * * We base our values on those while making sure we leave * enough time for overhead. + * + * To further complicate matters we need to take into account + * file server hard dead timeouts as they affect the length + * of time it takes the file server to give up when attempting + * to break callbacks to unresponsive clients. The file + * server hard dead timeout is 120 seconds. */ - if (ConnDeadtimeout == 0) { - ConnDeadtimeout = (unsigned short) ((RDRtimeout / 2) < 50 ? 
(RDRtimeout / 2) : 50); + afsi_log("lanmanworkstation : SessTimeout %u", RDRtimeout); + if (ConnDeadtimeout == 0) { + ConnDeadtimeout = (unsigned short) ((RDRtimeout / 2) < 50 ? (RDRtimeout / 2) : 50); afsi_log("ConnDeadTimeout is %d", ConnDeadtimeout); } - if (HardDeadtimeout == 0) { - HardDeadtimeout = (unsigned short) (RDRtimeout > 125 ? 120 : (RDRtimeout - 5)); + if (HardDeadtimeout == 0) { + HardDeadtimeout = (unsigned short) (RDRtimeout > 125 ? 120 : (RDRtimeout - 5)); afsi_log("HardDeadTimeout is %d", HardDeadtimeout); } - if (IdleDeadtimeout == 0) { - IdleDeadtimeout = (unsigned short) ConnDeadtimeout; + if (IdleDeadtimeout == 0) { + IdleDeadtimeout = 10 * (unsigned short) HardDeadtimeout; afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout); } + if (ReplicaIdleDeadtimeout == 0) { + ReplicaIdleDeadtimeout = (unsigned short) HardDeadtimeout; + afsi_log("ReplicaIdleDeadTimeout is %d", ReplicaIdleDeadtimeout); + } osi_EndOnce(&once); } } @@ -167,10 +186,11 @@ void cm_InitReq(cm_req_t *reqp) } static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, - struct cm_req *reqp, cm_serverRef_t ***serversppp) + struct cm_req *reqp, afs_uint32 *replicated, cm_serverRef_t ***serversppp) { long code; cm_volume_t *volp = NULL; + cm_vol_state_t *volstatep = NULL; cm_cell_t *cellp = NULL; if (!fidp) { @@ -186,6 +206,8 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, if (code) return code; + volstatep = cm_VolumeStateByID(volp, fidp->volume); + *replicated = (volstatep->flags & CM_VOL_STATE_FLAG_REPLICATED); *serversppp = cm_GetVolServers(volp, fidp->volume, userp, reqp); lock_ObtainRead(&cm_volumeLock); @@ -211,11 +233,15 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, * volSyncp and/or cbrp may also be NULL. 
*/ int -cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, +cm_Analyze(cm_conn_t *connp, + cm_user_t *userp, + cm_req_t *reqp, struct cm_fid *fidp, + afs_uint32 storeOp, AFSVolSync *volSyncp, cm_serverRef_t * serversp, - cm_callbackRequest_t *cbrp, long errorCode) + cm_callbackRequest_t *cbrp, + long errorCode) { cm_server_t *serverp = NULL; cm_serverRef_t **serverspp = NULL; @@ -224,6 +250,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, cm_ucell_t *ucellp; cm_volume_t * volp = NULL; cm_vol_state_t *statep = NULL; + cm_scache_t * scp = NULL; + afs_uint32 replicated; int retry = 0; int free_svr_list = 0; int dead_session; @@ -395,7 +423,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, retry = 1; } else { if (!serversp) { - code = cm_GetServerList(fidp, userp, reqp, &serverspp); + code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); if (code == 0) { serversp = *serverspp; free_svr_list = 1; @@ -453,7 +481,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, /* special codes: VBUSY and VRESTARTING */ else if (errorCode == VBUSY || errorCode == VRESTARTING) { if (!serversp && fidp) { - code = cm_GetServerList(fidp, userp, reqp, &serverspp); + code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); if (code == 0) { serversp = *serverspp; free_svr_list = 1; @@ -604,7 +632,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, * from the server list if it was moved or is not present. 
*/ if (!serversp || location_updated) { - code = cm_GetServerList(fidp, userp, reqp, &serverspp); + code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp); if (code == 0) { serversp = *serverspp; free_svr_list = 1; @@ -655,7 +683,6 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, retry = 1; } else if ( errorCode == VNOVNODE ) { if ( fidp ) { - cm_scache_t * scp; osi_Log4(afsd_logp, "cm_Analyze passed VNOVNODE cell %u vol %u vn %u uniq %u.", fidp->cell, fidp->volume, fidp->vnode, fidp->unique); @@ -713,6 +740,24 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, reqp->idleError++; } + if (fidp && storeOp) + scp = cm_FindSCache(fidp); + if (scp) { + if (cm_HaveCallback(scp)) { + lock_ObtainWrite(&scp->rw); + cm_DiscardSCache(scp); + lock_ReleaseWrite(&scp->rw); + + /* + * We really should notify the redirector that we discarded + * the status information but doing so in this case is not + * safe as it can result in a deadlock with extent release + * processing. + */ + } + cm_ReleaseSCache(scp); + } + if (timeLeft > 2) { if (!fidp) { /* vldb */ retry = 1; @@ -765,7 +810,84 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, osi_LogSaveString(afsd_logp,addr)); retry = 1; } - else if (errorCode >= -64 && errorCode < 0) { + else if (errorCode == RX_CALL_IDLE) { + /* + * RPC failed because the server failed to respond with data + * within the idle dead timeout period. This could be for a variety + * of reasons: + * 1. The server could have a bad partition such as a failed + * disk or iSCSI target and all I/O to that partition is + * blocking on the server and will never complete. + * + * 2. The server vnode may be locked by another client request + * that is taking a very long time. + * + * 3. The server may have a very long queue of requests + * pending and is unable to process this request. + * + * 4. The server could be malicious and is performing a denial + * of service attack against the client. 
+ * + * If this is a request against a .readonly with alternate sites + * the server should be marked down for this request and the + * client should fail over to another server. If this is a + * request against a single source, the client may retry once. + */ + if (serverp) + sprintf(addr, "%d.%d.%d.%d", + ((serverp->addr.sin_addr.s_addr & 0xff)), + ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), + ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), + ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24)); + + if (fidp) { + code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); + if (code == 0) { + statep = cm_VolumeStateByID(volp, fidp->volume); + + if (statep) + replicated = (statep->flags & CM_VOL_STATE_FLAG_REPLICATED); + + lock_ObtainRead(&cm_volumeLock); + cm_PutVolume(volp); + lock_ReleaseRead(&cm_volumeLock); + volp = NULL; + } + + if (storeOp) + scp = cm_FindSCache(fidp); + if (scp) { + if (cm_HaveCallback(scp)) { + lock_ObtainWrite(&scp->rw); + cm_DiscardSCache(scp); + lock_ReleaseWrite(&scp->rw); + + /* + * We really should notify the redirector that we discarded + * the status information but doing so in this case is not + * safe as it can result in a deadlock with extent release + * processing. 
+ */ + } + cm_ReleaseSCache(scp); + } + } + + if (replicated && serverp) { + reqp->tokenIdleErrorServp = serverp; + reqp->tokenError = errorCode; + + if (timeLeft > 2) + retry = 1; + } + + LogEvent(EVENTLOG_WARNING_TYPE, MSG_RX_IDLE_DEAD_TIMEOUT, addr, retry); + osi_Log2(afsd_logp, "cm_Analyze: RPC failed due to idle dead timeout addr[%s] retry=%u", + osi_LogSaveString(afsd_logp,addr), retry); + } + else if (errorCode == RX_CALL_DEAD) { /* mark server as down */ if (serverp) sprintf(addr, "%d.%d.%d.%d", @@ -774,35 +896,69 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24)); - if (errorCode == RX_CALL_DEAD) - osi_Log2(afsd_logp, "cm_Analyze: Rx Call Dead addr[%s] forcedNew[%s]", - osi_LogSaveString(afsd_logp,addr), - (reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no")); - else - osi_Log3(afsd_logp, "cm_Analyze: Rx Misc Error[%d] addr[%s] forcedNew[%s]", - errorCode, - osi_LogSaveString(afsd_logp,addr), - (reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no")); + osi_Log2(afsd_logp, "cm_Analyze: Rx Call Dead addr[%s] forcedNew[%s]", + osi_LogSaveString(afsd_logp,addr), + (reqp->flags & CM_REQ_NEW_CONN_FORCED ? 
"yes" : "no")); if (serverp) { - lock_ObtainMutex(&serverp->mx); - if (errorCode == RX_CALL_DEAD && - (reqp->flags & CM_REQ_NEW_CONN_FORCED)) { + if ((reqp->flags & CM_REQ_NEW_CONN_FORCED)) { + lock_ObtainMutex(&serverp->mx); if (!(serverp->flags & CM_SERVERFLAG_DOWN)) { _InterlockedOr(&serverp->flags, CM_SERVERFLAG_DOWN); serverp->downTime = time(NULL); } + lock_ReleaseMutex(&serverp->mx); } else { - if (reqp->flags & CM_REQ_NEW_CONN_FORCED) { - reqp->tokenIdleErrorServp = serverp; - reqp->tokenError = errorCode; - } else { - reqp->flags |= CM_REQ_NEW_CONN_FORCED; - forcing_new = 1; - } + reqp->flags |= CM_REQ_NEW_CONN_FORCED; + forcing_new = 1; + cm_ForceNewConnections(serverp); + } + } + + if (fidp && storeOp) + scp = cm_FindSCache(fidp); + if (scp) { + if (cm_HaveCallback(scp)) { + lock_ObtainWrite(&scp->rw); + cm_DiscardSCache(scp); + lock_ReleaseWrite(&scp->rw); + + /* + * We really should notify the redirector that we discarded + * the status information but doing so in this case is not + * safe as it can result in a deadlock with extent release + * processing. + */ + } + cm_ReleaseSCache(scp); + } + + if ( timeLeft > 2 ) + retry = 1; + } + else if (errorCode >= -64 && errorCode < 0) { + /* mark server as down */ + if (serverp) + sprintf(addr, "%d.%d.%d.%d", + ((serverp->addr.sin_addr.s_addr & 0xff)), + ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), + ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), + ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24)); + + osi_Log3(afsd_logp, "cm_Analyze: Rx Misc Error[%d] addr[%s] forcedNew[%s]", + errorCode, + osi_LogSaveString(afsd_logp,addr), + (reqp->flags & CM_REQ_NEW_CONN_FORCED ? 
"yes" : "no")); + + if (serverp) { + if (reqp->flags & CM_REQ_NEW_CONN_FORCED) { + reqp->tokenIdleErrorServp = serverp; + reqp->tokenError = errorCode; + } else { + reqp->flags |= CM_REQ_NEW_CONN_FORCED; + forcing_new = 1; + cm_ForceNewConnections(serverp); } - lock_ReleaseMutex(&serverp->mx); - cm_ForceNewConnections(serverp); } if ( timeLeft > 2 ) retry = 1; @@ -1013,8 +1169,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, return retry; } -long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, - cm_req_t *reqp, cm_conn_t **connpp) +long cm_ConnByMServers(cm_serverRef_t *serversp, afs_uint32 replicated, cm_user_t *usersp, + cm_req_t *reqp, cm_conn_t **connpp) { long code; cm_serverRef_t *tsrp; @@ -1070,7 +1226,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp, } else { allOffline = 0; allBusy = 0; - code = cm_ConnByServer(tsp, usersp, connpp); + code = cm_ConnByServer(tsp, usersp, replicated, connpp); if (code == 0) { /* cm_CBS only returns 0 */ cm_PutServer(tsp); #ifdef SET_RX_TIMEOUTS_TO_TIMELEFT @@ -1155,7 +1311,7 @@ void cm_GCConnections(cm_server_t *serverp) } static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp, - cm_server_t *serverp) + cm_server_t *serverp, afs_uint32 replicated) { unsigned short port; int serviceID; @@ -1211,7 +1367,12 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp, /* * Setting idle dead timeout to a non-zero value activates RX_CALL_IDLE errors */ - rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout); + if (replicated) { + tcp->flags &= CM_CONN_FLAG_REPLICATION; + rx_SetConnIdleDeadTime(tcp->rxconnp, ReplicaIdleDeadtimeout); + } else { + rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout); + } /* * Let the Rx library know that we can auto-retry if an @@ -1235,7 +1396,7 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp, rxs_Release(secObjp); /* Decrement the initial refCount */ } -long cm_ConnByServer(cm_server_t *serverp, 
cm_user_t *userp, cm_conn_t **connpp) +long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, afs_uint32 replicated, cm_conn_t **connpp) { cm_conn_t *tcp; cm_ucell_t *ucellp; @@ -1248,7 +1409,9 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp) lock_ObtainMutex(&userp->mx); lock_ObtainRead(&cm_connLock); for (tcp = serverp->connsp; tcp; tcp=tcp->nextp) { - if (tcp->userp == userp) + if (tcp->userp == userp && + (replicated && (tcp->flags & CM_CONN_FLAG_REPLICATION) || + !replicated && !(tcp->flags & CM_CONN_FLAG_REPLICATION))) break; } @@ -1275,7 +1438,7 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp) lock_ObtainMutex(&tcp->mx); tcp->serverp = serverp; tcp->cryptlevel = rxkad_clear; - cm_NewRXConnection(tcp, ucellp, serverp); + cm_NewRXConnection(tcp, ucellp, serverp, replicated); tcp->refCount = 1; lock_ReleaseMutex(&tcp->mx); lock_ReleaseWrite(&cm_connLock); @@ -1296,7 +1459,7 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp) tcp->flags &= ~CM_CONN_FLAG_FORCE_NEW; rx_SetConnSecondsUntilNatPing(tcp->rxconnp, 0); rx_DestroyConnection(tcp->rxconnp); - cm_NewRXConnection(tcp, ucellp, serverp); + cm_NewRXConnection(tcp, ucellp, serverp, replicated); } lock_ReleaseMutex(&tcp->mx); } @@ -1317,10 +1480,11 @@ long cm_ServerAvailable(struct cm_fid *fidp, struct cm_user *userp) cm_serverRef_t *tsrp; cm_server_t *tsp; int someBusy = 0, someOffline = 0, allOffline = 1, allBusy = 1, allDown = 1; + afs_uint32 replicated; cm_InitReq(&req); - code = cm_GetServerList(fidp, userp, &req, &serverspp); + code = cm_GetServerList(fidp, userp, &req, &replicated, &serverspp); if (code) return 0; @@ -1365,15 +1529,15 @@ long cm_ConnFromFID(struct cm_fid *fidp, struct cm_user *userp, cm_req_t *reqp, { long code; cm_serverRef_t **serverspp; + afs_uint32 replicated; *connpp = NULL; - code = cm_GetServerList(fidp, userp, reqp, &serverspp); - if (code) { + code = cm_GetServerList(fidp, 
userp, reqp, &replicated, &serverspp); + if (code) return code; - } - code = cm_ConnByMServers(*serverspp, userp, reqp, connpp); + code = cm_ConnByMServers(*serverspp, replicated, userp, reqp, connpp); cm_FreeServerList(serverspp, 0); return code; } @@ -1384,12 +1548,16 @@ long cm_ConnFromVolume(struct cm_volume *volp, unsigned long volid, struct cm_us { long code; cm_serverRef_t **serverspp; + afs_uint32 replicated; + cm_vol_state_t * volstatep; *connpp = NULL; + volstatep = cm_VolumeStateByID(volp, volid); + replicated = (volstatep->flags & CM_VOL_STATE_FLAG_REPLICATED); serverspp = cm_GetVolServers(volp, volid, userp, reqp); - code = cm_ConnByMServers(*serverspp, userp, reqp, connpp); + code = cm_ConnByMServers(*serverspp, replicated, userp, reqp, connpp); cm_FreeServerList(serverspp, 0); return code; } diff --git a/src/WINNT/afsd/cm_conn.h b/src/WINNT/afsd/cm_conn.h index 044c05405..7b6f00a04 100644 --- a/src/WINNT/afsd/cm_conn.h +++ b/src/WINNT/afsd/cm_conn.h @@ -20,6 +20,9 @@ #ifndef CM_CONN_IDLEDEADTIME #define CM_CONN_IDLEDEADTIME 0 #endif +#ifndef CM_CONN_IDLEDEADTIME_REP +#define CM_CONN_IDLEDEADTIME_REP 0 +#endif #ifndef CM_CONN_NATPINGINTERVAL #define CM_CONN_NATPINGINTERVAL 0 #endif @@ -41,7 +44,8 @@ typedef struct cm_conn { int cryptlevel; /* encrytion status */ } cm_conn_t; -#define CM_CONN_FLAG_FORCE_NEW 1 +#define CM_CONN_FLAG_FORCE_NEW 1 +#define CM_CONN_FLAG_REPLICATION 2 /* * structure used for tracking RPC progress @@ -127,15 +131,16 @@ extern void cm_InitConn(void); extern void cm_InitReq(cm_req_t *reqp); extern int cm_Analyze(cm_conn_t *connp, struct cm_user *up, struct cm_req *reqp, - struct cm_fid *fidp, - struct AFSVolSync *volInfop, - cm_serverRef_t * serversp, - struct cm_callbackRequest *cbrp, long code); + struct cm_fid *fidp, + afs_uint32 storeOp, + struct AFSVolSync *volInfop, + cm_serverRef_t * serversp, + struct cm_callbackRequest *cbrp, long code); -extern long cm_ConnByMServers(struct cm_serverRef *, struct cm_user *, +extern 
long cm_ConnByMServers(struct cm_serverRef *, afs_uint32, struct cm_user *, cm_req_t *, cm_conn_t **); -extern long cm_ConnByServer(struct cm_server *, struct cm_user *, cm_conn_t **); +extern long cm_ConnByServer(struct cm_server *, struct cm_user *, afs_uint32, cm_conn_t **); extern long cm_ConnFromFID(struct cm_fid *, struct cm_user *, struct cm_req *, cm_conn_t **); diff --git a/src/WINNT/afsd/cm_dcache.c b/src/WINNT/afsd/cm_dcache.c index 78212d050..00f841e58 100644 --- a/src/WINNT/afsd/cm_dcache.c +++ b/src/WINNT/afsd/cm_dcache.c @@ -335,7 +335,7 @@ long cm_BufWrite(void *vscp, osi_hyper_t *offsetp, long length, long flags, /* Prefer StoreData error over rx_EndCall error */ if (code1 != 0) code = code1; - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); @@ -512,7 +512,7 @@ long cm_StoreMini(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp) /* prefer StoreData error over rx_EndCall error */ if (code == 0 && code1 != 0) code = code1; - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); /* now, clean up our state */ @@ -2068,7 +2068,7 @@ long cm_GetBuffer(cm_scache_t *scp, cm_buf_t *bufp, int *cpffp, cm_user_t *userp code = code1; osi_Log0(afsd_logp, "CALL FetchData DONE"); - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code)); fetchingcompleted: code = cm_MapRPCError(code, reqp); @@ -2445,7 +2445,7 @@ long cm_GetData(cm_scache_t *scp, osi_hyper_t *offsetp, char *datap, int data_le code = code1; osi_Log0(afsd_logp, "CALL FetchData DONE"); - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, 
userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code)); fetchingcompleted: code = cm_MapRPCError(code, reqp); diff --git a/src/WINNT/afsd/cm_ioctl.c b/src/WINNT/afsd/cm_ioctl.c index 8381de7ef..db32eecad 100644 --- a/src/WINNT/afsd/cm_ioctl.c +++ b/src/WINNT/afsd/cm_ioctl.c @@ -422,7 +422,7 @@ cm_IoctlGetACL(cm_ioctl_t *ioctlp, cm_user_t *userp, cm_scache_t *scp, cm_req_t code = RXAFS_FetchACL(rxconnp, &afid, &acl, &fileStatus, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -520,7 +520,7 @@ cm_IoctlSetACL(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *scp, code = RXAFS_StoreACL(rxconnp, &fid, &acl, &fileStatus, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); /* invalidate cache info, since we just trashed the ACL cache */ @@ -689,7 +689,7 @@ cm_IoctlSetVolumeStatus(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scach &storeStat, volName, offLineMsg, motd); rx_PutConnection(rxconnp); - } while (cm_Analyze(tcp, userp, reqp, &scp->fid, NULL, NULL, NULL, code)); + } while (cm_Analyze(tcp, userp, reqp, &scp->fid, 1, NULL, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); } @@ -765,7 +765,7 @@ cm_IoctlGetVolumeStatus(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scach &volStat, &Name, &OfflineMsg, &MOTD); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &scp->fid, NULL, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, NULL, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); } diff --git a/src/WINNT/afsd/cm_server.c b/src/WINNT/afsd/cm_server.c index 45925c7c2..d880b34ab 100644 --- 
a/src/WINNT/afsd/cm_server.c +++ b/src/WINNT/afsd/cm_server.c @@ -139,7 +139,7 @@ cm_PingServer(cm_server_t *tsp) afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr); lock_ReleaseMutex(&tsp->mx); - code = cm_ConnByServer(tsp, cm_rootUserp, &connp); + code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp); if (code == 0) { /* now call the appropriate ping call. Drop the timeout if * the server is known to be down, so that we don't waste a @@ -415,7 +415,7 @@ static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp) lock_ReleaseMutex(&tsp->mx); serversp[nconns] = tsp; - code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]); + code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]); if (code) { lock_ObtainRead(&cm_serverLock); cm_PutServerNoLock(tsp); @@ -581,7 +581,7 @@ static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp) lock_ReleaseMutex(&tsp->mx); serversp[nconns] = tsp; - code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]); + code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]); if (code) { lock_ObtainRead(&cm_serverLock); cm_PutServerNoLock(tsp); diff --git a/src/WINNT/afsd/cm_utils.c b/src/WINNT/afsd/cm_utils.c index a13fc692c..9d8ae78e9 100644 --- a/src/WINNT/afsd/cm_utils.c +++ b/src/WINNT/afsd/cm_utils.c @@ -211,9 +211,10 @@ long cm_MapRPCError(long error, cm_req_t *reqp) if (error == RX_CALL_DEAD || error == RX_CALL_TIMEOUT || error == RX_CALL_BUSY || - error == RX_CALL_IDLE || error == RX_MSGSIZE) error = CM_ERROR_RETRY; + else if (error == RX_CALL_IDLE) + error = EIO; else if (error < 0) error = CM_ERROR_UNKNOWN; else if (error == EINVAL) diff --git a/src/WINNT/afsd/cm_vnodeops.c b/src/WINNT/afsd/cm_vnodeops.c index 0c387d22d..89bbbaed1 100644 --- a/src/WINNT/afsd/cm_vnodeops.c +++ b/src/WINNT/afsd/cm_vnodeops.c @@ -1637,7 +1637,7 @@ long cm_Unlink(cm_scache_t *dscp, fschar_t *fnamep, clientchar_t * cnamep, &newDirStatus, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, 
userp, reqp, &dscp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &dscp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -2426,7 +2426,7 @@ cm_TryBulkStatRPC(cm_scache_t *dscp, cm_bulkStat_t *bbp, cm_user_t *userp, cm_re code = (&bbp->stats[0])->errorCode; } } - } while (cm_Analyze(connp, userp, reqp, &tfid, &volSync, NULL, &cbReq, code)); + } while (cm_Analyze(connp, userp, reqp, &tfid, 0, &volSync, NULL, &cbReq, code)); code = cm_MapRPCError(code, reqp); /* @@ -2457,7 +2457,7 @@ cm_TryBulkStatRPC(cm_scache_t *dscp, cm_bulkStat_t *bbp, cm_user_t *userp, cm_re if (inlinebulk && (&bbp->stats[j])->errorCode) { cm_req_t treq = *reqp; - cm_Analyze(NULL, userp, &treq, &tfid, &volSync, NULL, &cbReq, (&bbp->stats[j])->errorCode); + cm_Analyze(NULL, userp, &treq, &tfid, 0, &volSync, NULL, &cbReq, (&bbp->stats[j])->errorCode); } else { code = cm_GetSCache(&tfid, &scp, userp, reqp); if (code != 0) @@ -2746,7 +2746,7 @@ long cm_SetAttr(cm_scache_t *scp, cm_attr_t *attrp, cm_user_t *userp, rx_PutConnection(rxconnp); } while (cm_Analyze(connp, userp, reqp, - &scp->fid, &volSync, NULL, NULL, code)); + &scp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -2857,7 +2857,7 @@ long cm_Create(cm_scache_t *dscp, clientchar_t *cnamep, long flags, cm_attr_t *a rx_PutConnection(rxconnp); } while (cm_Analyze(connp, userp, reqp, - &dscp->fid, &volSync, NULL, &cbReq, code)); + &dscp->fid, 1, &volSync, NULL, &cbReq, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -3044,7 +3044,7 @@ long cm_MakeDir(cm_scache_t *dscp, clientchar_t *cnamep, long flags, cm_attr_t * rx_PutConnection(rxconnp); } while (cm_Analyze(connp, userp, reqp, - &dscp->fid, &volSync, NULL, &cbReq, code)); + &dscp->fid, 1, &volSync, NULL, &cbReq, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -3170,8 +3170,7 @@ long cm_Link(cm_scache_t *dscp, clientchar_t *cnamep, cm_scache_t *sscp, long fl 
rx_PutConnection(rxconnp); osi_Log1(afsd_logp," RXAFS_Link returns 0x%x", code); - } while (cm_Analyze(connp, userp, reqp, - &dscp->fid, &volSync, NULL, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, &dscp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); @@ -3280,7 +3279,7 @@ long cm_SymLink(cm_scache_t *dscp, clientchar_t *cnamep, fschar_t *contentsp, lo rx_PutConnection(rxconnp); } while (cm_Analyze(connp, userp, reqp, - &dscp->fid, &volSync, NULL, NULL, code)); + &dscp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -3436,7 +3435,7 @@ long cm_RemoveDir(cm_scache_t *dscp, fschar_t *fnamep, clientchar_t *cnamep, cm_ rx_PutConnection(rxconnp); } while (cm_Analyze(connp, userp, reqp, - &dscp->fid, &volSync, NULL, NULL, code)); + &dscp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCErrorRmdir(code, reqp); if (code) @@ -3772,7 +3771,7 @@ long cm_Rename(cm_scache_t *oldDscp, fschar_t *oldNamep, clientchar_t *cOldNamep &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &oldDscp->fid, + } while (cm_Analyze(connp, userp, reqp, &oldDscp->fid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); @@ -4429,7 +4428,7 @@ long cm_IntSetLock(cm_scache_t * scp, cm_user_t * userp, int lockType, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &cfid, &volSync, + } while (cm_Analyze(connp, userp, reqp, &cfid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); @@ -4479,7 +4478,7 @@ long cm_IntReleaseLock(cm_scache_t * scp, cm_user_t * userp, code = RXAFS_ReleaseLock(rxconnp, &tfid, &volSync); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, &cfid, &volSync, + } while (cm_Analyze(connp, userp, reqp, &cfid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, reqp); if (code) @@ -5454,7 +5453,7 @@ void cm_CheckLocks() osi_Log1(afsd_logp, " ExtendLock returns %d", code); } while 
(cm_Analyze(connp, userp, &req, - &cfid, &volSync, NULL, NULL, + &cfid, 1, &volSync, NULL, NULL, code)); code = cm_MapRPCError(code, &req); diff --git a/src/WINNT/afsd/cm_volume.c b/src/WINNT/afsd/cm_volume.c index 81ad0f7ba..a8f345562 100644 --- a/src/WINNT/afsd/cm_volume.c +++ b/src/WINNT/afsd/cm_volume.c @@ -181,7 +181,7 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name, osi_LogSaveString(afsd_logp,name)); do { - code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp); + code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp); if (code) continue; @@ -198,7 +198,7 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name, *methodp = 0; } rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code)); code = cm_MapVLRPCError(code, reqp); if ( code ) osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", @@ -254,6 +254,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * #endif afs_uint32 volType; time_t now; + int replicated = 0; lock_AssertWrite(&volp->rw); @@ -385,6 +386,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * case 0: flags = vldbEntry.flags; nServers = vldbEntry.nServers; + replicated = (nServers > 0); rwID = vldbEntry.volumeId[0]; roID = vldbEntry.volumeId[1]; bkID = vldbEntry.volumeId[2]; @@ -398,6 +400,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * case 1: flags = nvldbEntry.flags; nServers = nvldbEntry.nServers; + replicated = (nServers > 0); rwID = nvldbEntry.volumeId[0]; roID = nvldbEntry.volumeId[1]; bkID = nvldbEntry.volumeId[2]; @@ -411,6 +414,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * case 2: flags = uvldbEntry.flags; nServers = uvldbEntry.nServers; + replicated = (nServers > 0); rwID = 
uvldbEntry.volumeId[0]; roID = uvldbEntry.volumeId[1]; bkID = uvldbEntry.volumeId[2]; @@ -432,14 +436,14 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * memset(&addrs, 0, sizeof(addrs)); do { - code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp); + code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp); if (code) continue; rxconnp = cm_GetRxConn(connp); code = VL_GetAddrsU(rxconnp, &attrs, &uuid, &unique, &nentries, &addrs); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code)); + } while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code)); if ( code ) { code = cm_MapVLRPCError(code, reqp); @@ -517,6 +521,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t * volp->vol[ROVOL].ID = roID; cm_AddVolumeToIDHashTable(volp, ROVOL); } + if (replicated) + _InterlockedOr(&volp->vol[ROVOL].flags, CM_VOL_STATE_FLAG_REPLICATED); + else + _InterlockedAnd(&volp->vol[ROVOL].flags, ~CM_VOL_STATE_FLAG_REPLICATED); } else { if (volp->vol[ROVOL].qflags & CM_VOLUME_QFLAG_IN_HASH) cm_RemoveVolumeFromIDHashTable(volp, ROVOL); @@ -1283,7 +1291,7 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32 code = RXAFS_GetVolumeStatus(rxconnp, statep->ID, &volStat, &Name, &OfflineMsg, &MOTD); rx_PutConnection(rxconnp); - } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, NULL, NULL, NULL, code)); + } while (cm_Analyze(connp, cm_rootUserp, &req, &fid, 0, NULL, NULL, NULL, code)); code = cm_MapRPCError(code, &req); lock_ObtainWrite(&volp->rw); diff --git a/src/WINNT/afsd/cm_volume.h b/src/WINNT/afsd/cm_volume.h index ff0589abf..af4d160a9 100644 --- a/src/WINNT/afsd/cm_volume.h +++ b/src/WINNT/afsd/cm_volume.h @@ -31,6 +31,8 @@ typedef struct cm_vol_state { /* RWVOL, ROVOL, BACKVOL are defined in cm.h */ #define NUM_VOL_TYPES 3 +#define CM_VOL_STATE_FLAG_REPLICATED 1 + typedef struct 
cm_volume { osi_queue_t q; /* LRU queue; cm_volumeLock */ afs_uint32 qflags; /* by cm_volumeLock */ -- 2.39.5