From 18336df014ce708683cb12370c685dda047b48be Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Thu, 5 Oct 2006 21:30:38 +0000 Subject: [PATCH] DEVEL15-windows-misc-fix-20061005 Still tracking down issues when large numbers of temporary files are created and deleted in the same directory from multiple machines. VNOVNODE must be mapped to CM_ERROR_BADFD. Ensure that all calls to cm_GetCallback are made from cm_SyncOp. Add CM_SCACHESYNC_FORCECB logic to cm_SyncOp so that the explicit cm_GetCallback calls can be removed from cm_GetAccessRights. Don't mix CM_SCACHESYNC_NEEDCALLBACK with CM_SCACHESYNC_STOREDATA or CM_SCACHESYNC_FETCHDATA. (cherry picked from commit 4cd9eee0155edba89b77779af9aea4f1aa55cd96) --- src/WINNT/afsd/cm_access.c | 18 ++++++--- src/WINNT/afsd/cm_callback.c | 52 ++++++++++++++----------- src/WINNT/afsd/cm_conn.c | 8 ++-- src/WINNT/afsd/cm_dcache.c | 74 ++++++++++++++++++------------------ src/WINNT/afsd/cm_scache.c | 23 ++++++----- src/WINNT/afsd/cm_utils.c | 8 +--- src/WINNT/afsd/smb.c | 6 +-- 7 files changed, 99 insertions(+), 90 deletions(-) diff --git a/src/WINNT/afsd/cm_access.c b/src/WINNT/afsd/cm_access.c index d9bdceb88..3073d838b 100644 --- a/src/WINNT/afsd/cm_access.c +++ b/src/WINNT/afsd/cm_access.c @@ -128,9 +128,9 @@ long cm_GetAccessRights(struct cm_scache *scp, struct cm_user *userp, /* first, start by finding out whether we have a directory or something * else, so we can find what object's ACL we need. 
*/ - if (!cm_HaveCallback(scp)) { + if (scp->fileType == CM_SCACHETYPE_DIRECTORY || !cm_HaveCallback(scp)) { code = cm_SyncOp(scp, NULL, userp, reqp, 0, - CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS); + CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS | CM_SCACHESYNC_FORCECB); if (code) return code; @@ -156,16 +156,24 @@ long cm_GetAccessRights(struct cm_scache *scp, struct cm_user *userp, code = cm_SyncOp(aclScp, NULL, userp, reqp, 0, CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS); if (!code) { - code = cm_GetCallback(aclScp, userp, reqp, 1); - cm_SyncOpDone(aclScp, NULL, CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS); +#if 0 + /* cm_GetCallback was called by cm_SyncOp */ + code = cm_GetCallback(aclScp, userp, reqp, 1); +#endif + cm_SyncOpDone(aclScp, NULL, + CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_GETSTATUS | CM_SCACHESYNC_FORCECB); } lock_ReleaseMutex(&aclScp->mx); } cm_ReleaseSCache(aclScp); lock_ObtainMutex(&scp->mx); - } else if (!got_cb) { + } +#if 0 + else if (!got_cb) { + /* cm_GetCallback was called by cm_SyncOp */ code = cm_GetCallback(scp, userp, reqp, 1); } +#endif _done: if (got_cb) diff --git a/src/WINNT/afsd/cm_callback.c b/src/WINNT/afsd/cm_callback.c index 3493a9009..e405b0b41 100644 --- a/src/WINNT/afsd/cm_callback.c +++ b/src/WINNT/afsd/cm_callback.c @@ -1618,7 +1618,7 @@ void cm_EndCallbackGrantingCall(cm_scache_t *scp, cm_callbackRequest_t *cbrp, long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp, struct cm_req *reqp, long flags) { - long code; + long code = 0; cm_conn_t *connp = NULL; AFSFetchStatus afsStatus; AFSVolSync volSync; @@ -1628,6 +1628,7 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp, int mustCall; cm_fid_t sfid; struct rx_connection * callp = NULL; + int syncop_done = 0; osi_Log4(afsd_logp, "GetCallback scp 0x%p cell %d vol %d flags %lX", scp, scp->fid.cell, scp->fid.volume, flags); @@ -1674,23 +1675,20 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp, mustCall = 
(flags & 1); cm_AFSFidFromFid(&tfid, &scp->fid); while (1) { - if (!mustCall && cm_HaveCallback(scp)) { - osi_Log3(afsd_logp, "GetCallback Complete scp 0x%p cell %d vol %d", - scp, scp->fid.cell, scp->fid.volume); - return 0; - } + if (!mustCall && cm_HaveCallback(scp)) + break; /* turn off mustCall, since it has now forced us past the check above */ mustCall = 0; - /* 20060929 jaltman - We are being called from within cm_SyncOp. - * if we call cm_SyncOp again and another thread has attempted - * to obtain current status CM_SCACHEFLAG_WAITING will be set - * and we will deadlock. - */ /* otherwise, we have to make an RPC to get the status */ - cm_SyncOp(scp, NULL, userp, reqp, 0, - CM_SCACHESYNC_FETCHSTATUS | CM_SCACHESYNC_GETCALLBACK); + if (!syncop_done) { + code = cm_SyncOp(scp, NULL, userp, reqp, 0, + CM_SCACHESYNC_FETCHSTATUS | CM_SCACHESYNC_GETCALLBACK); + if (code) + break; + syncop_done = 1; + } cm_StartCallbackGrantingCall(scp, &cbr); sfid = scp->fid; lock_ReleaseMutex(&scp->mx); @@ -1725,17 +1723,25 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp, } else { cm_EndCallbackGrantingCall(NULL, &cbr, NULL, 0); } - /* 20060929 jaltman - don't deadlock */ - cm_SyncOpDone(scp, NULL, CM_SCACHESYNC_FETCHSTATUS | CM_SCACHESYNC_GETCALLBACK); - - /* now check to see if we got an error */ - if (code) { - osi_Log2(afsd_logp, "GetCallback Failed code 0x%x scp 0x%p -->",code, scp); - osi_Log4(afsd_logp, " cell %u vol %u vn %u uniq %u", - scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique); - return code; - } + + /* if we got an error, return to caller */ + if (code) + break; + } + + if (syncop_done) + cm_SyncOpDone(scp, NULL, CM_SCACHESYNC_FETCHSTATUS | CM_SCACHESYNC_GETCALLBACK); + + if (code) { + osi_Log2(afsd_logp, "GetCallback Failed code 0x%x scp 0x%p -->",code, scp); + osi_Log4(afsd_logp, " cell %u vol %u vn %u uniq %u", + scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique); + } else { + osi_Log3(afsd_logp, "GetCallback 
Complete scp 0x%p cell %d vol %d", + scp, scp->fid.cell, scp->fid.volume); } + + return code; } /* called periodically by cm_daemon to shut down use of expired callbacks */ diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index f47abc197..384ed98f3 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -77,23 +77,23 @@ void cm_InitConn(void) code = RegQueryValueEx(parmKey, "ConnDeadTimeout", NULL, NULL, (BYTE *) &dwValue, &dummyLen); if (code == ERROR_SUCCESS) - ConnDeadtimeout = dwValue; + ConnDeadtimeout = (unsigned short)dwValue; dummyLen = sizeof(DWORD); code = RegQueryValueEx(parmKey, "HardDeadTimeout", NULL, NULL, (BYTE *) &dwValue, &dummyLen); if (code == ERROR_SUCCESS) - HardDeadtimeout = dwValue; + HardDeadtimeout = (unsigned short)dwValue; afsi_log("HardDeadTimeout is %d", HardDeadtimeout); RegCloseKey(parmKey); } afsi_log("lanmanworkstation : SessTimeout %d", RDRtimeout); if (ConnDeadtimeout == 0) - ConnDeadtimeout = RDRtimeout / 2; + ConnDeadtimeout = (unsigned short) (RDRtimeout / 2); afsi_log("ConnDeadTimeout is %d", ConnDeadtimeout); if (HardDeadtimeout == 0) - HardDeadtimeout = RDRtimeout; + HardDeadtimeout = (unsigned short) RDRtimeout; afsi_log("HardDeadTimeout is %d", HardDeadtimeout); osi_EndOnce(&once); diff --git a/src/WINNT/afsd/cm_dcache.c b/src/WINNT/afsd/cm_dcache.c index ba9e893ab..d3c7ce7e2 100644 --- a/src/WINNT/afsd/cm_dcache.c +++ b/src/WINNT/afsd/cm_dcache.c @@ -770,8 +770,7 @@ long cm_SetupStoreBIOD(cm_scache_t *scp, osi_hyper_t *inOffsetp, long inSize, lock_ObtainMutex(&bufp->mx); lock_ObtainMutex(&scp->mx); - flags = CM_SCACHESYNC_NEEDCALLBACK - | CM_SCACHESYNC_GETSTATUS + flags = CM_SCACHESYNC_GETSTATUS | CM_SCACHESYNC_STOREDATA | CM_SCACHESYNC_BUFLOCKED; code = cm_SyncOp(scp, bufp, userp, reqp, 0, flags); @@ -829,8 +828,7 @@ long cm_SetupStoreBIOD(cm_scache_t *scp, osi_hyper_t *inOffsetp, long inSize, thyper.HighPart = 0; scanEnd = LargeIntegerAdd(scanStart, thyper); - flags = 
CM_SCACHESYNC_NEEDCALLBACK - | CM_SCACHESYNC_GETSTATUS + flags = CM_SCACHESYNC_GETSTATUS | CM_SCACHESYNC_STOREDATA | CM_SCACHESYNC_BUFLOCKED | CM_SCACHESYNC_NOWAIT; @@ -1113,8 +1111,7 @@ long cm_SetupFetchBIOD(cm_scache_t *scp, osi_hyper_t *offsetp, break; } - flags = CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_FETCHDATA - | CM_SCACHESYNC_BUFLOCKED; + flags = CM_SCACHESYNC_FETCHDATA | CM_SCACHESYNC_BUFLOCKED; if (!isFirst) flags |= CM_SCACHESYNC_NOWAIT; @@ -1204,42 +1201,47 @@ void cm_ReleaseBIOD(cm_bulkIO_t *biop, int isStore) if (biop->reserved) buf_UnreserveBuffers(cm_chunkSize / cm_data.buf_blockSize); - flags = CM_SCACHESYNC_NEEDCALLBACK; if (isStore) - flags |= CM_SCACHESYNC_STOREDATA; + flags = CM_SCACHESYNC_STOREDATA; else - flags |= CM_SCACHESYNC_FETCHDATA; + flags = CM_SCACHESYNC_FETCHDATA; scp = biop->scp; - for(qdp = biop->bufListp; qdp; qdp = nqdp) { - /* lookup next guy first, since we're going to free this one */ - nqdp = (osi_queueData_t *) osi_QNext(&qdp->q); + if (biop->bufListp) { + for(qdp = biop->bufListp; qdp; qdp = nqdp) { + /* lookup next guy first, since we're going to free this one */ + nqdp = (osi_queueData_t *) osi_QNext(&qdp->q); - /* extract buffer and free queue data */ - bufp = osi_GetQData(qdp); - osi_QRemoveHT((osi_queue_t **) &biop->bufListp, - (osi_queue_t **) &biop->bufListEndp, - &qdp->q); - osi_QDFree(qdp); - - /* now, mark I/O as done, unlock the buffer and release it */ - lock_ObtainMutex(&bufp->mx); - lock_ObtainMutex(&scp->mx); - cm_SyncOpDone(scp, bufp, flags); + /* extract buffer and free queue data */ + bufp = osi_GetQData(qdp); + osi_QRemoveHT((osi_queue_t **) &biop->bufListp, + (osi_queue_t **) &biop->bufListEndp, + &qdp->q); + osi_QDFree(qdp); + + /* now, mark I/O as done, unlock the buffer and release it */ + lock_ObtainMutex(&bufp->mx); + lock_ObtainMutex(&scp->mx); + cm_SyncOpDone(scp, bufp, flags); - /* turn off writing and wakeup users */ - if (isStore) { - if (bufp->flags & CM_BUF_WAITING) { - 
osi_Log2(afsd_logp, "cm_ReleaseBIOD Waking [scp 0x%p] bp 0x%p", scp, bufp); - osi_Wakeup((LONG_PTR) bufp); - } - bufp->flags &= ~(CM_BUF_WRITING | CM_BUF_DIRTY); - } - - lock_ReleaseMutex(&scp->mx); - lock_ReleaseMutex(&bufp->mx); - buf_Release(bufp); - bufp = NULL; + /* turn off writing and wakeup users */ + if (isStore) { + if (bufp->flags & CM_BUF_WAITING) { + osi_Log2(afsd_logp, "cm_ReleaseBIOD Waking [scp 0x%p] bp 0x%p", scp, bufp); + osi_Wakeup((LONG_PTR) bufp); + } + bufp->flags &= ~(CM_BUF_WRITING | CM_BUF_DIRTY); + } + + lock_ReleaseMutex(&scp->mx); + lock_ReleaseMutex(&bufp->mx); + buf_Release(bufp); + bufp = NULL; + } + } else { + lock_ObtainMutex(&scp->mx); + cm_SyncOpDone(scp, NULL, flags); + lock_ReleaseMutex(&scp->mx); } /* clean things out */ diff --git a/src/WINNT/afsd/cm_scache.c b/src/WINNT/afsd/cm_scache.c index 138f609be..d2bf000b0 100644 --- a/src/WINNT/afsd/cm_scache.c +++ b/src/WINNT/afsd/cm_scache.c @@ -242,7 +242,6 @@ cm_scache_t *cm_GetNewSCache(void) /* There were no deleted scache objects that we could use. Try to find * one that simply hasn't been used in a while. */ - while (1) { for ( scp = cm_data.scacheLRULastp; scp; scp = (cm_scache_t *) osi_QPrev(&scp->q)) @@ -269,17 +268,7 @@ cm_scache_t *cm_GetNewSCache(void) } osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry); - /* If get here it means that every scache is either in use or has dirty buffers. - * We used to panic. Now we will give up our lock and wait. - */ - if (++retry < 10) { - lock_ReleaseWrite(&cm_scacheLock); - Sleep(1000); - lock_ObtainWrite(&cm_scacheLock); - } else { return NULL; - } - } /* forever */ } /* if we get here, we should allocate a new scache entry. 
We either are below @@ -821,6 +810,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req cm_buf_t *tbufp; afs_uint32 outRights; int bufLocked; + afs_uint32 sleep_scp_flags = 0; + afs_uint32 sleep_buf_cmflags = 0; + afs_uint32 sleep_scp_bufs = 0; /* lookup this first */ bufLocked = flags & CM_SCACHESYNC_BUFLOCKED; @@ -996,7 +988,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req cm_fakeDirCallback < 2) #endif /* AFS_FREELANCE_CLIENT */ ) { - if (!cm_HaveCallback(scp)) { + if ((flags & CM_SCACHESYNC_FORCECB) || !cm_HaveCallback(scp)) { osi_Log1(afsd_logp, "CM SyncOp getting callback on scp 0x%p", scp); if (bufLocked) @@ -1009,6 +1001,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req } if (code) return code; + flags &= ~CM_SCACHESYNC_FORCECB; /* only force once */ continue; } } @@ -1049,6 +1042,10 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req if (flags & CM_SCACHESYNC_NOWAIT) return CM_ERROR_WOULDBLOCK; + sleep_scp_flags = scp->flags; /* so we know why we slept */ + sleep_buf_cmflags = bufp ? bufp->cmFlags : 0; + sleep_scp_bufs = (scp->bufReadsp ? 1 : 0) | (scp->bufWritesp ? 
2 : 0); + /* wait here, then try again */ osi_Log1(afsd_logp, "CM SyncOp sleeping scp 0x%p", scp); if ( scp->flags & CM_SCACHEFLAG_WAITING ) { @@ -1146,6 +1143,8 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags) osi_queueData_t *qdp; cm_buf_t *tbufp; + lock_AssertMutex(&scp->mx); + /* now, update the recorded state for RPC-type calls */ if (flags & CM_SCACHESYNC_FETCHSTATUS) scp->flags &= ~CM_SCACHEFLAG_FETCHING; diff --git a/src/WINNT/afsd/cm_utils.c b/src/WINNT/afsd/cm_utils.c index d764a6570..62a2b0a98 100644 --- a/src/WINNT/afsd/cm_utils.c +++ b/src/WINNT/afsd/cm_utils.c @@ -261,13 +261,9 @@ long cm_MapRPCError(long error, cm_req_t *reqp) || error == 122 /* EDQUOT on Linux */ || error == 1133) /* EDQUOT on Irix */ error = CM_ERROR_QUOTA; - else if (error == VNOVNODE) { -#ifdef COMMENT + else if (error == VNOVNODE) error = CM_ERROR_BADFD; -#else - error = CM_ERROR_RETRY; -#endif - } else if (error == 21) + else if (error == 21) return CM_ERROR_ISDIR; return error; } diff --git a/src/WINNT/afsd/smb.c b/src/WINNT/afsd/smb.c index 55fb583c9..4fa6b8991 100644 --- a/src/WINNT/afsd/smb.c +++ b/src/WINNT/afsd/smb.c @@ -894,6 +894,7 @@ smb_vc_t *smb_FindVC(unsigned short lsn, int flags, int lana) { smb_vc_t *vcp; + lock_ObtainWrite(&smb_globalLock); /* for numVCs */ lock_ObtainWrite(&smb_rctLock); for (vcp = smb_allVCsp; vcp; vcp=vcp->nextp) { if (vcp->magic != SMB_VC_MAGIC) @@ -909,9 +910,7 @@ smb_vc_t *smb_FindVC(unsigned short lsn, int flags, int lana) if (!vcp && (flags & SMB_FLAG_CREATE)) { vcp = malloc(sizeof(*vcp)); memset(vcp, 0, sizeof(*vcp)); - lock_ObtainWrite(&smb_globalLock); vcp->vcID = ++numVCs; - lock_ReleaseWrite(&smb_globalLock); vcp->magic = SMB_VC_MAGIC; vcp->refCount = 2; /* smb_allVCsp and caller */ vcp->tidCounter = 1; @@ -954,13 +953,12 @@ smb_vc_t *smb_FindVC(unsigned short lsn, int flags, int lana) memset(vcp->encKey, 0, MSV1_0_CHALLENGE_LENGTH); if (numVCs >= CM_SESSION_RESERVED) { - lock_ObtainWrite(&smb_globalLock); 
numVCs = 0; - lock_ReleaseWrite(&smb_globalLock); osi_Log0(smb_logp, "WARNING: numVCs wrapping around"); } } lock_ReleaseWrite(&smb_rctLock); + lock_ReleaseWrite(&smb_globalLock); return vcp; } -- 2.39.5