From bada49cff904c1e783ca9fa81a92036ceae7378d Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Mon, 8 Jun 2009 02:22:09 +0000 Subject: [PATCH] windows-buf-infinite-looping-on-dirty-buffers-20090607 LICENSE MIT If there are dirty buffers when the file server becomes unresponsive, the cache manager will attempt to write them repeatedly, resulting in use of 100% of the CPU of the machine until the file server becomes responsive. This patch reduces the CPU utilization by ensuring that only the first in a list of buffers on the same file needs to fail with a timed out, all down, all busy, all offline, or clock skew error. The other dirty buffers will just be skipped. A small delay is enforced in the buf_IncrSyncer thread between invocations. This patch is not a complete fix for the problem. The buf_IncrSyncer thread needs to become more intelligent in order to avoid attempts to write to volumes that are known to be inaccessible. --- src/WINNT/afsd/cm_buf.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/WINNT/afsd/cm_buf.c b/src/WINNT/afsd/cm_buf.c index e191bb9a8..245e0947c 100644 --- a/src/WINNT/afsd/cm_buf.c +++ b/src/WINNT/afsd/cm_buf.c @@ -292,7 +292,9 @@ void buf_IncrSyncer(long parm) i = SleepEx(5000, 1); if (i != 0) continue; - } + } else { + Sleep(50); + } wasDirty = buf_Sync(1); } /* whole daemon's while loop */ @@ -721,7 +723,14 @@ afs_uint32 buf_CleanAsyncLocked(cm_buf_t *bp, cm_req_t *reqp, afs_uint32 *pisdir */ if (reqp->flags & CM_REQ_NORETRY) break; - }; + + /* Ditto if the hardDeadTimeout or idleTimeout was reached */ + if (code == CM_ERROR_TIMEDOUT || code == CM_ERROR_ALLDOWN || + code == CM_ERROR_ALLBUSY || code == CM_ERROR_ALLOFFLINE || + code == CM_ERROR_CLOCKSKEW) { + break; + } + } /* if someone was waiting for the I/O that just completed or failed, * wake them up. 
@@ -1230,7 +1239,7 @@ long buf_CountFreeList(void) } /* clean a buffer synchronously */ -long buf_CleanAsync(cm_buf_t *bp, cm_req_t *reqp, afs_uint32 *pisdirty) +afs_uint32 buf_CleanAsync(cm_buf_t *bp, cm_req_t *reqp, afs_uint32 *pisdirty) { long code; osi_assertx(bp->magic == CM_BUF_MAGIC, "invalid cm_buf_t magic"); @@ -1728,6 +1737,15 @@ long buf_CleanVnode(struct cm_scache *scp, cm_user_t *userp, cm_req_t *reqp) bp->dataVersion = CM_BUF_VERSION_BAD; bp->dirtyCounter++; break; + case CM_ERROR_TIMEDOUT: + case CM_ERROR_ALLDOWN: + case CM_ERROR_ALLBUSY: + case CM_ERROR_ALLOFFLINE: + case CM_ERROR_CLOCKSKEW: + /* do not mark the buffer in error state but do + * not attempt to complete the rest either. + */ + break; default: code = buf_CleanAsyncLocked(bp, reqp, &wasDirty); if (bp->flags & CM_BUF_ERROR) { -- 2.39.5