From a9ea20c2718bfd3837254e0b44301430d7d9e69d Mon Sep 17 00:00:00 2001
From: Jeffrey Altman
Date: Wed, 29 Nov 2006 06:23:44 +0000
Subject: [PATCH] DEVEL15-windows-dirty-buffer-optimization-20061128

The old dirty buffer synchronization algorithm had the buf_IncrSyncer
thread periodically walking the list of all buffers in search of dirty
buffers to write to the file server.  This had several negative
consequences.  The algorithm consumed ever increasing amounts of CPU
time as the cache grew, even when AFS was idle.  In addition, buffers
were written to the file server in an order determined by the original
buffer allocation, which has nothing to do with the order in which the
buffers became dirty.

The new algorithm maintains a dirty buffer list.  A buffer is added to
the list when it is first marked dirty.  A buffer is removed from the
list only by the buf_IncrSyncer thread, once the buffer is no longer
dirty.  If the list is empty, the thread goes back to sleep immediately
without any additional processing.  (A minimal standalone sketch of the
dirty-list handling follows the patch below.)

(cherry picked from commit d253bde4574e34ee08cf326ec4c481b57c230476)
---
 src/WINNT/afsd/cm_buf.c    | 110 +++++++++++++++++++++++++------------
 src/WINNT/afsd/cm_buf.h    |   1 +
 src/WINNT/afsd/cm_memmap.c |   3 +
 src/WINNT/afsd/cm_memmap.h |   2 +
 4 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/src/WINNT/afsd/cm_buf.c b/src/WINNT/afsd/cm_buf.c
index 1fd19bf35..dba6f2e97 100644
--- a/src/WINNT/afsd/cm_buf.c
+++ b/src/WINNT/afsd/cm_buf.c
@@ -134,20 +134,14 @@ void buf_Release(cm_buf_t *bp)
     lock_ReleaseWrite(&buf_globalLock);
 }
 
-/* incremental sync daemon.  Writes 1/10th of all the buffers every 5000 ms */
+/* incremental sync daemon.  Writes all dirty buffers every 5000 ms */
 void buf_IncrSyncer(long parm)
 {
-    cm_buf_t *bp;               /* buffer we're hacking on; held */
+    cm_buf_t **bpp, *bp;
     long i;                     /* counter */
-    long wasDirty;
+    long wasDirty = 0;
     cm_req_t req;
 
-    lock_ObtainWrite(&buf_globalLock);
-    bp = cm_data.buf_allp;
-    buf_HoldLocked(bp);
-    lock_ReleaseWrite(&buf_globalLock);
-    wasDirty = 0;
-
     while (buf_ShutdownFlag == 0) {
         if (!wasDirty) {
 #ifndef DJGPP
@@ -158,36 +152,49 @@ void buf_IncrSyncer(long parm)
 #endif /* DJGPP */
         }
 
-        if (buf_ShutdownFlag == 1)
-            return;
-
         wasDirty = 0;
 
         /* now go through our percentage of the buffers */
-        for (i=0; i<cm_data.buf_nbuffers; i++) {
-            bp = bp->allp;
-            if (!bp)
-                bp = cm_data.buf_allp;
-            buf_HoldLocked(bp);
-            lock_ReleaseWrite(&buf_globalLock);
+        for (bpp = &cm_data.buf_dirtyListp; bp = *bpp; ) {
+            if (bp->flags & CM_BUF_DIRTY) {
+                /* start cleaning the buffer; don't touch log pages since
+                 * the log code counts on knowing exactly who is writing
+                 * a log page at any given instant.
+                 */
+                cm_InitReq(&req);
+                req.flags |= CM_REQ_NORETRY;
+                wasDirty |= buf_CleanAsync(bp, &req);
+            }
+
+            /* the buffer may or may not have been dirty
+             * and if dirty may or may not have been cleaned
+             * successfully.  check the dirty flag again.
+             */
+            if (!(bp->flags & CM_BUF_DIRTY)) {
+                lock_ObtainMutex(&bp->mx);
+                if (!(bp->flags & CM_BUF_DIRTY)) {
+                    /* remove the buffer from the dirty list */
+                    lock_ObtainWrite(&buf_globalLock);
+                    *bpp = bp->dirtyp;
+                    bp->dirtyp = NULL;
+                    if (cm_data.buf_dirtyListp == NULL)
+                        cm_data.buf_dirtyListEndp = NULL;
+                    buf_ReleaseLocked(bp);
+                    lock_ReleaseWrite(&buf_globalLock);
+                } else {
+                    /* advance the pointer so we don't loop forever */
+                    bpp = &bp->dirtyp;
+                }
+                lock_ReleaseMutex(&bp->mx);
+            } else {
+                /* advance the pointer so we don't loop forever */
+                bpp = &bp->dirtyp;
+            }
         }       /* for loop over a bunch of buffers */
     }           /* whole daemon's while loop */
 }
@@ -635,6 +642,11 @@ long buf_CleanAsyncLocked(cm_buf_t *bp, cm_req_t *reqp)
         break;
     };
 
+    if (!(bp->flags & CM_BUF_DIRTY)) {
+        /* remove buffer from dirty buffer queue */
+
+    }
+
     /* do logging after call to GetLastError, or else */
 
     /* if someone was waiting for the I/O that just completed or failed,
@@ -1163,13 +1175,39 @@ void buf_SetDirty(cm_buf_t *bp)
     osi_assert(bp->magic == CM_BUF_MAGIC);
     osi_assert(bp->refCount > 0);
 
-    osi_Log1(buf_logp, "buf_SetDirty 0x%p", bp);
-
+    if (bp->flags & CM_BUF_DIRTY) {
+        osi_Log1(buf_logp, "buf_SetDirty 0x%p already dirty", bp);
+    } else {
+        osi_Log1(buf_logp, "buf_SetDirty 0x%p", bp);
+    }
     /* set dirty bit */
     bp->flags |= CM_BUF_DIRTY;
 
     /* and turn off EOF flag, since it has associated data now */
     bp->flags &= ~CM_BUF_EOF;
+
+    /* and add to the dirty list.
+     * we obtain a hold on the buffer for as long as it remains
+     * in the list.  buffers are only removed from the list by
+     * the buf_IncrSyncer function regardless of when else the
+     * dirty flag might be cleared.
+     *
+     * This should never happen but just in case there is a bug
+     * elsewhere, never add to the dirty list if the buffer is
+     * already there.
+     */
+    lock_ObtainWrite(&buf_globalLock);
+    if (bp->dirtyp == NULL && cm_data.buf_dirtyListEndp != bp) {
+        buf_HoldLocked(bp);
+        if (!cm_data.buf_dirtyListp) {
+            cm_data.buf_dirtyListp = cm_data.buf_dirtyListEndp = bp;
+        } else {
+            cm_data.buf_dirtyListEndp->dirtyp = bp;
+            cm_data.buf_dirtyListEndp = bp;
+        }
+        bp->dirtyp = NULL;
+    }
+    lock_ReleaseWrite(&buf_globalLock);
 }
 
 /* clean all buffers, reset log pointers and invalidate all buffers.
diff --git a/src/WINNT/afsd/cm_buf.h b/src/WINNT/afsd/cm_buf.h
index 1c5d8b254..5ae0065d9 100644
--- a/src/WINNT/afsd/cm_buf.h
+++ b/src/WINNT/afsd/cm_buf.h
@@ -70,6 +70,7 @@ typedef struct cm_buf {
                                          * hash function is good and if there are
                                          * enough buckets for the size of the cache.
                                          */
+    struct cm_buf *dirtyp;              /* next in the dirty list */
     osi_mutex_t mx;                     /* mutex protecting structure except refcount */
     unsigned long refCount;             /* reference count (buf_globalLock) */
     long idCounter;                     /* counter for softrefs; bumped at each recycle */
diff --git a/src/WINNT/afsd/cm_memmap.c b/src/WINNT/afsd/cm_memmap.c
index fe51f8cac..900c01b40 100644
--- a/src/WINNT/afsd/cm_memmap.c
+++ b/src/WINNT/afsd/cm_memmap.c
@@ -790,6 +790,7 @@ cm_InitMappedMemory(DWORD virtualCache, char * cachePath, DWORD stats, DWORD chu
 
     if ( newFile ) {
         afsi_log("Building AFS Cache from scratch");
+        memset(&cm_data, 0, sizeof(cm_config_data_t));
         cm_data.size = sizeof(cm_config_data_t);
         cm_data.magic = CM_CONFIG_DATA_MAGIC;
         cm_data.baseAddress = baseAddress;
@@ -833,6 +834,8 @@ cm_InitMappedMemory(DWORD virtualCache, char * cachePath, DWORD stats, DWORD chu
         cm_data.bufDataBaseAddress = (char *) baseAddress;
         baseAddress += ComputeSizeOfDataBuffers(cacheBlocks, CM_CONFIGDEFAULT_BLOCKSIZE);
         cm_data.bufEndOfData = (char *) baseAddress;
+        cm_data.buf_dirtyListp = NULL;
+        cm_data.buf_dirtyListEndp = NULL;
         cm_data.fakeDirVersion = 0x8;
         UuidCreate((UUID *)&cm_data.Uuid);
         cm_data.volSerialNumber = volumeSerialNumber;
diff --git a/src/WINNT/afsd/cm_memmap.h b/src/WINNT/afsd/cm_memmap.h
index 687509d37..f2dd2b344 100644
--- a/src/WINNT/afsd/cm_memmap.h
+++ b/src/WINNT/afsd/cm_memmap.h
@@ -65,6 +65,8 @@ typedef struct cm_config_data {
 
     cm_buf_t * buf_freeListp;
     cm_buf_t * buf_freeListEndp;
+    cm_buf_t * buf_dirtyListp;
+    cm_buf_t * buf_dirtyListEndp;
     cm_buf_t ** buf_hashTablepp;
     cm_buf_t ** buf_fileHashTablepp;
     cm_buf_t * buf_allp;
-- 
2.39.5
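
For illustration only, here is a minimal, self-contained C sketch of the dirty-list
discipline the patch introduces.  It is not part of the patch: buf_t, mark_dirty,
clean_buffer, and sync_pass are simplified stand-ins for cm_buf_t, buf_SetDirty,
buf_CleanAsync, and the buf_IncrSyncer loop, and the cache manager's locking,
reference counting, and actual file-server write-back are deliberately omitted.
It only demonstrates the two invariants the patch relies on: a buffer is appended
to the tail of the singly linked dirty list at most once, and it is unlinked during
the head-to-tail sync walk only after it has been observed to be clean.

/*
 * Illustrative sketch only -- not part of the patch above.
 */
#include <stdio.h>

#define BUF_DIRTY 0x1u

typedef struct buf {
    int         id;
    unsigned    flags;
    struct buf *dirtyp;                 /* next buffer in the dirty list */
} buf_t;

static buf_t *dirty_listp;              /* head of the dirty list */
static buf_t *dirty_list_endp;          /* tail of the dirty list */

/* mark a buffer dirty and append it to the dirty list at most once */
static void mark_dirty(buf_t *bp)
{
    bp->flags |= BUF_DIRTY;
    if (bp->dirtyp != NULL || dirty_list_endp == bp)
        return;                         /* already queued */
    if (dirty_listp == NULL) {
        dirty_listp = dirty_list_endp = bp;
    } else {
        dirty_list_endp->dirtyp = bp;
        dirty_list_endp = bp;
    }
    bp->dirtyp = NULL;
}

/* stand-in for the asynchronous write-back; pretend the write succeeds */
static int clean_buffer(buf_t *bp)
{
    bp->flags &= ~BUF_DIRTY;
    return 1;                           /* "was dirty" */
}

/* one pass of the syncer: clean buffers and unlink the ones no longer dirty */
static void sync_pass(void)
{
    buf_t **bpp, *bp;

    for (bpp = &dirty_listp; (bp = *bpp) != NULL; ) {
        if (bp->flags & BUF_DIRTY)
            clean_buffer(bp);

        if (!(bp->flags & BUF_DIRTY)) {
            /* unlink bp; *bpp now points at the rest of the list */
            *bpp = bp->dirtyp;
            bp->dirtyp = NULL;
            if (dirty_listp == NULL)
                dirty_list_endp = NULL;
        } else {
            /* still dirty; step past it so the loop terminates */
            bpp = &bp->dirtyp;
        }
    }
}

int main(void)
{
    buf_t bufs[3] = { { 1, 0, NULL }, { 2, 0, NULL }, { 3, 0, NULL } };
    buf_t *bp;

    mark_dirty(&bufs[1]);
    mark_dirty(&bufs[0]);
    mark_dirty(&bufs[1]);               /* must not be queued a second time */

    for (bp = dirty_listp; bp != NULL; bp = bp->dirtyp)
        printf("dirty: buffer %d\n", bp->id);        /* prints 2, then 1 */

    sync_pass();
    printf("list empty after sync: %s\n", dirty_listp == NULL ? "yes" : "no");
    return 0;
}

Walking the list through a pointer-to-pointer (bpp) lets the sync pass unlink a
node without keeping a separate previous pointer, which mirrors how the patched
buf_IncrSyncer advances bpp only when a buffer remains dirty.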