From de74227d9c925206cd6d46496ec4682569d3105b Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 29 Apr 2014 12:48:03 -0400 Subject: [PATCH] libafs: Speed up afs_CheckTokenCache On systems with a large number of PAGs and files in use, the periodic daemon job that checks for expired credentials and cleans up the axs cache can run for a very long time. This can lead to kernel soft lockups and eventually hang processes and file access because of unavailable locks. Rework the scanning logic in afs_CheckTokenCache to make the scanning more efficient in most real-world cases. On a test system accessing ~4000 files from processes in 1000 PAGs, this has been observed to reduce the runtime of afs_CheckTokenCache from a problematic ~70s down to about 0.7s. Additionally, this changes the conditions under which an axscache entry is discarded: uid+cell (rather than just uid) must now match, and an entry for which no matching unixuser is found is also discarded. Adapted from code by Jeffrey Altman, who provided the original loop algorithm and implementation. 
Change-Id: I65b275b4244b3b6ab65453623bb8729530a9e1a6 Reviewed-on: http://gerrit.openafs.org/11123 Tested-by: BuildBot Reviewed-by: Chas Williams - CONTRACTOR Reviewed-by: D Brashear --- src/afs/afs_user.c | 83 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/src/afs/afs_user.c b/src/afs/afs_user.c index 79d9faa37..0b63550c0 100644 --- a/src/afs/afs_user.c +++ b/src/afs/afs_user.c @@ -41,6 +41,7 @@ #include #endif +#include "afs/afs_axscache.h" /* Exported variables */ afs_rwlock_t afs_xuser; @@ -117,6 +118,23 @@ afs_GCUserData(int aforce) } /*afs_GCUserData */ +static struct unixuser * +afs_FindUserNoLock(afs_int32 auid, afs_int32 acell) +{ + struct unixuser *tu; + afs_int32 i; + + AFS_STATCNT(afs_FindUser); + i = UHash(auid); + for (tu = afs_users[i]; tu; tu = tu->next) { + if (tu->uid == auid && ((tu->cell == acell) || (acell == -1))) { + tu->refCount++; + return tu; + } + } + return NULL; + +} #ifndef AFS_PAG_MANAGER /* @@ -130,6 +148,9 @@ afs_CheckTokenCache(void) int i; struct unixuser *tu; afs_int32 now; + struct vcache *tvc; + struct axscache *tofreelist; + int do_scan = 0; AFS_STATCNT(afs_CheckCacheResets); ObtainReadLock(&afs_xvcache); @@ -137,8 +158,6 @@ afs_CheckTokenCache(void) now = osi_Time(); for (i = 0; i < NUSERS; i++) { for (tu = afs_users[i]; tu; tu = tu->next) { - afs_int32 uid; - /* * If tokens are still good and user has Kerberos tickets, * check expiration @@ -152,13 +171,48 @@ afs_CheckTokenCache(void) tu->states |= (UTokensBad | UNeedsReset); } } - if (tu->states & UNeedsReset) { - tu->states &= ~UNeedsReset; - uid = tu->uid; - afs_ResetAccessCache(uid, 0); + if (tu->states & UNeedsReset) + do_scan = 1; + } + } + /* Skip the potentially expensive scan if nothing to do */ + if (!do_scan) + goto done; + + tofreelist = NULL; + for (i = 0; i < VCSIZE; i++) { + for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) { + /* really should do this under cache write lock, but that 
+ * is hard to do under the locking hierarchy */ + if (tvc->Access) { + struct axscache **ac, **nac; + + for ( ac = &tvc->Access; *ac;) { + nac = &(*ac)->next; + tu = afs_FindUserNoLock((*ac)->uid, tvc->f.fid.Cell); + if (tu == NULL || (tu->states & UNeedsReset)) { + struct axscache *tmp; + tmp = *ac; + *ac = *nac; + tmp->next = tofreelist; + tofreelist = tmp; + } else + ac = nac; + if (tu != NULL) + tu->refCount--; + } } } } + afs_FreeAllAxs(&tofreelist); + for (i = 0; i < NUSERS; i++) { + for (tu = afs_users[i]; tu; tu = tu->next) { + if (tu->states & UNeedsReset) + tu->states &= ~UNeedsReset; + } + } + +done: ReleaseReadLock(&afs_xuser); ReleaseReadLock(&afs_xvcache); } /*afs_CheckTokenCache */ @@ -228,22 +282,13 @@ struct unixuser * afs_FindUser(afs_int32 auid, afs_int32 acell, afs_int32 locktype) { struct unixuser *tu; - afs_int32 i; - AFS_STATCNT(afs_FindUser); - i = UHash(auid); ObtainWriteLock(&afs_xuser, 99); - for (tu = afs_users[i]; tu; tu = tu->next) { - if (tu->uid == auid && ((tu->cell == acell) || (acell == -1))) { - tu->refCount++; - ReleaseWriteLock(&afs_xuser); - afs_LockUser(tu, locktype, 365); - return tu; - } - } + tu = afs_FindUserNoLock(auid, acell); ReleaseWriteLock(&afs_xuser); - return NULL; - + if (tu) + afs_LockUser(tu, locktype, 365); + return tu; } /*afs_FindUser */ -- 2.39.5