From 66af1a9d0243299069d3245417ecce2d0a6a8b37 Mon Sep 17 00:00:00 2001 From: Matt Benjamin Date: Thu, 25 Sep 2008 12:42:43 +0000 Subject: [PATCH] DEVEL15-bypasscache-20080922 LICENSE IPL10 add cache bypass support (currently linux only) (cherry picked from commit 9d63160cccf88d554e47e4bed8e3bbe26f14f8c2) --- acinclude.m4 | 9 + src/afs/LINUX/osi_alloc.c | 1 + src/afs/LINUX/osi_module.c | 7 +- src/afs/LINUX/osi_vnodeops.c | 461 ++++++++++++++++++----- src/afs/afs.h | 31 ++ src/afs/afs_bypasscache.c | 649 +++++++++++++++++++++++++++++++++ src/afs/afs_bypasscache.h | 153 ++++++++ src/afs/afs_daemons.c | 29 +- src/afs/afs_dcache.c | 7 +- src/afs/afs_osi_vm.c | 10 + src/afs/afs_pioctl.c | 57 ++- src/afs/afs_stats.h | 4 + src/afs/afs_vcache.c | 6 + src/afsd/afsd.c | 7 +- src/config/venus.h | 5 + src/libafs/Makefile.common.in | 3 + src/libuafs/Makefile.common.in | 10 + src/venus/fs.c | 70 ++++ 18 files changed, 1421 insertions(+), 98 deletions(-) create mode 100644 src/afs/afs_bypasscache.c create mode 100644 src/afs/afs_bypasscache.h diff --git a/acinclude.m4 b/acinclude.m4 index dec9d025e..116d81482 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -51,6 +51,11 @@ AC_ARG_ENABLE([namei-fileserver], fileserver])], , [enable_namei_fileserver="default"]) +AC_ARG_ENABLE([cache-bypass], + [AS_HELP_STRING([--enable-cache-bypass], + [enable client support for cache bypass])], + , + [enable_cache_bypass="no"]) AC_ARG_ENABLE([supergroups], [AS_HELP_STRING([--enable-supergroups], [enable support for nested pts groups])], @@ -1339,6 +1344,10 @@ if test "$enable_largefile_fileserver" = "yes"; then AC_DEFINE(AFS_LARGEFILE_ENV, 1, [define if you want large file fileserver]) fi +if test "$enable_cache_bypass" = "yes"; then + AC_DEFINE(AFS_CACHE_BYPASS, 1, [define to activate cache bypassing Unix client]) +fi + if test "$enable_namei_fileserver" = "yes"; then AC_DEFINE(AFS_NAMEI_ENV, 1, [define if you want to want namei fileserver]) else diff --git a/src/afs/LINUX/osi_alloc.c 
b/src/afs/LINUX/osi_alloc.c index 95ed5e53c..e31d81ece 100644 --- a/src/afs/LINUX/osi_alloc.c +++ b/src/afs/LINUX/osi_alloc.c @@ -366,6 +366,7 @@ osi_linux_free(void *addr) afs_atomlist_put(al_mem_pool, lmp); /* return osi_linux_mem struct to pool */ afs_linux_cur_allocs--; } else { + BUG(); printf("osi_linux_free: failed to remove chunk from hashtable\n"); } diff --git a/src/afs/LINUX/osi_module.c b/src/afs/LINUX/osi_module.c index 4b87f2c06..3005e2db6 100644 --- a/src/afs/LINUX/osi_module.c +++ b/src/afs/LINUX/osi_module.c @@ -111,7 +111,9 @@ init_module(void) osi_proc_init(); osi_ioctl_init(); #endif - +#if defined(AFS_CACHE_BYPASS) + afs_warn("Cache bypass patched libafs module init.\n"); +#endif return 0; } @@ -123,6 +125,9 @@ void cleanup_module(void) #endif { +#if defined(AFS_CACHE_BYPASS) + afs_warn("Cache bypass patched libafs module cleaning up.\n"); +#endif #ifdef LINUX_KEYRING_SUPPORT osi_keyring_shutdown(); #endif diff --git a/src/afs/LINUX/osi_vnodeops.c b/src/afs/LINUX/osi_vnodeops.c index 13bb371c6..b780f52cf 100644 --- a/src/afs/LINUX/osi_vnodeops.c +++ b/src/afs/LINUX/osi_vnodeops.c @@ -37,6 +37,11 @@ RCSID #endif #if defined(AFS_LINUX26_ENV) #include "h/writeback.h" +#include "h/pagevec.h" +#endif +#if defined(AFS_CACHE_BYPASS) +#include "afs/lock.h" +#include "afs/afs_bypasscache.h" #endif #ifdef pgoff2loff @@ -46,10 +51,29 @@ RCSID #endif #if defined(AFS_LINUX26_ENV) +#define LockPage(pp) lock_page(pp) #define UnlockPage(pp) unlock_page(pp) #endif extern struct vcache *afs_globalVp; +#if defined(AFS_LINUX26_ENV) +/* Some uses of BKL are perhaps not needed for bypass or memcache-- + * why don't we try it out? 
*/ +extern struct afs_cacheOps afs_UfsCacheOps; +#define maybe_lock_kernel() \ + do { \ + if(afs_cacheType == &afs_UfsCacheOps) \ + lock_kernel(); \ + } while(0); + + +#define maybe_unlock_kernel() \ + do { \ + if(afs_cacheType == &afs_UfsCacheOps) \ + unlock_kernel(); \ + } while(0); +#endif /* AFS_CACHE_BYPASS */ + static ssize_t afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp) { @@ -57,12 +81,11 @@ afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp) struct vcache *vcp = VTOAFS(fp->f_dentry->d_inode); cred_t *credp = crref(); struct vrequest treq; - + afs_size_t isize, offindex; AFS_GLOCK(); afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp, ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, 99999); - /* get a validated vcache entry */ code = afs_InitReq(&treq, credp); if (!code) @@ -71,6 +94,13 @@ afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp) if (code) code = -code; else { + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + offindex = *offp >> PAGE_CACHE_SHIFT; + if(offindex > isize) { + code=0; + goto done; + } + osi_FlushPages(vcp, credp); /* ensure stale pages are gone */ AFS_GUNLOCK(); #ifdef DO_SYNC_READ @@ -84,7 +114,7 @@ afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp) afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp, ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32, code); - +done: AFS_GUNLOCK(); crfree(credp); return code; @@ -167,7 +197,7 @@ afs_linux_readdir(struct file *fp, void *dirbuf, filldir_t filldir) struct afs_fakestat_state fakestat; #if defined(AFS_LINUX26_ENV) - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); AFS_STATCNT(afs_readdir); @@ -325,7 +355,7 @@ out: out1: AFS_GUNLOCK(); #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif return code; } @@ -399,13 +429,13 @@ afs_linux_open(struct inode *ip, struct file *fp) int code; #ifdef 
AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); code = afs_open(&vcp, fp->f_flags, credp); AFS_GUNLOCK(); #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); @@ -420,13 +450,13 @@ afs_linux_release(struct inode *ip, struct file *fp) int code = 0; #ifdef AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); code = afs_close(vcp, fp->f_flags, credp); AFS_GUNLOCK(); #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); @@ -445,13 +475,13 @@ afs_linux_fsync(struct file *fp, struct dentry *dp) cred_t *credp = crref(); #ifdef AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); code = afs_fsync(VTOAFS(ip), credp); AFS_GUNLOCK(); #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); return -code; @@ -585,6 +615,9 @@ afs_linux_flush(struct file *fp) struct vcache *vcp; cred_t *credp; int code; +#if defined(AFS_CACHE_BYPASS) + int bypasscache; +#endif AFS_GLOCK(); @@ -601,6 +634,21 @@ afs_linux_flush(struct file *fp) code = afs_InitReq(&treq, credp); if (code) goto out; +#if defined(AFS_CACHE_BYPASS) + /* If caching is bypassed for this file, or globally, just return 0 */ + if(cache_bypass_strategy == ALWAYS_BYPASS_CACHE) + bypasscache = 1; + else { + ObtainReadLock(&vcp->lock); + if(vcp->cachingStates & FCSBypass) + bypasscache = 1; + ReleaseReadLock(&vcp->lock); + } + if(bypasscache) { + /* future proof: don't rely on 0 return from afs_InitReq */ + code = 0; goto out; + } +#endif ObtainSharedLock(&vcp->lock, 535); if ((vcp->execsOrWriters > 0) && (file_count(fp) == 1)) { @@ -761,7 +809,7 @@ afs_linux_revalidate(struct dentry *dp) int code; #ifdef AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); @@ -787,7 +835,7 @@ afs_linux_revalidate(struct dentry *dp) AFS_GUNLOCK(); #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); @@ -830,7 
+878,7 @@ afs_linux_dentry_revalidate(struct dentry *dp) int valid; #ifdef AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); @@ -918,7 +966,7 @@ afs_linux_dentry_revalidate(struct dentry *dp) d_drop(dp); } #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif return valid; @@ -994,7 +1042,7 @@ afs_linux_create(struct inode *dip, struct dentry *dp, int mode) vattr.va_type = mode & S_IFMT; #if defined(AFS_LINUX26_ENV) - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); code = afs_create(VTOAFS(dip), (char *)name, &vattr, NONEXCL, mode, @@ -1013,7 +1061,7 @@ afs_linux_create(struct inode *dip, struct dentry *dp, int mode) AFS_GUNLOCK(); #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); return -code; @@ -1043,7 +1091,7 @@ afs_linux_lookup(struct inode *dip, struct dentry *dp) int code; #if defined(AFS_LINUX26_ENV) - lock_kernel(); + maybe_lock_kernel(); #endif AFS_GLOCK(); code = afs_lookup(VTOAFS(dip), comp, &vcp, credp); @@ -1101,7 +1149,7 @@ afs_linux_lookup(struct inode *dip, struct dentry *dp) #endif #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); @@ -1155,7 +1203,7 @@ afs_linux_unlink(struct inode *dip, struct dentry *dp) struct vcache *tvc = VTOAFS(dp->d_inode); #if defined(AFS_LINUX26_ENV) - lock_kernel(); + maybe_lock_kernel(); #endif if (VREFCOUNT(tvc) > 1 && tvc->opens > 0 && !(tvc->states & CUnlinked)) { @@ -1220,7 +1268,7 @@ afs_linux_unlink(struct inode *dip, struct dentry *dp) d_drop(dp); out: #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); return -code; @@ -1258,7 +1306,7 @@ afs_linux_mkdir(struct inode *dip, struct dentry *dp, int mode) const char *name = dp->d_name.name; #if defined(AFS_LINUX26_ENV) - lock_kernel(); + maybe_lock_kernel(); #endif VATTR_NULL(&vattr); vattr.va_mask = ATTR_MODE; @@ -1279,7 +1327,7 @@ afs_linux_mkdir(struct inode *dip, struct dentry 
*dp, int mode) AFS_GUNLOCK(); #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); return -code; @@ -1327,7 +1375,7 @@ afs_linux_rename(struct inode *oldip, struct dentry *olddp, #if defined(AFS_LINUX26_ENV) /* Prevent any new references during rename operation. */ - lock_kernel(); + maybe_lock_kernel(); if (!d_unhashed(newdp)) { d_drop(newdp); @@ -1356,7 +1404,7 @@ afs_linux_rename(struct inode *oldip, struct dentry *olddp, d_rehash(rehash); #if defined(AFS_LINUX26_ENV) - unlock_kernel(); + maybe_unlock_kernel(); #endif crfree(credp); @@ -1472,94 +1520,320 @@ afs_linux_follow_link(struct dentry *dp, struct dentry *basep, #endif /* AFS_LINUX24_ENV */ #endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */ +#if defined(AFS_CACHE_BYPASS) + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + +/* The kernel calls readpages before trying readpage, with a list of + * pages. The readahead algorithm expands num_pages when it thinks + * the application will benefit. Unlike readpage, the pages are not + * necessarily allocated. 
If we do not a) allocate required pages and + * b) remove them from page_list, linux will re-enter at afs_linux_readpage + * for each required page (and the page will be pre-allocated) */ + +static int +afs_linux_readpages(struct file *fp, struct address_space *mapping, + struct list_head *page_list, unsigned num_pages) +{ + afs_int32 page_ix; + uio_t *auio; + afs_offs_t offset; + struct iovec* iovecp; + struct nocache_read_request *ancr; + struct page *pp, *ppt; + struct pagevec lrupv; + afs_int32 code = 0; + + cred_t *credp; + struct inode *ip = FILE_INODE(fp); + struct vcache *avc = VTOAFS(ip); + afs_int32 bypasscache = 0; /* bypass for this read */ + afs_int32 base_index = 0; + afs_int32 page_count = 0; + afs_int32 isize; + + credp = crref(); + + switch(cache_bypass_strategy) { + case NEVER_BYPASS_CACHE: + break; + case ALWAYS_BYPASS_CACHE: + bypasscache = 1; + break; + case LARGE_FILES_BYPASS_CACHE: + if(i_size_read(ip) > cache_bypass_threshold) { + bypasscache = 1; + } + break; + default: + break; + } + + /* In the new incarnation of selective caching, a file's caching policy + * can change, eg because file size exceeds threshold, etc. 
*/ + trydo_cache_transition(avc, credp, bypasscache); + + if(!bypasscache) { + while(!list_empty(page_list)) { + pp = list_entry(page_list->prev, struct page, lru); + list_del(&pp->lru); + } + goto out; + } + /* background thread must free: iovecp, auio, ancr */ + iovecp = osi_Alloc(num_pages * sizeof(struct iovec)); + + auio = osi_Alloc(sizeof(uio_t)); + auio->uio_iov = iovecp; + auio->uio_iovcnt = num_pages; + auio->uio_flag = UIO_READ; + auio->uio_seg = AFS_UIOSYS; + auio->uio_resid = num_pages * PAGE_SIZE; + + ancr = osi_Alloc(sizeof(struct nocache_read_request)); + ancr->auio = auio; + ancr->offset = auio->uio_offset; + ancr->length = auio->uio_resid; + + pagevec_init(&lrupv, 0); + + for(page_ix = 0; page_ix < num_pages; ++page_ix) { + + if(list_empty(page_list)) + break; + + pp = list_entry(page_list->prev, struct page, lru); + /* If we allocate a page and don't remove it from page_list, + * the page cache gets upset. */ + list_del(&pp->lru); + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + if(pp->index > isize) { + if(PageLocked(pp)) + UnlockPage(pp); + continue; + } + + if(page_ix == 0) { + offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT; + auio->uio_offset = offset; + base_index = pp->index; + } + iovecp[page_ix].iov_len = PAGE_SIZE; + code = add_to_page_cache(pp, mapping, pp->index, GFP_KERNEL); + if(base_index != pp->index) { + if(PageLocked(pp)) + UnlockPage(pp); + page_cache_release(pp); + iovecp[page_ix].iov_base = (void *) 0; + base_index++; + continue; + } + base_index++; + if(code) { + if(PageLocked(pp)) + UnlockPage(pp); + page_cache_release(pp); + iovecp[page_ix].iov_base = (void *) 0; + } else { + page_count++; + if(!PageLocked(pp)) { + LockPage(pp); + } + + /* save the page for background map */ + iovecp[page_ix].iov_base = (void*) pp; + + /* and put it on the LRU cache */ + if (!pagevec_add(&lrupv, pp)) + __pagevec_lru_add(&lrupv); + } + } + + /* If there were useful pages in the page list, make sure all pages + * are 
in the LRU cache, then schedule the read */ + if(page_count) { + pagevec_lru_add(&lrupv); + code = afs_ReadNoCache(avc, ancr, credp); + } else { + /* If there is nothing for the background thread to handle, + * it won't be freeing the things that we never gave it */ + osi_Free(iovecp, num_pages * sizeof(struct iovec)); + osi_Free(auio, sizeof(uio_t)); + osi_Free(ancr, sizeof(struct nocache_read_request)); + } + /* we do not flush, release, or unmap pages--that will be + * done for us by the background thread as each page comes in + * from the fileserver */ + crfree(credp); + +out: + return -code; +} + +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */ +#endif /* defined(AFS_CACHE_BYPASS */ + + /* afs_linux_readpage * all reads come through here. A strategy-like read call. */ static int afs_linux_readpage(struct file *fp, struct page *pp) { - int code; - cred_t *credp = crref(); + afs_int32 code; + cred_t *credp = crref(); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) - char *address; - afs_offs_t offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT; + char *address; + afs_offs_t offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT; #else - ulong address = afs_linux_page_address(pp); - afs_offs_t offset = pageoff(pp); + ulong address = afs_linux_page_address(pp); + afs_offs_t offset = pageoff(pp); #endif - uio_t tuio; - struct iovec iovec; - struct inode *ip = FILE_INODE(fp); - int cnt = page_count(pp); - struct vcache *avc = VTOAFS(ip); - +#if defined(AFS_CACHE_BYPASS) + afs_int32 bypasscache = 0; /* bypass for this read */ + struct nocache_read_request *ancr; +#endif + afs_int32 isize; + uio_t *auio; + struct iovec *iovecp; + struct inode *ip = FILE_INODE(fp); + afs_int32 cnt = page_count(pp); + struct vcache *avc = VTOAFS(ip); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) - address = kmap(pp); - ClearPageError(pp); + address = kmap(pp); + ClearPageError(pp); #else - atomic_add(1, &pp->count); - set_bit(PG_locked, &pp->flags); /* other bits? 
See mm.h */ - clear_bit(PG_error, &pp->flags); -#endif - - setup_uio(&tuio, &iovec, (char *)address, offset, PAGE_SIZE, UIO_READ, - AFS_UIOSYS); + atomic_add(1, &pp->count); + set_bit(PG_locked, &pp->flags); /* other bits? See mm.h */ + clear_bit(PG_error, &pp->flags); +#endif + /* If the page is past the end of the file, skip it */ + isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT; + if(pp->index > isize) { + if(PageLocked(pp)) + UnlockPage(pp); + goto done; + } + /* if bypasscache, receiver frees, else we do */ + auio = osi_Alloc(sizeof(uio_t)); + iovecp = osi_Alloc(sizeof(struct iovec)); + + setup_uio(auio, iovecp, (char *)address, offset, PAGE_SIZE, UIO_READ, + AFS_UIOSYS); + +#if defined(AFS_CACHE_BYPASS) + + switch(cache_bypass_strategy) { + case NEVER_BYPASS_CACHE: + break; + case ALWAYS_BYPASS_CACHE: + bypasscache = 1; + break; + case LARGE_FILES_BYPASS_CACHE: + if(i_size_read(ip) > cache_bypass_threshold) { + bypasscache = 1; + } + break; + default: + break; + } + + /* In the new incarnation of selective caching, a file's caching policy + * can change, eg because file size exceeds threshold, etc. 
*/ + trydo_cache_transition(avc, credp, bypasscache); + + if(bypasscache) { + if(address) + kunmap(pp); + /* save the page for background map */ + auio->uio_iov->iov_base = (void*) pp; + /* the background thread will free this */ + ancr = osi_Alloc(sizeof(struct nocache_read_request)); + ancr->auio = auio; + ancr->offset = offset; + ancr->length = PAGE_SIZE; + + maybe_lock_kernel(); + code = afs_ReadNoCache(avc, ancr, credp); + maybe_unlock_kernel(); + + goto done; /* skips release page, doing it in bg thread */ + } +#endif + #ifdef AFS_LINUX24_ENV - lock_kernel(); + maybe_lock_kernel(); #endif - AFS_GLOCK(); - afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, 99999); /* not a possible code value */ - code = afs_rdwr(avc, &tuio, UIO_READ, 0, credp); - afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, - ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, - code); - AFS_GUNLOCK(); + AFS_GLOCK(); + afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, 99999); /* not a possible code value */ + + code = afs_rdwr(avc, auio, UIO_READ, 0, credp); + + afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, + ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, + code); + AFS_GUNLOCK(); #ifdef AFS_LINUX24_ENV - unlock_kernel(); + maybe_unlock_kernel(); #endif + if (!code) { + /* XXX valid for no-cache also? Check last bits of files... :) + * Cognate code goes in afs_NoCacheFetchProc. 
*/ + if (auio->uio_resid) /* zero remainder of page */ + memset((void *)(address + (PAGE_SIZE - auio->uio_resid)), 0, + auio->uio_resid); - if (!code) { - if (tuio.uio_resid) /* zero remainder of page */ - memset((void *)(address + (PAGE_SIZE - tuio.uio_resid)), 0, - tuio.uio_resid); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) - flush_dcache_page(pp); - SetPageUptodate(pp); + flush_dcache_page(pp); + SetPageUptodate(pp); #else - set_bit(PG_uptodate, &pp->flags); + set_bit(PG_uptodate, &pp->flags); #endif - } + } /* !code */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) - kunmap(pp); - UnlockPage(pp); + kunmap(pp); + UnlockPage(pp); #else - clear_bit(PG_locked, &pp->flags); - wake_up(&pp->wait); - free_page(address); + clear_bit(PG_locked, &pp->flags); + wake_up(&pp->wait); + free_page(address); #endif - if (!code && AFS_CHUNKOFFSET(offset) == 0) { - struct dcache *tdc; - struct vrequest treq; +#if defined(AFS_CACHE_BYPASS) - AFS_GLOCK(); - code = afs_InitReq(&treq, credp); - if (!code && !NBObtainWriteLock(&avc->lock, 534)) { - tdc = afs_FindDCache(avc, offset); - if (tdc) { - if (!(tdc->mflags & DFNextStarted)) - afs_PrefetchChunk(avc, tdc, credp, &treq); - afs_PutDCache(tdc); - } - ReleaseWriteLock(&avc->lock); - } - AFS_GUNLOCK(); - } +/* do not call afs_GetDCache if cache is bypassed */ + if(bypasscache) + goto done; + +#endif - crfree(credp); - return -code; + /* free if not bypassing cache */ + osi_Free(auio, sizeof(uio_t)); + osi_Free(iovecp, sizeof(struct iovec)); + + if (!code && AFS_CHUNKOFFSET(offset) == 0) { + struct dcache *tdc; + struct vrequest treq; + + AFS_GLOCK(); + code = afs_InitReq(&treq, credp); + if (!code && !NBObtainWriteLock(&avc->lock, 534)) { + tdc = afs_FindDCache(avc, offset); + if (tdc) { + if (!(tdc->mflags & DFNextStarted)) + afs_PrefetchChunk(avc, tdc, credp, &treq); + afs_PutDCache(tdc); + } + ReleaseWriteLock(&avc->lock); + } + AFS_GUNLOCK(); + } + +done: + crfree(credp); + return -code; } @@ -1581,7 +1855,7 @@ 
afs_linux_writepage_sync(struct inode *ip, struct page *pp, base = (((loff_t) pp->index) << PAGE_CACHE_SHIFT) + offset; credp = crref(); - lock_kernel(); + maybe_lock_kernel(); AFS_GLOCK(); afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp, ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp), @@ -1609,7 +1883,7 @@ afs_linux_writepage_sync(struct inode *ip, struct page *pp, ICL_TYPE_INT32, code); AFS_GUNLOCK(); - unlock_kernel(); + maybe_unlock_kernel(); crfree(credp); kunmap(pp); @@ -1790,7 +2064,7 @@ static struct inode_operations afs_file_iops = { .setattr = afs_notify_change, #else .default_file_ops = &afs_file_fops, - .readpage = afs_linux_readpage, + .readpage = afs_linux_readpage, .revalidate = afs_linux_revalidate, .updatepage = afs_linux_updatepage, #endif @@ -1799,6 +2073,9 @@ static struct inode_operations afs_file_iops = { #if defined(AFS_LINUX24_ENV) static struct address_space_operations afs_file_aops = { .readpage = afs_linux_readpage, +#if defined(AFS_CACHE_BYPASS) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + .readpages = afs_linux_readpages, +#endif .writepage = afs_linux_writepage, .commit_write = afs_linux_commit_write, .prepare_write = afs_linux_prepare_write, @@ -1843,7 +2120,7 @@ afs_symlink_filler(struct file *file, struct page *page) char *p = (char *)kmap(page); int code; - lock_kernel(); + maybe_lock_kernel(); AFS_GLOCK(); code = afs_linux_ireadlink(ip, p, PAGE_SIZE, AFS_UIOSYS); AFS_GUNLOCK(); @@ -1851,7 +2128,7 @@ afs_symlink_filler(struct file *file, struct page *page) if (code < 0) goto fail; p[code] = '\0'; /* null terminate? 
*/ - unlock_kernel(); + maybe_unlock_kernel(); SetPageUptodate(page); kunmap(page); @@ -1859,7 +2136,7 @@ afs_symlink_filler(struct file *file, struct page *page) return 0; fail: - unlock_kernel(); + maybe_unlock_kernel(); SetPageError(page); kunmap(page); diff --git a/src/afs/afs.h b/src/afs/afs.h index 0c3b1c0a7..df6da6f74 100644 --- a/src/afs/afs.h +++ b/src/afs/afs.h @@ -121,6 +121,10 @@ struct sysname_info { #define BOP_STORE 2 /* parm1 is chunk to store */ #define BOP_PATH 3 /* parm1 is path, parm2 is chunk to fetch */ +#if defined(AFS_CACHE_BYPASS) +#define BOP_FETCH_NOCACHE 4 /* parms are: vnode ptr, offset, segment ptr, addr, cred ptr */ +#endif + #define B_DONTWAIT 1 /* On failure return; don't wait */ /* protocol is: refCount is incremented by user to take block out of free pool. @@ -591,6 +595,22 @@ struct SimpleLocks { /*... to be continued ... */ #endif +#if defined(AFS_CACHE_BYPASS) +/* vcache (file) cachingStates bits */ +#define FCSDesireBypass 0x1 /* This file should bypass the cache */ +#define FCSBypass 0x2 /* This file is currently NOT being cached */ +#define FCSManuallySet 0x4 /* The bypass flags were set, or reset, manually (via pioctl) + and should not be overridden by the file's name */ + +/* Flag values used by the Transition routines */ +#define TRANSChangeDesiredBit 0x1 /* The Transition routine should set or + * reset the FCSDesireBypass bit */ +#define TRANSVcacheIsLocked 0x2 /* The Transition routine does not need to + * lock vcache (it's already locked) */ +#define TRANSSetManualBit 0x4 /* The Transition routine should set FCSManuallySet so that + * filename checking does not override pioctl requests */ +#endif /* AFS_CACHE_BYPASS */ + #define CPSIZE 2 #if defined(AFS_XBSD_ENV) || defined(AFS_DARWIN_ENV) #define vrefCount v->v_usecount @@ -740,6 +760,17 @@ struct vcache { * this file. */ short flockCount; /* count of flock readers, or -1 if writer */ char mvstat; /* 0->normal, 1->mt pt, 2->root. 
*/ + +#if defined(AFS_CACHE_BYPASS) + char cachingStates; /* Caching policies for this file */ + afs_uint32 cachingTransitions; /* # of times file has flopped between caching and not */ +#if defined(AFS_LINUX24_ENV) + off_t next_seq_offset; /* Next sequential offset (used by prefetch/readahead) */ +#else + off_t next_seq_blk_offset; /* accounted in blocks for Solaris & IRIX */ +#endif +#endif + afs_uint32 states; /* state bits */ #if defined(AFS_SUN5_ENV) afs_uint32 vstates; /* vstate bits */ diff --git a/src/afs/afs_bypasscache.c b/src/afs/afs_bypasscache.c new file mode 100644 index 000000000..1a7dba6ea --- /dev/null +++ b/src/afs/afs_bypasscache.c @@ -0,0 +1,649 @@ +/* + * COPYRIGHT © 2000 + * THE REGENTS OF THE UNIVERSITY OF MICHIGAN + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of The University of + * Michigan is not used in any advertising or publicity + * pertaining to the use of distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * University of Michigan is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION + * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY + * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY O + * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE + * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE + * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR + * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING + * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN + * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. + */ + + /* + * Portions Copyright (c) 2008 + * The Linux Box Corporation + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of the Linux Box + * Corporation is not used in any advertising or publicity + * pertaining to the use or distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * Linux Box Corporation is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * This software is provided as is, without representation + * from the Linux Box Corporation as to its fitness for any + * purpose, and without warranty by the Linux Box Corporation + * of any kind, either express or implied, including + * without limitation the implied warranties of + * merchantability and fitness for a particular purpose. The + * Linux Box Corporation shall not be liable for any damages, + * including special, indirect, incidental, or consequential + * damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been + * or is hereafter advised of the possibility of such damages. 
+ */ + + +#include +#include "afs/param.h" + +#if defined(AFS_CACHE_BYPASS) + +#include "afs/afs_bypasscache.h" + +/* + * afs_bypasscache.c + * + */ +#include "afs/sysincludes.h" /* Standard vendor system headers */ +#include "afs/afsincludes.h" /* Afs-based standard headers */ +#include "afs/afs_stats.h" /* statistics */ +#include "afs/nfsclient.h" +#include "rx/rx_globals.h" + +#if defined(AFS_LINUX26_ENV) +#define LockPage(pp) lock_page(pp) +#define UnlockPage(pp) unlock_page(pp) +#endif +#define AFS_KMAP_ATOMIC + +#ifndef afs_min +#define afs_min(A,B) ((A)<(B)) ? (A) : (B) +#endif + +/* conditional GLOCK macros */ +#define COND_GLOCK(var) \ + do { \ + var = ISAFS_GLOCK(); \ + if(!var) \ + RX_AFS_GLOCK(); \ + } while(0); + +#define COND_RE_GUNLOCK(var) \ + do { \ + if(var) \ + RX_AFS_GUNLOCK(); \ + } while(0); + + +/* conditional GUNLOCK macros */ + +#define COND_GUNLOCK(var) \ + do { \ + var = ISAFS_GLOCK(); \ + if(var) \ + RX_AFS_GUNLOCK(); \ + } while(0); + +#define COND_RE_GLOCK(var) \ + do { \ + if(var) \ + RX_AFS_GLOCK(); \ + } while(0); + + +int cache_bypass_strategy = NEVER_BYPASS_CACHE; +int cache_bypass_threshold = AFS_CACHE_BYPASS_DISABLED; /* file size > threshold triggers bypass */ +int cache_bypass_prefetch = 1; /* Should we do prefetching ? */ + +extern afs_rwlock_t afs_xcbhash; + +/* + * This is almost exactly like the PFlush() routine in afs_pioctl.c, + * but that routine is static. We are about to change a file from + * normal caching to bypass it's caching. Therefore, we want to + * free up any cache space in use by the file, and throw out any + * existing VM pages for the file. We keep track of the number of + * times we go back and forth from caching to bypass. 
+ */ +void afs_TransitionToBypass(register struct vcache *avc, register struct AFS_UCRED *acred, int aflags) +{ + + afs_int32 code; + struct vrequest treq; + int setDesire = 0; + int setManual = 0; + + if(!avc) + return; + + if(avc->states & FCSBypass) + osi_Panic("afs_TransitionToBypass: illegal transition to bypass--already FCSBypass\n"); + + if(aflags & TRANSChangeDesiredBit) + setDesire = 1; + if(aflags & TRANSSetManualBit) + setManual = 1; + +#ifdef AFS_BOZONLOCK_ENV + afs_BozonLock(&avc->pvnLock, avc); /* Since afs_TryToSmush will do a pvn_vptrunc */ +#else + AFS_GLOCK(); +#endif + ObtainWriteLock(&avc->lock, 925); + + /* If we never cached this, just change state */ + if(setDesire && (!avc->cachingStates & FCSBypass)) { + avc->states |= FCSBypass; + goto done; + } + /* cg2v, try to store any chunks not written 20071204 */ + if (avc->execsOrWriters > 0) { + code = afs_InitReq(&treq, acred); + if(!code) + code = afs_StoreAllSegments(avc, &treq, AFS_SYNC | AFS_LASTSTORE); + } +#if 0 + /* also cg2v, don't dequeue the callback */ + ObtainWriteLock(&afs_xcbhash, 956); + afs_DequeueCallback(avc); + ReleaseWriteLock(&afs_xcbhash); +#endif + avc->states &= ~(CStatd | CDirty); /* next reference will re-stat cache entry */ + /* now find the disk cache entries */ + afs_TryToSmush(avc, acred, 1); + osi_dnlc_purgedp(avc); + if (avc->linkData && !(avc->states & CCore)) { + afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1); + avc->linkData = NULL; + } + + avc->cachingStates |= FCSBypass; /* Set the bypass flag */ + if(setDesire) + avc->cachingStates |= FCSDesireBypass; + if(setManual) + avc->cachingStates |= FCSManuallySet; + avc->cachingTransitions++; + +done: + ReleaseWriteLock(&avc->lock); +#ifdef AFS_BOZONLOCK_ENV + afs_BozonUnlock(&avc->pvnLock, avc); +#else + AFS_GUNLOCK(); +#endif +} + +/* + * This is almost exactly like the PFlush() routine in afs_pioctl.c, + * but that routine is static. 
We are about to change a file from + * bypassing caching to normal caching. Therefore, we want to + * throw out any existing VM pages for the file. We keep track of + * the number of times we go back and forth from caching to bypass. + */ +void afs_TransitionToCaching(register struct vcache *avc, register struct AFS_UCRED *acred, int aflags) +{ + int resetDesire = 0; + int setManual = 0; + + if(!avc) + return; + + if(!avc->states & FCSBypass) + osi_Panic("afs_TransitionToCaching: illegal transition to caching--already caching\n"); + + if(aflags & TRANSChangeDesiredBit) + resetDesire = 1; + if(aflags & TRANSSetManualBit) + setManual = 1; + +#ifdef AFS_BOZONLOCK_ENV + afs_BozonLock(&avc->pvnLock, avc); /* Since afs_TryToSmush will do a pvn_vptrunc */ +#else + AFS_GLOCK(); +#endif + ObtainWriteLock(&avc->lock, 926); + + /* Ok, we actually do need to flush */ + ObtainWriteLock(&afs_xcbhash, 957); + afs_DequeueCallback(avc); + avc->states &= ~(CStatd | CDirty); /* next reference will re-stat cache entry */ + ReleaseWriteLock(&afs_xcbhash); + /* now find the disk cache entries */ + afs_TryToSmush(avc, acred, 1); + osi_dnlc_purgedp(avc); + if (avc->linkData && !(avc->states & CCore)) { + afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1); + avc->linkData = NULL; + } + + avc->cachingStates &= ~(FCSBypass); /* Reset the bypass flag */ + if (resetDesire) + avc->cachingStates &= ~(FCSDesireBypass); + if (setManual) + avc->cachingStates |= FCSManuallySet; + avc->cachingTransitions++; + + ReleaseWriteLock(&avc->lock); +#ifdef AFS_BOZONLOCK_ENV + afs_BozonUnlock(&avc->pvnLock, avc); +#else + AFS_GUNLOCK(); +#endif +} + +/* In the case where there's an error in afs_NoCacheFetchProc or + * afs_PrefetchNoCache, all of the pages they've been passed need + * to be unlocked. 
+ */ +#if defined(AFS_LINUX24_ENV) +#define unlock_pages(auio) \ + do { \ + struct iovec *ciov; \ + struct page *pp; \ + afs_int32 iovmax; \ + afs_int32 iovno = 0; \ + ciov = auio->uio_iov; \ + iovmax = auio->uio_iovcnt - 1; \ + pp = (struct page*) ciov->iov_base; \ + afs_warn("BYPASS: Unlocking pages..."); \ + while(1) { \ + if(pp != NULL && PageLocked(pp)) \ + UnlockPage(pp); \ + iovno++; \ + if(iovno > iovmax) \ + break; \ + ciov = (auio->uio_iov + iovno); \ + pp = (struct page*) ciov->iov_base; \ + } \ + afs_warn("Pages Unlocked.\n"); \ + } while(0); +#else +#ifdef UKERNEL +#define unlock_pages(auio) \ + do { } while(0); +#else +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif + +/* no-cache prefetch routine */ +static afs_int32 +afs_NoCacheFetchProc(register struct rx_call *acall, + register struct vcache *avc, + register uio_t *auio, + afs_int32 release_pages) +{ + afs_int32 length; + afs_int32 code; + int tlen; + int moredata, iovno, iovoff, iovmax, clen, result, locked; + struct iovec *ciov; + struct page *pp; + char *address; +#ifdef AFS_KMAP_ATOMIC + char *page_buffer = osi_Alloc(PAGE_SIZE); +#else + char *page_buffer = NULL; +#endif + + ciov = auio->uio_iov; + pp = (struct page*) ciov->iov_base; + iovmax = auio->uio_iovcnt - 1; + iovno = iovoff = result = 0; + do { + + COND_GUNLOCK(locked); + code = rx_Read(acall, (char *)&length, sizeof(afs_int32)); + COND_RE_GLOCK(locked); + + if (code != sizeof(afs_int32)) { + result = 0; + afs_warn("Preread error. code: %d instead of %d\n", + code, sizeof(afs_int32)); + unlock_pages(auio); + goto done; + } else + length = ntohl(length); + + /* + * The fetch protocol is extended for the AFS/DFS translator + * to allow multiple blocks of data, each with its own length, + * to be returned. As long as the top bit is set, there are more + * blocks expected. + * + * We do not do this for AFS file servers because they sometimes + * return large negative numbers as the transfer size. 
+ */ + if (avc->states & CForeign) { + moredata = length & 0x80000000; + length &= ~0x80000000; + } else { + moredata = 0; + } + + while (length > 0) { + + clen = ciov->iov_len - iovoff; + tlen = afs_min(length, clen); +#ifdef AFS_LINUX24_ENV +#ifndef AFS_KMAP_ATOMIC + if(pp) + address = kmap(pp); + else { + /* rx doesn't provide an interface to simply advance + or consume n bytes. for now, allocate a PAGE_SIZE + region of memory to receive bytes in the case that + there were holes in readpages */ + if(page_buffer == NULL) + page_buffer = osi_Alloc(PAGE_SIZE); + address = page_buffer; + } +#else + address = page_buffer; +#endif +#else +#ifndef UKERNEL +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif /* LINUX24 */ + COND_GUNLOCK(locked); + code = rx_Read(acall, address, tlen); + COND_RE_GLOCK(locked); + + if (code < 0) { + afs_warn("afs_NoCacheFetchProc: rx_Read error. Return code was %d\n", code); + result = 0; + unlock_pages(auio); + goto done; + } else if (code == 0) { + result = 0; + afs_warn("afs_NoCacheFetchProc: rx_Read returned zero. 
Aborting.\n"); + unlock_pages(auio); + goto done; + } + length -= code; + tlen -= code; + + if(tlen > 0) { + iovoff += code; + address += code; + + } else { +#ifdef AFS_LINUX24_ENV +#ifdef AFS_KMAP_ATOMIC + if(pp) { + address = kmap_atomic(pp, KM_USER0); + memcpy(address, page_buffer, PAGE_SIZE); + kunmap_atomic(address, KM_USER0); + } +#endif +#else +#ifndef UKERNEL +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif /* LINUX 24 */ + /* we filled a page, conditionally release it */ + if(release_pages && ciov->iov_base) { + /* this is appropriate when no caller intends to unlock + * and release the page */ +#ifdef AFS_LINUX24_ENV + SetPageUptodate(pp); + if(PageLocked(pp)) + UnlockPage(pp); + else + afs_warn("afs_NoCacheFetchProc: page not locked at iovno %d!\n", iovno); + +#ifndef AFS_KMAP_ATOMIC + kunmap(pp); +#endif +#else +#ifndef UKERNEL +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif /* LINUX24 */ + } + /* and carry uio_iov */ + iovno++; + if(iovno > iovmax) goto done; + + ciov = (auio->uio_iov + iovno); + pp = (struct page*) ciov->iov_base; + iovoff = 0; + } + } + } while (moredata); + +done: + if(page_buffer) + osi_Free(page_buffer, PAGE_SIZE); + return result; +} + + +/* dispatch a no-cache read request */ +afs_int32 +afs_ReadNoCache(register struct vcache *avc, + register struct nocache_read_request *bparms, + struct AFS_UCRED *acred) +{ + afs_int32 code; + afs_int32 bcnt; + struct brequest *breq; + struct vrequest *areq; + + /* the reciever will free this */ + areq = osi_Alloc(sizeof(struct vrequest)); + + if (avc && avc->vc_error) { + code = EIO; + afs_warn("afs_ReadNoCache VCache Error!\n"); + goto cleanup; + } + if ((code = afs_InitReq(areq, acred))) { + afs_warn("afs_ReadNoCache afs_InitReq error!\n"); + goto cleanup; + } + + AFS_GLOCK(); + code = afs_VerifyVCache(avc, areq); + AFS_GUNLOCK(); + + if (code) { + code = afs_CheckCode(code, areq, 11); /* failed to get it */ + afs_warn("afs_ReadNoCache 
Failed to verify VCache!\n"); + goto cleanup; + } + + bparms->areq = areq; + + /* and queue this one */ + bcnt = 1; + AFS_GLOCK(); + while(bcnt < 20) { + breq = afs_BQueue(BOP_FETCH_NOCACHE, avc, B_DONTWAIT, 0, acred, 1, 1, bparms); + if(breq != 0) { + code = 0; + break; + } + afs_osi_Wait(10 * bcnt, 0, 0); bcnt++; + } + AFS_GUNLOCK(); + + if(!breq) { + code = EBUSY; + goto cleanup; + } + + return code; + +cleanup: + /* If there's a problem before we queue the request, we need to + * do everything that would normally happen when the request was + * processed, like unlocking the pages and freeing memory. + */ +#ifdef AFS_LINUX24_ENV + unlock_pages(bparms->auio); +#else +#ifndef UKERNEL +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif + osi_Free(areq, sizeof(struct vrequest)); + osi_Free(bparms->auio->uio_iov, bparms->auio->uio_iovcnt * sizeof(struct iovec)); + osi_Free(bparms->auio, sizeof(uio_t)); + osi_Free(bparms, sizeof(struct nocache_read_request)); + return code; + +} + + +/* Cannot have static linkage--called from BPrefetch (afs_daemons) */ +afs_int32 +afs_PrefetchNoCache(register struct vcache *avc, + register struct AFS_UCRED *acred, + register struct nocache_read_request *bparms) +{ + uio_t *auio; + struct iovec *iovecp; + struct vrequest *areq; + afs_int32 code, length_hi, bytes, locked; + + register struct conn *tc; + afs_int32 i; + struct rx_call *tcall; + struct tlocal1 { + struct AFSVolSync tsync; + struct AFSFetchStatus OutStatus; + struct AFSCallBack CallBack; + }; + struct tlocal1 *tcallspec; + + auio = bparms->auio; + areq = bparms->areq; + iovecp = auio->uio_iov; + + tcallspec = (struct tlocal1 *) osi_Alloc(sizeof(struct tlocal1)); + do { + tc = afs_Conn(&avc->fid, areq, SHARED_LOCK /* ignored */); + if (tc) { + avc->callback = tc->srvr->server; + i = osi_Time(); + tcall = rx_NewCall(tc->id); +#ifdef AFS_64BIT_CLIENT + if(!afs_serverHasNo64Bit(tc)) { + code = StartRXAFS_FetchData64(tcall, + (struct AFSFid *) &avc->fid.Fid, + 
auio->uio_offset, + bparms->length); + if (code == 0) { + + COND_GUNLOCK(locked); + bytes = rx_Read(tcall, (char *)&length_hi, sizeof(afs_int32)); + COND_RE_GLOCK(locked); + + if (bytes != sizeof(afs_int32)) { + length_hi = 0; + code = rx_Error(tcall); + COND_GUNLOCK(locked); + code = rx_EndCall(tcall, code); + COND_RE_GLOCK(locked); + tcall = (struct rx_call *)0; + } + } + if (code == RXGEN_OPCODE || afs_serverHasNo64Bit(tc)) { + if (auio->uio_offset > 0x7FFFFFFF) { + code = EFBIG; + } else { + afs_int32 pos; + pos = auio->uio_offset; + COND_GUNLOCK(locked); + if (!tcall) + tcall = rx_NewCall(tc->id); + code = StartRXAFS_FetchData(tcall, + (struct AFSFid *) &avc->fid.Fid, + pos, bparms->length); + COND_RE_GLOCK(locked); + } + afs_serverSetNo64Bit(tc); + } + } /* afs_serverHasNo64Bit */ +#else + code = StartRXAFS_FetchData(tcall, + (struct AFSFid *) &avc->fid.Fid, + auio->uio_offset, bparms->length); +#endif + + if (code == 0) { + code = afs_NoCacheFetchProc(tcall, avc, auio, + 1 /* release_pages */); + } else { + afs_warn("BYPASS: StartRXAFS_FetchData failed: %d\n", code); + unlock_pages(auio); + goto done; + } + if (code == 0) { + code = EndRXAFS_FetchData(tcall, + &tcallspec->OutStatus, + &tcallspec->CallBack, + &tcallspec->tsync); + } else { + afs_warn("BYPASS: NoCacheFetchProc failed: %d\n", code); + } + code = rx_EndCall(tcall, code); + } + else { + afs_warn("BYPASS: No connection.\n"); + code = -1; +#ifdef AFS_LINUX24_ENV + unlock_pages(auio); +#else +#ifndef UKERNEL +#error AFS_CACHE_BYPASS not implemented on this platform +#endif +#endif + goto done; + } + } while (afs_Analyze(tc, code, &avc->fid, areq, + AFS_STATS_FS_RPCIDX_FETCHDATA, + SHARED_LOCK,0)); +done: + /* + * Copy appropriate fields into vcache + */ + + afs_ProcessFS(avc, &tcallspec->OutStatus, areq); + + osi_Free(areq, sizeof(struct vrequest)); + osi_Free(tcallspec, sizeof(struct tlocal1)); + osi_Free(iovecp, auio->uio_iovcnt * sizeof(struct iovec)); + osi_Free(bparms, sizeof(struct 
nocache_read_request)); + osi_Free(auio, sizeof(uio_t)); + return code; +} + +#endif /* AFS_CACHE_BYPASS */ diff --git a/src/afs/afs_bypasscache.h b/src/afs/afs_bypasscache.h new file mode 100644 index 000000000..6a9ec11ee --- /dev/null +++ b/src/afs/afs_bypasscache.h @@ -0,0 +1,153 @@ +/* + * COPYRIGHT © 2000 + * THE REGENTS OF THE UNIVERSITY OF MICHIGAN + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of The University of + * Michigan is not used in any advertising or publicity + * pertaining to the use of distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * University of Michigan is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION + * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY + * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY O + * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING + * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE + * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR + * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING + * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN + * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGES. 
+ */ + + /* + * Portions Copyright (c) 2008 + * The Linux Box Corporation + * ALL RIGHTS RESERVED + * + * Permission is granted to use, copy, create derivative works + * and redistribute this software and such derivative works + * for any purpose, so long as the name of the Linux Box + * Corporation is not used in any advertising or publicity + * pertaining to the use or distribution of this software + * without specific, written prior authorization. If the + * above copyright notice or any other identification of the + * Linux Box Corporation is included in any copy of any + * portion of this software, then the disclaimer below must + * also be included. + * + * This software is provided as is, without representation + * from the Linux Box Corporation as to its fitness for any + * purpose, and without warranty by the Linux Box Corporation + * of any kind, either express or implied, including + * without limitation the implied warranties of + * merchantability and fitness for a particular purpose. The + * Linux Box Corporation shall not be liable for any damages, + * including special, indirect, incidental, or consequential + * damages, with respect to any claim arising out of or in + * connection with the use of the software, even if it has been + * or is hereafter advised of the possibility of such damages. + */ + + +#ifndef _AFS_BYPASSCACHE_H +#define _AFS_BYPASSCACHE_H + +#if defined(AFS_CACHE_BYPASS) + +#include +#include "afs/param.h" +#include "afs/sysincludes.h" +#include "afsincludes.h" + +#define AFS_CACHE_BYPASS_DISABLED -1 + +#ifdef UKERNEL +typedef struct uio uio_t; +#ifndef PAGE_SIZE +#define PAGE_SIZE 1024 * sizeof(long) / 8 +#endif +#endif + +/* A ptr to an object of the following type is expected to be passed + * as the ab->parm[0] to afs_BQueue */ +struct nocache_read_request { + /* Why can't we all get along? 
*/ +#if defined(AFS_SUN5_ENV) + /* SOLARIS */ + u_offset_t offset; + struct seg *segment; + caddr_t address; +#elif defined(AFS_SGI_ENV) + /* SGI (of some vintage) */ + int32 offset; + int32 rem; + int32 pmp; /* mmm */ + int32 length; +#elif defined(AFS_LINUX24_ENV) || defined(AFS_USR_LINUX24_ENV) + /* The tested platform, as CITI impl. just packs ab->parms */ + uio_t * auio; + struct vrequest *areq; + afs_size_t offset; + afs_size_t length; +#endif +}; + +enum cache_bypass_strategies +{ + ALWAYS_BYPASS_CACHE, + NEVER_BYPASS_CACHE, + LARGE_FILES_BYPASS_CACHE +}; + +extern int cache_bypass_prefetch; +extern int cache_bypass_strategy; +extern int cache_bypass_threshold; + +void afs_TransitionToBypass(register struct vcache *, register struct AFS_UCRED *, int); +void afs_TransitionToCaching(register struct vcache *, register struct AFS_UCRED *, int); + +/* Cache strategy permits vnode transition between caching and no-cache-- + * currently, this means LARGE_FILES_BYPASS_CACHE. Currently, no pioctl permits + * setting FCSBypass manually for a vnode */ +#define variable_cache_strategy \ + (! 
((cache_bypass_strategy == ALWAYS_BYPASS_CACHE) || \ + (cache_bypass_strategy == NEVER_BYPASS_CACHE)) ) + +/* Cache-coherently toggle cache/no-cache for a vnode */ +#define trydo_cache_transition(avc, credp, bypasscache) \ + do { \ + if(variable_cache_strategy) { \ + if(bypasscache) { \ + if(!(avc->cachingStates & FCSBypass)) \ + afs_TransitionToBypass(avc, credp, TRANSChangeDesiredBit); \ + } else { \ + if(avc->cachingStates & FCSBypass) \ + afs_TransitionToCaching(avc, credp, TRANSChangeDesiredBit); \ + } \ + } \ + } \ + while(0); + +/* dispatch a no-cache read request */ +afs_int32 +afs_ReadNoCache(register struct vcache *avc, register struct nocache_read_request *bparms, + struct AFS_UCRED *acred); + +/* no-cache prefetch routine */ +afs_int32 +afs_PrefetchNoCache(register struct vcache *avc, register struct AFS_UCRED *acred, + struct nocache_read_request *bparms); + + +#endif /* AFS_CACHE_BYPASS */ +#endif /* _AFS_BYPASSCACHE_H */ + diff --git a/src/afs/afs_daemons.c b/src/afs/afs_daemons.c index b5e21b360..891f9a64f 100644 --- a/src/afs/afs_daemons.c +++ b/src/afs/afs_daemons.c @@ -26,7 +26,9 @@ RCSID #include /* for vm_att(), vm_det() */ #endif - +#if defined(AFS_CACHE_BYPASS) +#include "afs/afs_bypasscache.h" +#endif// defined(AFS_CACHE_BYPASS) /* background request queue size */ afs_lock_t afs_xbrs; /* lock for brs */ static int brsInit = 0; @@ -516,6 +518,27 @@ BPrefetch(register struct brequest *ab) } } +#if defined(AFS_CACHE_BYPASS) +#if 1 /* XXX Matt debugging */ +static +#endif +void +BPrefetchNoCache(register struct brequest *ab) +{ + struct vrequest treq; + afs_size_t len; + + if ((len = afs_InitReq(&treq, ab->cred))) + return; + +#ifndef UKERNEL + /* OS-specific prefetch routine */ + afs_PrefetchNoCache(ab->vc, ab->cred, (struct nocache_read_request *) ab->ptr_parm[0]); +#else +#warning Cache-bypass code path not implemented in UKERNEL +#endif +} +#endif static void BStore(register struct brequest *ab) @@ -979,6 +1002,10 @@ 
afs_BackgroundDaemon(void) tb->opcode); if (tb->opcode == BOP_FETCH) BPrefetch(tb); +#if defined(AFS_CACHE_BYPASS) + else if (tb->opcode == BOP_FETCH_NOCACHE) + BPrefetchNoCache(tb); +#endif else if (tb->opcode == BOP_STORE) BStore(tb); else if (tb->opcode == BOP_PATH) diff --git a/src/afs/afs_dcache.c b/src/afs/afs_dcache.c index ac373f076..fbcae429e 100644 --- a/src/afs/afs_dcache.c +++ b/src/afs/afs_dcache.c @@ -1847,7 +1847,6 @@ afs_GetDCache(register struct vcache *avc, afs_size_t abyte, #endif /* AFS_NOSTATS */ AFS_STATCNT(afs_GetDCache); - if (dcacheDisabled) return NULL; @@ -2017,8 +2016,12 @@ afs_GetDCache(register struct vcache *avc, afs_size_t abyte, || afs_freeDCList != NULLIDX) break; /* If we can't get space for 5 mins we give up and panic */ - if (++downDCount > 300) + if (++downDCount > 300) { +#if defined(AFS_CACHE_BYPASS) + afs_warn("GetDCache calling osi_Panic: No space in five minutes.\n downDCount: %d\n aoffset: %d alen: %d\n", downDCount, aoffset, alen); +#endif osi_Panic("getdcache"); + } MReleaseWriteLock(&afs_xdcache); /* * Locks held: diff --git a/src/afs/afs_osi_vm.c b/src/afs/afs_osi_vm.c index 74aa51c0b..ccd4efd10 100644 --- a/src/afs/afs_osi_vm.c +++ b/src/afs/afs_osi_vm.c @@ -51,6 +51,16 @@ osi_FlushPages(register struct vcache *avc, struct AFS_UCRED *credp) { int vfslocked; afs_hyper_t origDV; +#if defined(AFS_CACHE_BYPASS) + /* The optimization to check DV under read lock below is identical a + * change in CITI cache bypass work. The problem CITI found in 1999 + * was that this code and background daemon doing prefetching competed + * for the vcache entry shared lock. It's not clear to me from the + * tech report, but it looks like CITI fixed the general prefetch code + * path as a bonus when experimenting on prefetch for cache bypass, see + * citi-tr-01-3. 
+ */ +#endif ObtainReadLock(&avc->lock); /* If we've already purged this version, or if we're the ones * writing this version, don't flush it (could lose the diff --git a/src/afs/afs_pioctl.c b/src/afs/afs_pioctl.c index 2389b5fda..7e482983a 100644 --- a/src/afs/afs_pioctl.c +++ b/src/afs/afs_pioctl.c @@ -23,6 +23,7 @@ RCSID #include "afsincludes.h" /* Afs-based standard headers */ #include "afs/afs_stats.h" /* afs statistics */ #include "afs/vice.h" +#include "afs/afs_bypasscache.h" #include "rx/rx_globals.h" struct VenusFid afs_rootFid; @@ -104,6 +105,10 @@ DECL_PIOCTL(PDiscon); DECL_PIOCTL(PNFSNukeCreds); DECL_PIOCTL(PNewUuid); DECL_PIOCTL(PPrecache); +#if defined(AFS_CACHE_BYPASS) +DECL_PIOCTL(PSetCachingThreshold); +DECL_PIOCTL(PSetCachingBlkSize); +#endif /* * A macro that says whether we're going to need HandleClientContext(). @@ -219,7 +224,12 @@ static int (*(CpioctlSw[])) () = { static int (*(OpioctlSw[])) () = { PBogus, /* 0 */ - PNFSNukeCreds, /* 1 -- nuke all creds for NFS client */ + PNFSNukeCreds, /* 1 -- nuke all creds for NFS client */ +#if defined(AFS_CACHE_BYPASS) + PSetCachingThreshold /* 2 -- get/set cache-bypass size threshold */ +#else + PNoop /* 2 -- get/set cache-bypass size threshold */ +#endif }; #define PSetClientContext 99 /* Special pioctl to setup caller's creds */ @@ -3880,6 +3890,51 @@ DECL_PIOCTL(PNewUuid) return 0; } +#if defined(AFS_CACHE_BYPASS) + +DECL_PIOCTL(PSetCachingThreshold) +{ + afs_int32 getting; + afs_int32 setting; + + setting = getting = 1; + + if (ain == NULL || ainSize < sizeof(afs_int32)) + setting = 0; + + if (aout == NULL) + getting = 0; + + if (setting == 0 && getting == 0) + return EINVAL; + + /* + * If setting, set first, and return the value now in effect + */ + if (setting) { + afs_int32 threshold; + + if (!afs_osi_suser(*acred)) + return EPERM; + memcpy((char *)&threshold, ain, sizeof(afs_int32)); + cache_bypass_threshold = threshold; + afs_warn("Cache Bypass Threshold set to: %d\n", threshold); + /* TODO: 
move to separate pioctl, or enhance pioctl */ + cache_bypass_strategy = LARGE_FILES_BYPASS_CACHE; + } + + if (getting) { + /* Return the current size threshold */ + afs_int32 oldThreshold = cache_bypass_threshold; + memcpy(aout, (char *)&oldThreshold, sizeof(afs_int32)); + *aoutSize = sizeof(afs_int32); + } + + return(0); +} + +#endif /* defined(AFS_CACHE_BYPASS) */ + DECL_PIOCTL(PCallBackAddr) { #ifndef UKERNEL diff --git a/src/afs/afs_stats.h b/src/afs/afs_stats.h index 4e4b2b44d..26ee6450c 100644 --- a/src/afs/afs_stats.h +++ b/src/afs/afs_stats.h @@ -640,6 +640,10 @@ struct afs_CMCallStats { afs_int32 C_SRXAFSCB_GetCacheConfig; /* afs_callback.c */ afs_int32 C_SRXAFSCB_GetCE64; /* afs_callback.c */ afs_int32 C_SRXAFSCB_GetCellByNum; /* afs_callback.c */ +#if defined(AFS_CACHE_BYPASS) + afs_int32 C_BPrefetchNoCache; /* afs_daemons.c */ + afs_int32 C_afs_ReadNoCache; /* osi_vnodeops.c */ +#endif }; struct afs_CMMeanStats { diff --git a/src/afs/afs_vcache.c b/src/afs/afs_vcache.c index 79274d990..9c333110b 100644 --- a/src/afs/afs_vcache.c +++ b/src/afs/afs_vcache.c @@ -1039,6 +1039,12 @@ restart: tvc->vmh = tvc->segid = NULL; tvc->credp = NULL; #endif + +#if defined(AFS_CACHE_BYPASS) + tvc->cachingStates = 0; + tvc->cachingTransitions = 0; +#endif + #ifdef AFS_BOZONLOCK_ENV #if defined(AFS_SUN5_ENV) rw_init(&tvc->rwlock, "vcache rwlock", RW_DEFAULT, NULL); diff --git a/src/afsd/afsd.c b/src/afsd/afsd.c index 0b08fdc1e..07d69588c 100644 --- a/src/afsd/afsd.c +++ b/src/afsd/afsd.c @@ -292,7 +292,12 @@ int createAndTrunc = O_CREAT | O_TRUNC; /*Create & truncate on open */ int ownerRWmode = 0600; /*Read/write OK by owner */ static int filesSet = 0; /*True if number of files explicitly set */ static int nFilesPerDir = 2048; /* # files per cache dir */ -static int nDaemons = 2; /* Number of background daemons */ +#if defined(AFS_CACHE_BYPASS) +#define AFSD_NDAEMONS 4 +#else +#define AFSD_NDAEMONS 2 +#endif +static int nDaemons = AFSD_NDAEMONS; /* Number of background 
daemons */ static int chunkSize = 0; /* 2^chunkSize bytes per chunk */ static int dCacheSize; /* # of dcache entries */ static int vCacheSize = 200; /* # of volume cache entries */ diff --git a/src/config/venus.h b/src/config/venus.h index 2e5317f5d..894fc526d 100644 --- a/src/config/venus.h +++ b/src/config/venus.h @@ -188,4 +188,9 @@ struct cm_initparams { /* OpenAFS-specific 'O' pioctl's */ #define VIOC_NFS_NUKE_CREDS _OVICEIOCTL(1) /* nuke creds for all PAG's */ +#if defined(AFS_CACHE_BYPASS) +/* Uncoordinated 'O' pioctls */ +#define VIOC_SETBYPASS_THRESH _OVICEIOCTL(2) /* cache-bypass size thresh */ +#endif + #endif /* AFS_VENUS_H */ diff --git a/src/libafs/Makefile.common.in b/src/libafs/Makefile.common.in index fb9c0a1de..12696d6d5 100644 --- a/src/libafs/Makefile.common.in +++ b/src/libafs/Makefile.common.in @@ -77,6 +77,7 @@ AFSAOBJS = \ afs_analyze.o \ afs_axscache.o \ afs_buffer.o \ + afs_bypasscache.o \ afs_callback.o \ afs_cbqueue.o \ afs_cell.o \ @@ -242,6 +243,8 @@ afs_axscache.o: $(TOP_SRC_AFS)/afs_axscache.c $(CRULE_OPT) afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c $(CRULE_OPT) +afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c + $(CRULE_OPT) afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c $(CRULE_OPT) afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c diff --git a/src/libuafs/Makefile.common.in b/src/libuafs/Makefile.common.in index 43fa1d941..c8be2cc07 100644 --- a/src/libuafs/Makefile.common.in +++ b/src/libuafs/Makefile.common.in @@ -90,6 +90,7 @@ UAFSOBJ = \ $(UOBJ)/afs_error.o \ $(UOBJ)/afs_axscache.o \ $(UOBJ)/afs_buffer.o \ + $(UOBJ)/afs_bypasscache.o \ $(UOBJ)/afs_callback.o \ $(UOBJ)/afs_cbqueue.o \ $(UOBJ)/afs_cell.o \ @@ -217,6 +218,7 @@ AFSWEBOBJ = \ $(WEBOBJ)/afs_error.o \ $(WEBOBJ)/afs_axscache.o \ $(WEBOBJ)/afs_buffer.o \ + $(WEBOBJ)/afs_bypasscache.o \ $(WEBOBJ)/afs_callback.o \ $(WEBOBJ)/afs_cbqueue.o \ $(WEBOBJ)/afs_cell.o \ @@ -345,6 +347,7 @@ AFSWEBOBJKRB = \ $(WEBOBJ)/afs_error.o \ $(WEBOBJ)/afs_axscache.o \ $(WEBOBJ)/afs_buffer.o \ + 
$(WEBOBJ)/afs_bypasscache.o \ $(WEBOBJ)/afs_callback.o \ $(WEBOBJ)/afs_cbqueue.o \ $(WEBOBJ)/afs_cell.o \ @@ -468,6 +471,7 @@ JUAFSOBJ = \ $(JUAFS)/afs_error.o \ $(JUAFS)/afs_axscache.o \ $(JUAFS)/afs_buffer.o \ + $(JUAFS)/afs_bypasscache.o \ $(JUAFS)/afs_callback.o \ $(JUAFS)/afs_cbqueue.o \ $(JUAFS)/afs_cell.o \ @@ -600,6 +604,8 @@ $(UOBJ)/afs_axscache.o: $(TOP_SRC_AFS)/afs_axscache.c $(CRULE1) $(UOBJ)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c $(CRULE1) +$(UOBJ)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c + $(CRULE1) $(UOBJ)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c $(CRULE1) $(UOBJ)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c @@ -861,6 +867,8 @@ $(WEBOBJ)/afs_axscache.o: $(TOP_SRC_AFS)/afs_axscache.c $(CRULE2) $(WEBOBJ)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c $(CRULE2) +$(WEBOBJ)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c + $(CRULE2) $(WEBOBJ)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c $(CRULE2) $(WEBOBJ)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c @@ -1126,6 +1134,8 @@ $(JUAFS)/afs_axscache.o: $(TOP_SRC_AFS)/afs_axscache.c $(CRULE1) $(JUAFS)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c $(CRULE1) +$(JUAFS)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c + $(CRULE1) $(JUAFS)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c $(CRULE1) $(JUAFS)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c diff --git a/src/venus/fs.c b/src/venus/fs.c index 5a797dbb4..97d50de84 100644 --- a/src/venus/fs.c +++ b/src/venus/fs.c @@ -1275,6 +1275,70 @@ UuidCmd(struct cmd_syndesc *as, void *arock) return 0; } +#if defined(AFS_CACHE_BYPASS) +/* + * Set cache-bypass threshold. Files larger than this size will not be cached. + * With a threshold of 0, the cache is always bypassed. With a threshold of -1, + * cache bypass is disabled. 
+ */ + +static int +BypassThresholdCmd(struct cmd_syndesc *as, char *arock) +{ + afs_int32 code; + afs_int32 size; + struct ViceIoctl blob; + afs_int32 threshold_i, threshold_o; + char *tp; + + /* if new threshold supplied, then set and confirm, else, + * get current threshold and print + */ + + if(as->parms[0].items) { + int digit, ix, len; + + tp = as->parms[0].items->data; + len = strlen(tp); + digit = 1; + for(ix = 0; ix < len; ++ix) { + if(!isdigit(tp[ix])) { + digit = 0; + break; + } + } + if (digit == 0) { + fprintf(stderr, "fs bypassthreshold -size: %s must be an undecorated digit string.\n", tp); + return EINVAL; + } + threshold_i = atoi(tp); + if(ix > 9 && threshold_i < 2147483647) + threshold_i = 2147483647; + blob.in = (char *) &threshold_i; + blob.in_size = sizeof(threshold_i); + } else { + blob.in = NULL; + blob.in_size = 0; + } + + blob.out = (char *) &threshold_o; + blob.out_size = sizeof(threshold_o); + code = pioctl(0, VIOC_SETBYPASS_THRESH, &blob, 1); + if (code) { + Die(errno, NULL); + return 1; + } else { + printf("Cache bypass threshold %d", threshold_o); + if(threshold_o == -1) + printf(" (disabled)"); + printf("\n"); + } + + return 0; +} + +#endif + static int FlushCmd(struct cmd_syndesc *as, void *arock) { @@ -3538,6 +3602,12 @@ main(int argc, char **argv) cmd_AddParm(ts, "-fast", CMD_FLAG, CMD_OPTIONAL, "don't check name with VLDB"); +#if defined(AFS_CACHE_BYPASS) + ts = cmd_CreateSyntax("bypassthreshold", BypassThresholdCmd, 0, + "get/set cache bypass file size threshold"); + cmd_AddParm(ts, "-size", CMD_SINGLE, CMD_OPTIONAL, "file size"); +#endif + /* defect 3069 -- 2.39.5