From af9c9d905a08f28dc672dda6e5a6b9437f1650a8 Mon Sep 17 00:00:00 2001 From: Simon Wilkinson Date: Wed, 15 Jul 2009 00:02:31 +0100 Subject: [PATCH] Add support for background page copies LICENSE BSD This patch adds support for backgrounding the page copies which are caused by a call to readpages() In theory, this should improve the throughput of the AFS kernel module as it permits processes to start work whilst data is still being read into the page cache for later pages. Reviewed-on: http://gerrit.openafs.org/537 Tested-by: Derrick Brashear Reviewed-by: Derrick Brashear --- src/afs/LINUX/osi_module.c | 8 + src/afs/LINUX/osi_pagecopy.c | 251 ++++++++++++++++++++++++++++++ src/afs/LINUX/osi_pagecopy.h | 43 +++++ src/afs/LINUX/osi_vnodeops.c | 37 +++-- src/libafs/Makefile.common.in | 4 +- src/libafs/MakefileProto.LINUX.in | 3 +- 6 files changed, 333 insertions(+), 13 deletions(-) create mode 100644 src/afs/LINUX/osi_pagecopy.c create mode 100644 src/afs/LINUX/osi_pagecopy.h diff --git a/src/afs/LINUX/osi_module.c b/src/afs/LINUX/osi_module.c index 145ec342c..fa39a6396 100644 --- a/src/afs/LINUX/osi_module.c +++ b/src/afs/LINUX/osi_module.c @@ -36,6 +36,8 @@ #include #endif +#include "osi_pagecopy.h" + extern struct file_system_type afs_fs_type; #if !defined(AFS_LINUX24_ENV) @@ -118,6 +120,8 @@ init_module(void) #if defined(AFS_CACHE_BYPASS) afs_warn("Cache bypass patched libafs module init.\n"); #endif + afs_init_pagecopy(); + return 0; } @@ -132,6 +136,9 @@ cleanup_module(void) #if defined(AFS_CACHE_BYPASS) afs_warn("Cache bypass patched libafs module cleaning up.\n"); #endif + + afs_shutdown_pagecopy(); + #ifdef LINUX_KEYRING_SUPPORT osi_keyring_shutdown(); #endif @@ -153,6 +160,7 @@ cleanup_module(void) osi_ioctl_clean(); osi_proc_clean(); #endif + return; } diff --git a/src/afs/LINUX/osi_pagecopy.c b/src/afs/LINUX/osi_pagecopy.c new file mode 100644 index 000000000..482983234 --- /dev/null +++ b/src/afs/LINUX/osi_pagecopy.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 
2009 Simon Wilkinson. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Background page copying + * + * In the Linux CM, we pull cached files in from disk by reading them into + * a page backed by the disk file, then copying them into the relevant AFS + * page. This is a synchronous operation, requiring us to wait until the + * disk read is completed before the page copy can be performed. When we're + * doing readahead with readpages(), it means that the readpages() call must + * block until the readahead is complete, which somewhat defeats the point. + * + * This file implements a background queuing system for performing these + * page copies. 
For each collection of pages requiring copying, a new + * task is created by calling afs_pagecopy_init_task(). Every time + * readpage() on the backing cache returns a page which is still locked, + * afs_pagecopy_queue_page() can be called to queue up a background copy + * of this page. queue_page() ensures that the new page is connected to + * the current task structure, and that that task is on a locally implemented + * work queue. + * + * The work queue is handled by a dedicated kernel thread (created by + * afs_init_pagecopy() and destroyed with afs_shutdown_pagecopy() ). This + * thread iterates on the queue, moving all pages that are unlocked to a + * different list, and placing tasks with unlocked pages onto the kernel + * work queue. Once it has run through all of the unlocked pages, it will + * identify a still-locked page to sleep upon, and wait until that page is + * unlocked. + * + * The final act of copying the pages is performed by a per-task job in the + * kernel work queue (this allows us to use multiple processors on SMP systems) + */ + +#include +#include "afs/param.h" + +#include +#include +#include +#include +#include + +static DECLARE_WAIT_QUEUE_HEAD (afs_pagecopy_wq); +static spinlock_t afs_pagecopy_lock; +static struct list_head afs_pagecopy_tasks; +static struct task_struct * afs_pagecopy_thread_id; + +struct afs_pagecopy_page { + struct page *afspage; + struct page *cachepage; + struct list_head tasklink; +}; + +struct afs_pagecopy_task { + struct work_struct work; + struct list_head checkpages; + struct list_head copypages; + atomic_t refcnt; + spinlock_t lock; + struct list_head joblink; +}; + +#if defined(INIT_WORK_HAS_DATA) +static void afs_pagecopy_worker(void *rock); +#else +static void afs_pagecopy_worker(struct work_struct *work); +#endif + +struct afs_pagecopy_task * +afs_pagecopy_init_task(void) { + struct afs_pagecopy_task *task; + + task = kzalloc(sizeof(struct afs_pagecopy_task), GFP_NOFS); + INIT_LIST_HEAD(&task->checkpages); 
+ INIT_LIST_HEAD(&task->copypages); + INIT_LIST_HEAD(&task->joblink); +#if defined(INIT_WORK_HAS_DATA) + INIT_WORK(&task->work, afs_pagecopy_worker, &task->work); +#else + INIT_WORK(&task->work, afs_pagecopy_worker); +#endif + spin_lock_init(&task->lock); + atomic_inc(&task->refcnt); + + return task; +} + +void afs_pagecopy_queue_page(struct afs_pagecopy_task *task, + struct page *cachepage, + struct page *afspage) +{ + struct afs_pagecopy_page *page; + + page = kzalloc(sizeof(struct afs_pagecopy_page), GFP_NOFS); + INIT_LIST_HEAD(&page->tasklink); + + page_cache_get(cachepage); + page->cachepage = cachepage; + page_cache_get(afspage); + page->afspage = afspage; + + spin_lock(&task->lock); + list_add_tail(&page->tasklink, &task->checkpages); + spin_lock(&afs_pagecopy_lock); + if (list_empty(&task->joblink)) { + atomic_inc(&task->refcnt); + list_add_tail(&task->joblink, &afs_pagecopy_tasks); + } + spin_unlock(&afs_pagecopy_lock); + spin_unlock(&task->lock); + + wake_up_interruptible(&afs_pagecopy_wq); +} + +void afs_pagecopy_put_task(struct afs_pagecopy_task *task) +{ + if (!atomic_dec_and_test(&task->refcnt)) + return; + + kfree(task); +} + +static struct page * afs_pagecopy_checkworkload(void) { + struct page *sleeppage = NULL; + struct afs_pagecopy_task *task, *tmp_task; + struct afs_pagecopy_page *page, *tmp_page; + + spin_lock(&afs_pagecopy_lock); + list_for_each_entry_safe(task, tmp_task, &afs_pagecopy_tasks, joblink) { + spin_unlock(&afs_pagecopy_lock); + + spin_lock(&task->lock); + list_for_each_entry_safe(page, tmp_page, &task->checkpages, tasklink) { + if (!PageLocked(page->cachepage)) { + list_move_tail(&page->tasklink, &task->copypages); + atomic_inc(&task->refcnt); + if (!schedule_work(&task->work)) + atomic_dec(&task->refcnt); + } else if (!sleeppage) { + page_cache_get(page->cachepage); + sleeppage = page->cachepage; + } + } + /* If the task structure has no more pages to check, remove it + * from our workload queue */ + if 
(list_empty(&task->checkpages)) { + spin_lock(&afs_pagecopy_lock); + spin_unlock(&task->lock); + list_del_init(&task->joblink); + spin_unlock(&afs_pagecopy_lock); + afs_pagecopy_put_task(task); + } else { + spin_unlock(&task->lock); + } + spin_lock(&afs_pagecopy_lock); + } + spin_unlock(&afs_pagecopy_lock); + + return sleeppage; +} + +#if defined(INIT_WORK_HAS_DATA) +static void afs_pagecopy_worker(void *work) +#else +static void afs_pagecopy_worker(struct work_struct *work) +#endif +{ + struct afs_pagecopy_task *task = + container_of(work, struct afs_pagecopy_task, work); + struct afs_pagecopy_page *page; + + spin_lock(&task->lock); + while (!list_empty(&task->copypages)) { + page = list_entry(task->copypages.next, struct afs_pagecopy_page, + tasklink); + list_del(&page->tasklink); + spin_unlock(&task->lock); + + if (PageUptodate(page->cachepage)) { + copy_highpage(page->afspage, page->cachepage); + flush_dcache_page(page->afspage); + ClearPageError(page->afspage); + SetPageUptodate(page->afspage); + } + unlock_page(page->afspage); + page_cache_release(page->cachepage); + page_cache_release(page->afspage); + kfree(page); + + spin_lock(&task->lock); + } + spin_unlock(&task->lock); + + afs_pagecopy_put_task(task); +} + +static int afs_pagecopy_thread(void *unused) { + struct page *sleeppage; + + while (!kthread_should_stop()) { + for (;;) { + sleeppage = afs_pagecopy_checkworkload(); + if (sleeppage) { + wait_on_page_locked(sleeppage); + page_cache_release(sleeppage); + } else { + break; + } + } + wait_event_interruptible(afs_pagecopy_wq, + !list_empty(&afs_pagecopy_tasks) || kthread_should_stop()); + } + + return 0; +} + +void afs_init_pagecopy(void) { + spin_lock_init(&afs_pagecopy_lock); + INIT_LIST_HEAD(&afs_pagecopy_tasks); + + afs_pagecopy_thread_id = kthread_run(afs_pagecopy_thread, NULL, + "afs_pagecopy"); +} + +void afs_shutdown_pagecopy(void) { + if (afs_pagecopy_thread_id) + kthread_stop(afs_pagecopy_thread_id); +} + diff --git 
a/src/afs/LINUX/osi_pagecopy.h b/src/afs/LINUX/osi_pagecopy.h new file mode 100644 index 000000000..b645b8930 --- /dev/null +++ b/src/afs/LINUX/osi_pagecopy.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2009 Simon Wilkinson. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef AFS_LINUX_OSI_PAGECOPY_H +#define AFS_LINUX_OSI_PAGECOPY_H + +struct afs_pagecopy_task; + +struct afs_pagecopy_task *afs_pagecopy_init_task(void); + +void afs_pagecopy_queue_page(struct afs_pagecopy_task *task, + struct page *cachepage, + struct page *afspage); + +void afs_pagecopy_put_task(struct afs_pagecopy_task *task); + +void afs_init_pagecopy(void); + +void afs_shutdown_pagecopy(void); + +#endif + diff --git a/src/afs/LINUX/osi_vnodeops.c b/src/afs/LINUX/osi_vnodeops.c index 74ce8493e..5e092a82e 100644 --- a/src/afs/LINUX/osi_vnodeops.c +++ b/src/afs/LINUX/osi_vnodeops.c @@ -42,6 +42,8 @@ #include "afs/afs_bypasscache.h" #endif +#include "osi_pagecopy.h" + #ifdef pgoff2loff #define pageoff(pp) pgoff2loff((pp)->index) #else @@ -1755,7 +1757,8 @@ out: static int afs_linux_read_cache(struct file *cachefp, struct page *page, - int chunk, struct pagevec *lrupv) { + int chunk, struct pagevec *lrupv, + struct afs_pagecopy_task *task) { loff_t offset = page_offset(page); struct page *newpage, *cachepage; struct address_space *cachemapping; @@ -1804,25 +1807,34 @@ afs_linux_read_cache(struct file *cachefp, struct page *page, if (!PageUptodate(cachepage)) { ClearPageError(cachepage); code = cachemapping->a_ops->readpage(NULL, cachepage); - if (!code) { + if (!code && !task) { wait_on_page_locked(cachepage); - if (!PageUptodate(cachepage)) - code = -EIO; } } else { unlock_page(cachepage); } if (!code) { - copy_highpage(page, cachepage); - flush_dcache_page(page); - SetPageUptodate(page); + if (PageUptodate(cachepage)) { + copy_highpage(page, cachepage); + flush_dcache_page(page); + SetPageUptodate(page); + UnlockPage(page); + } else if (task) { + afs_pagecopy_queue_page(task, cachepage, page); + } else { + code = -EIO; + } + } + + if (code) { + UnlockPage(page); } - UnlockPage(page); out: if (cachepage) page_cache_release(cachepage); + return code; } @@ -1918,7 +1930,7 @@ afs_linux_readpage_fastpath(struct file *fp, struct page *pp, int *codep) cacheFp = 
afs_linux_raw_open(&tdc->f.inode, NULL); pagevec_init(&lrupv, 0); - code = afs_linux_read_cache(cacheFp, pp, tdc->f.chunk, &lrupv); + code = afs_linux_read_cache(cacheFp, pp, tdc->f.chunk, &lrupv, NULL); if (pagevec_count(&lrupv)) __pagevec_lru_add_file(&lrupv); @@ -2119,6 +2131,7 @@ afs_linux_readpages(struct file *fp, struct address_space *mapping, unsigned int page_idx; loff_t offset; struct pagevec lrupv; + struct afs_pagecopy_task *task; #if defined(AFS_CACHE_BYPASS) bypasscache = afs_linux_can_bypass(ip); @@ -2140,6 +2153,8 @@ afs_linux_readpages(struct file *fp, struct address_space *mapping, ObtainWriteLock(&avc->lock, 912); AFS_GUNLOCK(); + task = afs_pagecopy_init_task(); + tdc = NULL; pagevec_init(&lrupv, 0); for (page_idx = 0; page_idx < num_pages; page_idx++) { @@ -2179,7 +2194,7 @@ afs_linux_readpages(struct file *fp, struct address_space *mapping, if (!pagevec_add(&lrupv, page)) __pagevec_lru_add_file(&lrupv); - afs_linux_read_cache(cacheFp, page, tdc->f.chunk, &lrupv); + afs_linux_read_cache(cacheFp, page, tdc->f.chunk, &lrupv, task); } page_cache_release(page); } @@ -2189,6 +2204,8 @@ afs_linux_readpages(struct file *fp, struct address_space *mapping, if (tdc) filp_close(cacheFp, NULL); + afs_pagecopy_put_task(task); + AFS_GLOCK(); if (tdc) { ReleaseReadLock(&tdc->lock); diff --git a/src/libafs/Makefile.common.in b/src/libafs/Makefile.common.in index 4ffbb56ce..6c758b744 100644 --- a/src/libafs/Makefile.common.in +++ b/src/libafs/Makefile.common.in @@ -515,8 +515,8 @@ osi_config.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_config.c $(CRULE_NOOPT) osi_timeout.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_timeout.c $(CRULE_NOOPT) - - +osi_pagecopy.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_pagecopy.c + $(CRULE_NOOPT) clean: -$(RM) -rf STATIC* MODLOAD* $(AFS_OS_CLEAN) diff --git a/src/libafs/MakefileProto.LINUX.in b/src/libafs/MakefileProto.LINUX.in index 474b09198..c2fb1f658 100644 --- a/src/libafs/MakefileProto.LINUX.in +++ b/src/libafs/MakefileProto.LINUX.in 
@@ -37,7 +37,8 @@ AFS_OS_OBJS = \ osi_ioctl.o \ osi_proc.o \ - osi_vnodeops.o + osi_vnodeops.o \ + osi_pagecopy.o AFS_OS_PAGOBJS = \ osi_alloc.o \ -- 2.39.5