From 11232f19f0ffd6dd88a1b498e792b3d5d3804286 Mon Sep 17 00:00:00 2001
From: Andrew Deason
Date: Mon, 8 Jul 2019 14:49:23 -0500
Subject: [PATCH] afs: Avoid panics in afs_InvalidateAllSegments

Currently, afs_InvalidateAllSegments panics when afs_GetValidDSlot
fails. We panic in these cases because afs_InvalidateAllSegments cannot
simply return an error to its callers; we must invalidate all segments
for the given vcache, or we risk serving incorrect data to userspace as
explained in the comments.

Instead of panicking, though, we could simply sleep and retry the
operation until it succeeds. Implement this, retrying every 10 seconds,
and logging a message every hour that we're stuck (in case we're stuck
for a long time).

When we retry the operation, do so in a background request, to avoid a
somewhat common situation on Linux where we always get I/O errors from
the cache when the calling process has a SIGKILL pending. Create a new
background op for this, BOP_INVALIDATE_SEGMENTS.

With this, the relevant vcache will be effectively unusable for the
entire time we're stuck in this situation (avc->lock will be
write-locked), but this is at least better than panicking the whole
machine.

Reviewed-on: https://gerrit.openafs.org/13677
Reviewed-by: Benjamin Kaduk
Reviewed-by: Michael Meffie
Reviewed-by: Cheyenne Wills
Tested-by: BuildBot
(cherry picked from commit 3be5880d1d2a0aef6600047ed43d602949cd5f4d)

Change-Id: Iba1cde70a4d5e919fedfe27d0540878113a369e4
Reviewed-on: https://gerrit.openafs.org/13847
Tested-by: Andrew Deason
Reviewed-by: Michael Meffie
Reviewed-by: Cheyenne Wills
Reviewed-by: Marcio Brito Barbosa
Reviewed-by: Mark Vitale
Reviewed-by: Stephan Wiesand
---
 src/afs/afs.h            |   1 +
 src/afs/afs_daemons.c    |  22 ++++++
 src/afs/afs_prototypes.h |   3 +-
 src/afs/afs_segments.c   | 143 ++++++++++++++++++++++++++++++---------
 4 files changed, 135 insertions(+), 34 deletions(-)

diff --git a/src/afs/afs.h b/src/afs/afs.h
index e242e89be..3b47b6e38 100644
--- a/src/afs/afs.h
+++ b/src/afs/afs.h
@@ -148,6 +148,7 @@ struct sysname_info {
 #define BOP_MOVE 5	/* ptr1 afs_uspc_param ptr2 sname ptr3 dname */
 #endif
 #define BOP_PARTIAL_STORE 6	/* parm1 is chunk to store */
+#define BOP_INVALIDATE_SEGMENTS 7 /* no parms: just uses the 'bp->vc' vcache */
 
 #define B_DONTWAIT 1	/* On failure return; don't wait */
 
diff --git a/src/afs/afs_daemons.c b/src/afs/afs_daemons.c
index 3d42b3c8d..732e73df2 100644
--- a/src/afs/afs_daemons.c
+++ b/src/afs/afs_daemons.c
@@ -598,6 +598,26 @@ BPartialStore(struct brequest *ab)
     afs_DestroyReq(treq);
 }
 
+static void
+BInvalidateSegments(struct brequest *ab)
+{
+    int code;
+    struct vcache *tvc = ab->vc;
+    osi_Assert(WriteLocked(&tvc->lock));
+
+    code = afs_InvalidateAllSegments_once(tvc);
+
+    /* Set return code, and wakeup anyone waiting. */
+    if ((ab->flags & BUVALID) == 0) {
+        ab->code_raw = ab->code_checkcode = code;
+        ab->flags |= BUVALID;
+        if ((ab->flags & BUWAIT)) {
+            ab->flags &= ~BUWAIT;
+            afs_osi_Wakeup(ab);
+        }
+    }
+}
+
 /* release a held request buffer */
 void
 afs_BRelease(struct brequest *ab)
@@ -1095,6 +1115,8 @@ afs_BackgroundDaemon(void)
 #endif
             else if (tb->opcode == BOP_PARTIAL_STORE)
                 BPartialStore(tb);
+            else if (tb->opcode == BOP_INVALIDATE_SEGMENTS)
+                BInvalidateSegments(tb);
             else
                 panic("background bop");
             brequest_release(tb);
diff --git a/src/afs/afs_prototypes.h b/src/afs/afs_prototypes.h
index 5c7dc5aff..934694943 100644
--- a/src/afs/afs_prototypes.h
+++ b/src/afs/afs_prototypes.h
@@ -834,7 +834,8 @@ extern int HandleIoctl(struct vcache *avc, afs_int32 acom,
 /* afs_segments.c */
 extern int afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq,
                                 int sync);
-extern int afs_InvalidateAllSegments(struct vcache *avc);
+extern void afs_InvalidateAllSegments(struct vcache *avc);
+extern int afs_InvalidateAllSegments_once(struct vcache *avc);
 extern int afs_ExtendSegments(struct vcache *avc, afs_size_t alen,
                               struct vrequest *areq);
 extern int afs_TruncateAllSegments(struct vcache *avc,
diff --git a/src/afs/afs_segments.c b/src/afs/afs_segments.c
index fc1903436..f720cb4f1 100644
--- a/src/afs/afs_segments.c
+++ b/src/afs/afs_segments.c
@@ -496,28 +496,13 @@ afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq,
 
 }				/*afs_StoreAllSegments (new 03/02/94) */
 
-/*
- * afs_InvalidateAllSegments
- *
- * Description:
- *	Invalidates all chunks for a given file
- *
- * Parameters:
- *	avc : Pointer to vcache entry.
- *
- * Environment:
- *	For example, called after an error has been detected. Called
- *	with avc write-locked, and afs_xdcache unheld.
- */
-
 int
-afs_InvalidateAllSegments(struct vcache *avc)
+afs_InvalidateAllSegments_once(struct vcache *avc)
 {
     struct dcache *tdc;
     afs_int32 hash;
     afs_int32 index;
-    struct dcache **dcList;
+    struct dcache **dcList = NULL;
     int i, dcListMax, dcListCount;
 
     AFS_STATCNT(afs_InvalidateAllSegments);
@@ -543,17 +528,7 @@ afs_InvalidateAllSegments(struct vcache *avc)
         if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
             tdc = afs_GetValidDSlot(index);
             if (!tdc) {
-                /* In the case of fatal errors during stores, we MUST
-                 * invalidate all of the relevant chunks. Otherwise, the chunks
-                 * will be left with the 'new' data that was never successfully
-                 * written to the server, but the DV in the dcache is still the
-                 * old DV. So, we may indefinitely serve data to applications
-                 * that is not actually in the file on the fileserver. If we
-                 * cannot afs_GetValidDSlot the appropriate entries, currently
-                 * there is no way to ensure the dcache is invalidated. So for
-                 * now, to avoid risking serving bad data from the cache, panic
-                 * instead. */
-                osi_Panic("afs_InvalidateAllSegments tdc count");
+                goto error;
             }
             ReleaseReadLock(&tdc->tlock);
             if (!FidCmp(&tdc->f.fid, &avc->f.fid))
@@ -570,11 +545,7 @@ afs_InvalidateAllSegments(struct vcache *avc)
         if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
             tdc = afs_GetValidDSlot(index);
             if (!tdc) {
-                /* We cannot proceed after getting this error; we risk serving
-                 * incorrect data to applications. So panic instead. See the
-                 * above comment next to the previous afs_GetValidDSlot call
-                 * for details. */
*/ - osi_Panic("afs_InvalidateAllSegments tdc store"); + goto error; } ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->f.fid)) { @@ -611,6 +582,112 @@ afs_InvalidateAllSegments(struct vcache *avc) osi_Free(dcList, dcListMax * sizeof(struct dcache *)); return 0; + + error: + ReleaseWriteLock(&afs_xdcache); + + if (dcList) { + for (i = 0; i < dcListCount; i++) { + tdc = dcList[i]; + if (tdc) { + afs_PutDCache(tdc); + } + } + osi_Free(dcList, dcListMax * sizeof(struct dcache *)); + } + return EIO; +} + + +/* + * afs_InvalidateAllSegments + * + * Description: + * Invalidates all chunks for a given file + * + * Parameters: + * avc : Pointer to vcache entry. + * + * Environment: + * For example, called after an error has been detected. Called + * with avc write-locked, and afs_xdcache unheld. + */ + +void +afs_InvalidateAllSegments(struct vcache *avc) +{ + int code; + afs_uint32 last_warn; + + code = afs_InvalidateAllSegments_once(avc); + if (code == 0) { + /* Success; nothing more to do. */ + return; + } + + /* + * If afs_InvalidateAllSegments_once failed, we cannot simply return an + * error to our caller. This function is called when we encounter a fatal + * error during stores, in which case we MUST invalidate all chunks for the + * given file. If we fail to invalidate some chunks, they will be left with + * the 'new' dirty/written data that was never successfully stored on the + * server, but the DV in the dcache is still the old DV. So, if its left + * alone, we may indefinitely serve data to applications that is not + * actually in the file on the fileserver. + * + * So to make sure we never serve userspace bad data after such a failure, + * we must keep trying to invalidate the dcaches for the given file. (Note + * that we cannot simply set a flag on the vcache to retry the invalidate + * later on, because the vcache may go away, but the 'bad' dcaches could + * remain.) We do this below, via background daemon requests because in + * some scenarios we can always get I/O errors on accessing the cache if we + * access via a user pid. (e.g. on LINUX, this can happen if the pid has a + * pending SIGKILL.) Doing this via background daemon ops should avoid + * that. + */ + + last_warn = osi_Time(); + afs_warn("afs: Failed to invalidate cache chunks for fid %d.%d.%d.%d; our " + "local disk cache may be throwing errors. We must invalidate " + "these chunks to avoid possibly serving incorrect data, so we'll " + "retry until we succeed. If AFS access seems to hang, this may " + "be why.\n", + avc->f.fid.Cell, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, + avc->f.fid.Fid.Unique); + + do { + static const afs_uint32 warn_int = 60*60; /* warn once every hour */ + afs_uint32 now = osi_Time(); + struct brequest *bp; + + if (now < last_warn || now - last_warn > warn_int) { + last_warn = now; + afs_warn("afs: Still trying to invalidate cache chunks for fid " + "%d.%d.%d.%d. We will retry until we succeed; if AFS " + "access seems to hang, this may be why.\n", + avc->f.fid.Cell, avc->f.fid.Fid.Volume, + avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); + } + + /* Wait 10 seconds between attempts. */ + afs_osi_Wait(1000 * 10, NULL, 0); + + /* + * Ask a background daemon to do this request for us. Note that _we_ hold + * the write lock on 'avc', while the background daemon does the work. This + * is a little weird, but it helps avoid any issues with lock ordering + * or if our caller does not expect avc->lock to be dropped while + * running. 
+         */
+        bp = afs_BQueue(BOP_INVALIDATE_SEGMENTS, avc, 0, 1, NULL, 0, 0, NULL,
+                        NULL, NULL);
+        while ((bp->flags & BUVALID) == 0) {
+            bp->flags |= BUWAIT;
+            afs_osi_Sleep(bp);
+        }
+        code = bp->code_raw;
+        afs_BRelease(bp);
+    } while (code);
 }
 
 /*!
-- 
2.39.5
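
The retry policy the patch adds is easier to see outside the kernel: repeat an
operation that must eventually succeed, wait 10 seconds between attempts, and
warn at most once per hour (re-warning early only if the clock jumps
backwards). Below is a minimal userspace sketch of that policy, not OpenAFS
code: attempt_invalidate() is a hypothetical stand-in for queueing
BOP_INVALIDATE_SEGMENTS to a background daemon and waiting on the brequest,
and it is made to fail a few times purely for demonstration. It builds with a
plain C compiler and needs no OpenAFS headers.

/*
 * Standalone model of the retry/warn loop added by this patch.
 * Illustrative only; the names here are invented for the example.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical stand-in for the real invalidation attempt; fails 4 times. */
static int
attempt_invalidate(void)
{
    static int attempts;
    return (++attempts < 5) ? -1 : 0;   /* nonzero means "try again" */
}

static void
keep_invalidating(void)
{
    const time_t warn_int = 60 * 60;    /* warn at most once per hour */
    time_t last_warn;
    int code;

    code = attempt_invalidate();
    if (code == 0)
        return;                         /* common case: nothing to retry */

    last_warn = time(NULL);
    printf("failed to invalidate; will retry until it succeeds\n");

    do {
        time_t now = time(NULL);

        /* Re-warn if an hour has passed, or if the clock went backwards. */
        if (now < last_warn || now - last_warn > warn_int) {
            last_warn = now;
            printf("still trying to invalidate\n");
        }

        sleep(10);                      /* wait 10 seconds between attempts */
        code = attempt_invalidate();
    } while (code != 0);
}

int
main(void)
{
    keep_invalidating();
    printf("invalidation finally succeeded\n");
    return 0;
}

In the kernel the attempt itself is not run in the caller's context: each
retry is queued to a background daemon via afs_BQueue(BOP_INVALIDATE_SEGMENTS,
...), precisely so that a pending SIGKILL on the calling process cannot turn
every cache access into an I/O error.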