From 31b32e2e2f3d09a8ca5cd91a6f48be9ab72efe10 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Tue, 12 Oct 2010 17:46:36 -0500 Subject: [PATCH] viced: Add options for interrupting clients Add the -offline-timeout and -offline-shutdown-timeout options to the fileserver, to implement interrupting clients that are accessing volumes we are trying to take offline. Document the new options. Currently this is only implemented for read operations. Implementing this for write operations and callback breaks will require more work. This also removes the VGetVolumeTimed interface from the volume package, since the fileserver was the only user and with this change the fileserver now uses the VGetVolumeWithCall interface. Reviewed-on: http://gerrit.openafs.org/2984 Tested-by: BuildBot Reviewed-by: Derrick Brashear (cherry picked from commit 4f7a51aa5115a9073771bc396e25d510aeae71e8) Change-Id: I34967bb03a09e5655b02c9b7b39272cfe8f71d7b Reviewed-on: http://gerrit.openafs.org/10799 Reviewed-by: D Brashear Reviewed-by: Andrew Deason Tested-by: BuildBot Reviewed-by: Stephan Wiesand --- .../pod8/fragments/dafileserver-synopsis.pod | 2 + .../pod8/fragments/fileserver-options.pod | 41 +++++++++++ .../pod8/fragments/fileserver-synopsis.pod | 2 + src/viced/afsfileprocs.c | 68 +++++++++++++++---- src/viced/viced.c | 55 +++++++++++++++ src/vol/volume.c | 14 ---- src/vol/volume.h | 2 - 7 files changed, 154 insertions(+), 30 deletions(-) diff --git a/doc/man-pages/pod8/fragments/dafileserver-synopsis.pod b/doc/man-pages/pod8/fragments/dafileserver-synopsis.pod index 24697a610..f1f633c47 100644 --- a/doc/man-pages/pod8/fragments/dafileserver-synopsis.pod +++ b/doc/man-pages/pod8/fragments/dafileserver-synopsis.pod @@ -54,3 +54,5 @@ B S<<< [B<-vlruinterval> >] >>> S<<< [B<-vlrumax> >] >>> S<<< [B<-unsafe-nosalvage>] >>> + S<<< [B<-offline-timeout> <I<timeout in seconds>>] >>> + S<<< [B<-offline-shutdown-timeout> <I<timeout in seconds>>] >>> diff --git a/doc/man-pages/pod8/fragments/fileserver-options.pod 
b/doc/man-pages/pod8/fragments/fileserver-options.pod index 813e825cd..4b779ab1c 100644 --- a/doc/man-pages/pod8/fragments/fileserver-options.pod +++ b/doc/man-pages/pod8/fragments/fileserver-options.pod @@ -402,3 +402,44 @@ other option, whereas others argue that choosing anything besides the C<always> option allows for an unacceptable risk of data loss. This may depend on your usage patterns, your hardware, your platform and filesystem, and who you talk to about this topic. + +=item B<-offline-timeout> <I<timeout in seconds>> + +Setting this option to I<N> means that if any clients are reading from a +volume when we want to offline that volume (for example, as part of +releasing a volume), we will wait I<N> seconds for the clients' requests +to finish. If the clients' requests have not finished, we will then +interrupt the client requests and send an error to those clients, +allowing the volume to go offline. + +If a client is interrupted, from the client's point of view, it will +appear as if they had accessed the volume after it had gone offline. For +RO volumes, this means the client should fail-over to other valid RO +sites for that volume. This option may speed up volume releases if +volumes are being accessed by clients that have slow or unreliable +network connections. + +Setting this option to C<0> means to interrupt clients immediately if a +volume is waiting to go offline. Setting this option to C<-1> means to +wait forever for client requests to finish. The default value is C<-1>. + +For the LWP fileserver, the only valid value for this option is C<-1>. + +=item B<-offline-shutdown-timeout> <I<timeout in seconds>> + +This option behaves similarly to B<-offline-timeout> but applies to +volumes that are going offline as part of the fileserver shutdown +process. If the value specified is I<N>, we will interrupt any clients +reading from volumes after I<N> seconds have passed since we first +needed to wait for a volume to offline during the shutdown process. 
+ +Setting this option to C<0> means to interrupt all clients reading from +volumes immediately during the shutdown process. Setting this option to +C<-1> means to wait forever for client requests to finish during the +shutdown process. + +If B<-offline-timeout> is specified, the default value of +B<-offline-shutdown-timeout> is the value specified for +B<-offline-timeout>. Otherwise, the default value is C<-1>. + +For the LWP fileserver, the only valid value for this option is C<-1>. diff --git a/doc/man-pages/pod8/fragments/fileserver-synopsis.pod b/doc/man-pages/pod8/fragments/fileserver-synopsis.pod index ba20687a5..396987e8e 100644 --- a/doc/man-pages/pod8/fragments/fileserver-synopsis.pod +++ b/doc/man-pages/pod8/fragments/fileserver-synopsis.pod @@ -46,3 +46,5 @@ B S<<< [B<-m> >] >>> S<<< [B<-lock>] >>> S<<< [B<-sync> >] >>> + S<<< [B<-offline-timeout> <I<timeout in seconds>>] >>> + S<<< [B<-offline-shutdown-timeout> <I<timeout in seconds>>] >>> diff --git a/src/viced/afsfileprocs.c b/src/viced/afsfileprocs.c index 46b2a10a4..c0a2e8cf0 100644 --- a/src/viced/afsfileprocs.c +++ b/src/viced/afsfileprocs.c @@ -545,7 +545,8 @@ CallPostamble(struct rx_connection *aconn, afs_int32 ret, * are incremented and they must be eventualy released. 
*/ static afs_int32 -CheckVnode(AFSFid * fid, Volume ** volptr, Vnode ** vptr, int lock) +CheckVnodeWithCall(AFSFid * fid, Volume ** volptr, struct VCallByVol *cbv, + Vnode ** vptr, int lock) { Error fileCode = 0; Error local_errorCode, errorCode = -1; @@ -572,7 +573,8 @@ CheckVnode(AFSFid * fid, Volume ** volptr, Vnode ** vptr, int lock) #endif errorCode = 0; - *volptr = VGetVolumeTimed(&local_errorCode, &errorCode, (afs_int32) fid->Volume, ts); + *volptr = VGetVolumeWithCall(&local_errorCode, &errorCode, + fid->Volume, ts, cbv); if (!errorCode) { osi_Assert(*volptr); break; @@ -682,6 +684,12 @@ CheckVnode(AFSFid * fid, Volume ** volptr, Vnode ** vptr, int lock) return (0); } /*CheckVnode */ +static_inline afs_int32 +CheckVnode(AFSFid * fid, Volume ** volptr, Vnode ** vptr, int lock) +{ + return CheckVnodeWithCall(fid, volptr, NULL, vptr, lock); +} + /* * This routine returns the ACL associated with the targetptr. If the * targetptr isn't a directory, we access its parent dir and get the ACL @@ -858,10 +866,11 @@ VanillaUser(struct client *client) * after completing disk I/O. 
*------------------------------------------------------------------------*/ static afs_int32 -GetVolumePackage(struct rx_call *acall, AFSFid * Fid, Volume ** volptr, - Vnode ** targetptr, int chkforDir, Vnode ** parent, - struct client **client, int locktype, afs_int32 * rights, - afs_int32 * anyrights) +GetVolumePackageWithCall(struct rx_call *acall, struct VCallByVol *cbv, + AFSFid * Fid, Volume ** volptr, + Vnode ** targetptr, int chkforDir, Vnode ** parent, + struct client **client, int locktype, afs_int32 * rights, + afs_int32 * anyrights) { struct acl_accessList *aCL = NULL; /* Internal access List */ int aCLSize; /* size of the access list */ @@ -870,7 +879,7 @@ GetVolumePackage(struct rx_call *acall, AFSFid * Fid, Volume ** volptr, rx_KeepAliveOff(acall); - if ((errorCode = CheckVnode(Fid, volptr, targetptr, locktype))) + if ((errorCode = CheckVnodeWithCall(Fid, volptr, cbv, targetptr, locktype))) goto gvpdone; if (chkforDir) { @@ -921,6 +930,17 @@ gvpdone: } /*GetVolumePackage */ +static_inline afs_int32 +GetVolumePackage(struct rx_call *acall, AFSFid * Fid, Volume ** volptr, + Vnode ** targetptr, int chkforDir, Vnode ** parent, + struct client **client, int locktype, afs_int32 * rights, + afs_int32 * anyrights) +{ + return GetVolumePackageWithCall(acall, NULL, Fid, volptr, targetptr, + chkforDir, parent, client, locktype, + rights, anyrights); +} + /*------------------------------------------------------------------------ * PutVolumePackage * @@ -948,9 +968,9 @@ gvpdone: * Enables keepalives on the call. 
*------------------------------------------------------------------------*/ static void -PutVolumePackage(struct rx_call *acall, Vnode * parentwhentargetnotdir, - Vnode * targetptr, Vnode * parentptr, Volume * volptr, - struct client **client) +PutVolumePackageWithCall(struct rx_call *acall, Vnode * parentwhentargetnotdir, + Vnode * targetptr, Vnode * parentptr, Volume * volptr, + struct client **client, struct VCallByVol *cbv) { Error fileCode = 0; /* Error code returned by the volume package */ @@ -968,7 +988,7 @@ PutVolumePackage(struct rx_call *acall, Vnode * parentwhentargetnotdir, osi_Assert(!fileCode || (fileCode == VSALVAGE)); } if (volptr) { - VPutVolume(volptr); + VPutVolumeWithCall(volptr, cbv); } rx_KeepAliveOn(acall); @@ -977,6 +997,15 @@ PutVolumePackage(struct rx_call *acall, Vnode * parentwhentargetnotdir, } } /*PutVolumePackage */ +static_inline void +PutVolumePackage(struct rx_call *acall, Vnode * parentwhentargetnotdir, + Vnode * targetptr, Vnode * parentptr, Volume * volptr, + struct client **client) +{ + PutVolumePackageWithCall(acall, parentwhentargetnotdir, targetptr, + parentptr, volptr, client, NULL); +} + static int VolumeOwner(struct client *client, Vnode * targetptr) { @@ -2290,6 +2319,7 @@ common_FetchData64(struct rx_call *acall, struct AFSFid *Fid, afs_int32 rights, anyrights; /* rights for this and any user */ struct client *t_client = NULL; /* tmp ptr to client data */ struct in_addr logHostAddr; /* host ip holder for inet_ntoa */ + struct VCallByVol tcbv, *cbv = NULL; #if FS_STATS_DETAILED struct fs_stats_opTimingData *opP; /* Ptr to this op's timing struct */ struct fs_stats_xferData *xferP; /* Ptr to this op's byte size struct */ @@ -2329,12 +2359,17 @@ common_FetchData64(struct rx_call *acall, struct AFSFid *Fid, ("SRXAFS_FetchData, Fid = %u.%u.%u, Host %s:%d, Id %d\n", Fid->Volume, Fid->Vnode, Fid->Unique, inet_ntoa(logHostAddr), ntohs(rxr_PortOf(tcon)), t_client->ViceId)); + + queue_NodeInit(&tcbv); + tcbv.call = acall; + cbv 
= &tcbv; + /* * Get volume/vnode for the fetched file; caller's access rights to * it are also returned */ if ((errorCode = - GetVolumePackage(acall, Fid, &volptr, &targetptr, DONTCHECK, + GetVolumePackageWithCall(acall, cbv, Fid, &volptr, &targetptr, DONTCHECK, &parentwhentargetnotdir, &client, READ_LOCK, &rights, &anyrights))) goto Bad_FetchData; @@ -2474,8 +2509,8 @@ common_FetchData64(struct rx_call *acall, struct AFSFid *Fid, Bad_FetchData: /* Update and store volume/vnode and parent vnodes back */ - (void)PutVolumePackage(acall, parentwhentargetnotdir, targetptr, - (Vnode *) 0, volptr, &client); + (void)PutVolumePackageWithCall(acall, parentwhentargetnotdir, targetptr, + (Vnode *) 0, volptr, &client, cbv); ViceLog(2, ("SRXAFS_FetchData returns %d\n", errorCode)); errorCode = CallPostamble(tcon, errorCode, thost); @@ -7371,10 +7406,15 @@ FetchData_RXStyle(Volume * volptr, Vnode * targetptr, (*a_bytesFetchedP) += nBytes; #endif /* FS_STATS_DETAILED */ if (nBytes != wlen) { + afs_int32 err; FDH_CLOSE(fdP); #ifndef HAVE_PIOV FreeSendBuffer((struct afs_buffer *)tbuffer); #endif /* HAVE_PIOV */ + err = VIsGoingOffline(volptr); + if (err) { + return err; + } return -31; } Len -= wlen; diff --git a/src/viced/viced.c b/src/viced/viced.c index 13d5d5187..a7e022101 100644 --- a/src/viced/viced.c +++ b/src/viced/viced.c @@ -199,6 +199,8 @@ int udpBufSize = 0; /* UDP buffer size for receive */ int sendBufSize = 16384; /* send buffer size */ int saneacls = 0; /* Sane ACLs Flag */ static int unsafe_attach = 0; /* avoid inUse check on vol attach? 
*/ +static int offline_timeout = -1; /* -offline-timeout option */ +static int offline_shutdown_timeout = -1; /* -offline-shutdown-timeout option */ struct timeval tp; @@ -966,6 +968,8 @@ FlagMsg(void) fputs("[-nojumbo (disable jumbogram network packets - deprecated)] ", stdout); fputs("[-jumbo (enable jumbogram network packets)] ", stdout); fputs("[-sync ]", stdout); + fputs("[-offline-timeout ]", stdout); + fputs("[-offline-shutdown-timeout ]", stdout); /* fputs("[-enable_peer_stats] ", stdout); */ /* fputs("[-enable_process_stats] ", stdout); */ fputs("[-help]\n", stdout); @@ -1448,6 +1452,45 @@ ParseArgs(int argc, char *argv[]) return -1; } } + else if (strcmp(argv[i], "-offline-timeout") == 0) { + if (i + 1 >= argc) { + printf("You have to specify -offline-timeout \n"); + return -1; + } + offline_timeout = atoi(argv[++i]); +#ifndef AFS_PTHREAD_ENV + if (offline_timeout != -1) { + printf("The only valid -offline-timeout value for the LWP " + "fileserver is -1\n"); + return -1; + } +#endif /* AFS_PTHREAD_ENV */ + if (offline_timeout < -1) { + printf("Invalid -offline-timeout value %s; the only valid " + "negative value is -1\n", argv[i]); + return -1; + } + } + else if (strcmp(argv[i], "-offline-shutdown-timeout") == 0) { + if (i + 1 >= argc) { + printf("You have to specify -offline-shutdown-timeout " + "\n"); + return -1; + } + offline_shutdown_timeout = atoi(argv[++i]); +#ifndef AFS_PTHREAD_ENV + if (offline_shutdown_timeout != -1) { + printf("The only valid -offline-shutdown-timeout value for the " + "LWP fileserver is -1\n"); + return -1; + } +#endif /* AFS_PTHREAD_ENV */ + if (offline_shutdown_timeout < -1) { + printf("Invalid -offline-timeout value %s; the only valid " + "negative value is -1\n", argv[i]); + return -1; + } + } else { return (-1); } @@ -2282,6 +2325,18 @@ main(int argc, char *argv[]) opts.nSmallVnodes = nSmallVns; opts.volcache = volcache; opts.unsafe_attach = unsafe_attach; + if (offline_timeout != -1) { + opts.interrupt_rxcall = 
rx_InterruptCall; + opts.offline_timeout = offline_timeout; + } + if (offline_shutdown_timeout == -1) { + /* default to -offline-timeout, if shutdown-specific timeout is not + * specified */ + opts.offline_shutdown_timeout = offline_timeout; + } else { + opts.interrupt_rxcall = rx_InterruptCall; + opts.offline_shutdown_timeout = offline_shutdown_timeout; + } if (VInitVolumePackage2(fileServer, &opts)) { ViceLog(0, diff --git a/src/vol/volume.c b/src/vol/volume.c index 49602730c..700c43571 100644 --- a/src/vol/volume.c +++ b/src/vol/volume.c @@ -3975,20 +3975,6 @@ VGetVolume(Error * ec, Error * client_ec, VolId volumeId) return retVal; } -/* same as VGetVolume, but if a volume is waiting to go offline, we only wait - * until time ts. If we have waited longer than that, we return that it is - * actually offline, instead of waiting for it to go offline */ -Volume * -VGetVolumeTimed(Error * ec, Error * client_ec, VolId volumeId, - const struct timespec *ts) -{ - Volume *retVal; - VOL_LOCK; - retVal = GetVolume(ec, client_ec, volumeId, NULL, ts); - VOL_UNLOCK; - return retVal; -} - /** * Get a volume reference associated with an RX call. * diff --git a/src/vol/volume.h b/src/vol/volume.h index b30ca3d3e..f9ae8af94 100644 --- a/src/vol/volume.h +++ b/src/vol/volume.h @@ -804,8 +804,6 @@ struct volHeader { extern char *VSalvageMessage; /* Canonical message when a volume is forced * offline */ extern Volume *VGetVolume(Error * ec, Error * client_ec, VolId volumeId); -extern Volume *VGetVolumeTimed(Error * ec, Error * client_ec, VolId volumeId, - const struct timespec *ts); extern Volume *VGetVolumeWithCall(Error * ec, Error * client_ec, VolId volumeId, const struct timespec *ts, struct VCallByVol *cbv); extern Volume *VGetVolume_r(Error * ec, VolId volumeId); -- 2.39.5