From: Andrew Deason
Date: Wed, 29 Jul 2009 15:56:34 +0000 (-0400)
Subject: DAFS: avoid shutdown hang during salvage
X-Git-Tag: openafs-devel-1_5_63~55
X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=64bad257d66b7d70ebdb62a10217818730e2e0a6;p=packages%2Fo%2Fopenafs.git

DAFS: avoid shutdown hang during salvage

Right now, when the fileserver shuts down, it tries to cancel any
demand-salvages in progress. This causes the fileserver to hang, since,
during shutdown, the salvageserver may already be gone.

This avoids cancelling in-progress salvages, and allows salvages to
continue after the fileserver has shut down. To do this, the
salvageserver now closes SALVSYNC sockets in spawned children, so the
children don't get SALVSYNC requests directed at them, since they can't
handle them. The salvageserver also now periodically scans for logs of
salvaging children that have terminated, and incorporates them into
SalsrvLog, just like it does for its own children.

The salvageserver children that actually perform salvages now also set
the inUse volume header field to their programType, so the fileserver
knows not to schedule new salvages for that volume in the event of a
fileserver restart during a salvage.

FIXES 124486

Reviewed-on: http://gerrit.openafs.org/279
Reviewed-by: Derrick Brashear
Tested-by: Derrick Brashear
---

diff --git a/src/vol/salvaged.c b/src/vol/salvaged.c
index 6af6013a3..c382200a0 100644
--- a/src/vol/salvaged.c
+++ b/src/vol/salvaged.c
@@ -165,6 +165,9 @@ static int Reap_Child(char * prog, int * pid, int * status);
 static void * SalvageLogCleanupThread(void *);
 static int SalvageLogCleanup(int pid);
 
+static void * SalvageLogScanningThread(void *);
+static void ScanLogs(struct rx_queue *log_watch_queue);
+
 struct log_cleanup_node {
     struct rx_queue q;
     int pid;
@@ -524,6 +527,10 @@ SalvageServer(void)
                           &attrs,
                           &SalvageLogCleanupThread,
                           NULL) == 0);
+    assert(pthread_create(&tid,
+                          &attrs,
+                          &SalvageLogScanningThread,
+                          NULL) == 0);
 
     /* loop forever serving requests */
     while (1) {
@@ -745,3 +752,115 @@ SalvageLogCleanup(int pid)
 
     return 0;
 }
+
+/* wake up every five minutes to see if a non-child salvage has finished */
+#define SALVAGE_SCAN_POLL_INTERVAL 300
+
+/**
+ * Thread to look for SalvageLog.$pid files that are not from our child
+ * worker salvagers, and notify SalvageLogCleanupThread to clean them
+ * up. This can happen if we restart during salvages, or the
+ * salvageserver crashes or something.
+ *
+ * @param arg unused
+ *
+ * @return always NULL
+ */
+static void *
+SalvageLogScanningThread(void * arg)
+{
+    struct rx_queue log_watch_queue;
+    struct log_cleanup_node * cleanup;
+
+    queue_Init(&log_watch_queue);
+
+    {
+        DIR *dp;
+        struct dirent *dirp;
+        char prefix[AFSDIR_PATH_MAX];
+        size_t prefix_len;
+
+        afs_snprintf(prefix, sizeof(prefix), "%s.", AFSDIR_SLVGLOG_FILE);
+        prefix_len = strlen(prefix);
+
+        dp = opendir(AFSDIR_LOGS_DIR);
+        assert(dp);
+
+        while ((dirp = readdir(dp)) != NULL) {
+            pid_t pid;
+            struct log_cleanup_node *cleanup;
+            int i;
+
+            if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
+                /* not a salvage logfile; skip */
+                continue;
+            }
+
+            errno = 0;
+            pid = strtol(dirp->d_name + prefix_len, NULL, 10);
+
+            if (errno != 0) {
+                /* file is SalvageLog.<something> but isn't
+                 * a pid, so skip */
+                continue;
+            }
+
+            VOL_LOCK;
+            for (i = 0; i < Parallel; ++i) {
+                if (pid == child_slot[i]) {
+                    break;
+                }
+            }
+            VOL_UNLOCK;
+            if (i < Parallel) {
+                /* this pid is one of our children, so the reaper thread
+                 * will take care of it; skip */
+                continue;
+            }
+
+            cleanup =
+                (struct log_cleanup_node *) malloc(sizeof(struct log_cleanup_node));
+            cleanup->pid = pid;
+
+            queue_Append(&log_watch_queue, cleanup);
+        }
+
+        closedir(dp);
+    }
+
+    ScanLogs(&log_watch_queue);
+
+    while (queue_IsNotEmpty(&log_watch_queue)) {
+        sleep(SALVAGE_SCAN_POLL_INTERVAL);
+        ScanLogs(&log_watch_queue);
+    }
+
+    return NULL;
+}
+
+/**
+ * look through log_watch_queue, and if any processes are not still
+ * running, hand them off to the SalvageLogCleanupThread
+ *
+ * @param log_watch_queue a queue of PIDs that we should clean up if
+ * that PID has died
+ */
+static void
+ScanLogs(struct rx_queue *log_watch_queue)
+{
+    struct log_cleanup_node *cleanup, *next;
+
+    assert(pthread_mutex_lock(&worker_lock) == 0);
+
+    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
+        /* if a process is still running, assume it's the salvage process
+         * still going, and keep waiting for it */
+        if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
+            queue_Remove(cleanup);
+            queue_Append(&log_cleanup_queue, cleanup);
+            assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
+        }
+    }
+
+    assert(pthread_mutex_unlock(&worker_lock) == 0);
+}
diff --git a/src/vol/salvsync-server.c b/src/vol/salvsync-server.c
index bdc8bd0ff..6eb46af67 100644
--- a/src/vol/salvsync-server.c
+++ b/src/vol/salvsync-server.c
@@ -189,6 +189,9 @@ static struct QueueHead pendingQueue;  /* volumes being salvaged */
  */
 static int partition_salvaging[VOLMAXPARTS+1];
 
+static int HandlerFD[MAXHANDLERS];
+static void (*HandlerProc[MAXHANDLERS]) (int);
+
 #define VSHASH_SIZE 64
 #define VSHASH_MASK (VSHASH_SIZE-1)
 #define VSHASH(vid) ((vid)&VSHASH_MASK)
@@ -292,6 +295,21 @@ SALVSYNC_salvInit(void)
     assert(pthread_create(&tid, &tattr, SALVSYNC_syncThread, NULL) == 0);
 }
 
+static void
+CleanFDs(void)
+{
+    int i;
+    for (i = 0; i < MAXHANDLERS; ++i) {
+        if (HandlerFD[i] >= 0) {
+            SALVSYNC_Drop(HandlerFD[i]);
+        }
+    }
+
+    /* just in case we were in AcceptOff mode, and thus this fd wouldn't
+     * have a handler */
+    close(salvsync_server_state.fd);
+    salvsync_server_state.fd = -1;
+}
 
 static fd_set SALVSYNC_readfds;
 
@@ -304,6 +322,11 @@ SALVSYNC_syncThread(void * args)
     int tid;
     SYNC_server_state_t * state = &salvsync_server_state;
 
+    /* when we fork, the child needs to close the salvsync server sockets,
+     * otherwise, it may get salvsync requests, instead of the parent
+     * salvageserver */
+    assert(pthread_atfork(NULL, NULL, CleanFDs) == 0);
+
     SYNC_getAddr(&state->endpoint, &state->addr);
     SYNC_cleanupSock(state);
 
@@ -362,6 +385,12 @@ SALVSYNC_com(osi_socket fd)
     SALVSYNC_command scom;
     SALVSYNC_response sres;
     SYNC_PROTO_BUF_DECL(buf);
+
+    memset(&com, 0, sizeof(com));
+    memset(&res, 0, sizeof(res));
+    memset(&scom, 0, sizeof(scom));
+    memset(&sres, 0, sizeof(sres));
+    memset(&sres_hdr, 0, sizeof(sres));
 
     com.payload.buf = (void *)buf;
     com.payload.len = SYNC_PROTO_MAX_LEN;
@@ -758,9 +787,6 @@ AcceptOff(void)
 
 /* The multiple FD handling code. */
 
-static int HandlerFD[MAXHANDLERS];
-static void (*HandlerProc[MAXHANDLERS]) (int);
-
 static void
 InitHandler(void)
 {
diff --git a/src/vol/vol-salvage.c b/src/vol/vol-salvage.c
index 2f5866c7c..51bdd158f 100644
--- a/src/vol/vol-salvage.c
+++ b/src/vol/vol-salvage.c
@@ -3237,6 +3237,44 @@ AskOffline(VolumeId volumeId, char * partition)
        Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
        Abort("Salvage aborted\n");
     }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* set inUse = programType in the volume header. We do this in case
+     * the fileserver restarts/crashes while we are salvaging.
+     * Otherwise, the fileserver could attach the volume again on
+     * startup while we are salvaging, which would be very bad, or
+     * schedule another salvage while we are salvaging, which would be
+     * annoying. */
+    if (!Testing) {
+        int fd;
+        IHandle_t *h;
+        char name[VMAXPATHLEN];
+        struct VolumeHeader header;
+        struct VolumeDiskHeader diskHeader;
+        struct VolumeDiskData volHeader;
+
+        afs_snprintf(name, sizeof(name), "%s/" VFORMAT, fileSysPathName,
+                     afs_printable_uint32_lu(volumeId));
+
+        fd = afs_open(name, O_RDONLY);
+        assert(fd >= 0);
+        assert(read(fd, &diskHeader, sizeof(diskHeader)) == sizeof(diskHeader));
+        assert(diskHeader.stamp.magic == VOLUMEHEADERMAGIC);
+        close(fd);
+
+        DiskToVolumeHeader(&header, &diskHeader);
+
+        IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
+        assert(IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
+        assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
+
+        volHeader.inUse = programType;
+
+        assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
+
+        IH_RELEASE(h);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
 }
 
 void
diff --git a/src/vol/volume.c b/src/vol/volume.c
index d5ed2fc7a..b440e9f7f 100644
--- a/src/vol/volume.c
+++ b/src/vol/volume.c
@@ -353,7 +353,6 @@ static void VVByPListWait_r(struct DiskPartition64 * dp);
 static int VCheckSalvage(register Volume * vp);
 static int VUpdateSalvagePriority_r(Volume * vp);
 static int VScheduleSalvage_r(Volume * vp);
-static int VCancelSalvage_r(Volume * vp, int reason);
 
 /* Volume hash table */
 static void VReorderHash_r(VolumeHashChainHead * head, Volume * pp, Volume * vp);
@@ -1344,9 +1343,10 @@ VShutdownVolume_r(Volume * vp)
 
     switch(V_attachState(vp)) {
     case VOL_STATE_SALVAGING:
-        /* make sure salvager knows we don't want
-         * the volume back */
-        VCancelSalvage_r(vp, SALVSYNC_SHUTDOWN);
+        /* Leave salvaging volumes alone. Any in-progress salvages will
+         * continue working after viced shuts down. This is intentional.
+         */
+
     case VOL_STATE_PREATTACHED:
     case VOL_STATE_ERROR:
         VChangeState_r(vp, VOL_STATE_UNATTACHED);
@@ -4008,6 +4008,35 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags)
         vp->salvage.requested = 1;
         vp->salvage.reason = reason;
         vp->stats.last_salvage = FT_ApproxTime();
+        if (VIsSalvager(V_inUse(vp))) {
+            Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid);
+            Log("  didn't request a salvage. Forcing it offline waiting for the\n");
+            Log("  salvage to finish; if you are sure no salvage is running,\n");
+            Log("  run a salvage manually.\n");
+
+            /* make sure neither VScheduleSalvage_r nor
+             * VUpdateSalvagePriority_r try to schedule another salvage */
+            vp->salvage.requested = vp->salvage.scheduled = 0;
+
+            /* these stats aren't correct, but doing this makes them
+             * slightly closer to being correct */
+            vp->stats.salvages++;
+            vp->stats.last_salvage_req = FT_ApproxTime();
+            IncUInt64(&VStats.salvages);
+
+            VChangeState_r(vp, VOL_STATE_ERROR);
+            *ec = VSALVAGE;
+            code = 1;
+
+        } else if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
+            VChangeState_r(vp, VOL_STATE_SALVAGING);
+            *ec = VSALVAGING;
+        } else {
+            Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
+            VChangeState_r(vp, VOL_STATE_ERROR);
+            *ec = VSALVAGE;
+            code = 1;
+        }
         if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
             /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
              * so that the the next VAttachVolumeByVp_r() invocation
@@ -4017,15 +4046,6 @@
              */
             FreeVolumeHeader(vp);
         }
-        if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
-            VChangeState_r(vp, VOL_STATE_SALVAGING);
-            *ec = VSALVAGING;
-        } else {
-            Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
-            VChangeState_r(vp, VOL_STATE_ERROR);
-            *ec = VSALVAGE;
-            code = 1;
-        }
     }
     return code;
 }
@@ -4186,57 +4206,6 @@ VScheduleSalvage_r(Volume * vp)
     return ret;
 }
 
-/**
- * ask salvageserver to cancel a scheduled salvage operation.
- *
- * @param[in] vp      pointer to volume object
- * @param[in] reason  SALVSYNC protocol reason code
- *
- * @return operation status
- *    @retval 0 success
- *    @retval 1 request failed
- *
- * @pre VOL_LOCK is held.
- *
- * @post salvageserver is sent a request to cancel the volume salvage.
- *       volume is transitioned to a hard error state.
- *
- * @internal volume package internal use only.
- */
-static int
-VCancelSalvage_r(Volume * vp, int reason)
-{
-    int code, ret = 0;
-
-#ifdef SALVSYNC_BUILD_CLIENT
-    if (vp->salvage.scheduled) {
-        VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
-        VOL_UNLOCK;
-
-        /* can't use V_id() since there's no guarantee
-         * we have the disk data header at this point */
-        code = SALVSYNC_SalvageVolume(vp->hashid,
-                                      VPartitionPath(vp->partition),
-                                      SALVSYNC_CANCEL,
-                                      reason,
-                                      0,
-                                      NULL);
-
-        VOL_LOCK;
-        VChangeState_r(vp, VOL_STATE_ERROR);
-
-        if (code == SYNC_OK) {
-            vp->salvage.scheduled = 0;
-            vp->salvage.requested = 0;
-        } else {
-            ret = 1;
-        }
-    }
-#endif /* SALVSYNC_BUILD_CLIENT */
-    return ret;
-}
-
-
 #ifdef SALVSYNC_BUILD_CLIENT
 /**
  * connect to the salvageserver SYNC service.
diff --git a/src/vol/volume_inline.h b/src/vol/volume_inline.h
index ef54b4577..a26a6dccd 100644
--- a/src/vol/volume_inline.h
+++ b/src/vol/volume_inline.h
@@ -11,6 +11,26 @@
 #define _AFS_VOL_VOLUME_INLINE_H 1
 
 #include "volume.h"
 
+/**
+ * tell caller whether the given program type represents a salvaging
+ * program.
+ *
+ * @param type  program type enumeration
+ *
+ * @return whether program state is a salvager
+ *   @retval 0  type is a non-salvaging program
+ *   @retval 1  type is a salvaging program
+ */
+static_inline int
+VIsSalvager(ProgramType type)
+{
+    switch(type) {
+    case salvager:
+    case salvageServer:
+        return 1;
+    }
+    return 0;
+}
+
 /***************************************************/
 /* demand attach fs state machine routines         */
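
Note: the ScanLogs() routine added above relies on the standard POSIX
kill(pid, 0) liveness check: signal 0 delivers nothing but still performs the
existence and permission checks, so ESRCH reliably means the process is gone.
A minimal standalone sketch of that idiom follows (not part of the patch; the
pid value and helper name are purely illustrative):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>

/* Return 1 if the process no longer exists, 0 if it is still running
 * (or if we merely lack permission to signal it, which means it exists). */
static int
process_has_exited(pid_t pid)
{
    return (kill(pid, 0) < 0 && errno == ESRCH) ? 1 : 0;
}

int
main(void)
{
    pid_t pid = 12345;  /* hypothetical pid parsed from a SalvageLog.<pid> name */

    if (process_has_exited(pid))
        printf("pid %d has exited; its salvage log can be collected\n", (int)pid);
    else
        printf("pid %d still appears to be running; keep polling\n", (int)pid);
    return 0;
}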