From 8084fc28a3069949bdc788f048fa8a96c012b96d Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Thu, 31 Mar 2011 17:22:12 -0500 Subject: [PATCH] salvager: Error volumes on GetInodeSummary errors When GetInodeSummary fails due to an internal failure (not from just failing to find applicable inodes), currently it just returns an error, and does not return the checked-out singleVolumeNumber back to the fileserver. When we fail to gather inodes, we should force the volume to an error state, since we haven't salvaged the volume. But if we fail to find any applicable inodes, we just want to VOL_DONE the volume, since the header has possibly been destroyed, and the volume doesn't exist. So, issue an FSYNC_VOL_FORCE_ERROR command when we encounter errors in GetInodeSummary, except when we fail to find applicable inodes. Change-Id: I4aed126ec09c4b93cfd8572abeba3d22bff0e6a5 Reviewed-on: http://gerrit.openafs.org/4439 Tested-by: BuildBot Reviewed-by: Derrick Brashear --- src/vol/vol-salvage.c | 61 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/src/vol/vol-salvage.c b/src/vol/vol-salvage.c index 25aa463f6..cdfded23d 100644 --- a/src/vol/vol-salvage.c +++ b/src/vol/vol-salvage.c @@ -164,6 +164,7 @@ Vnodes with 0 inode pointers in RW volumes are now deleted. #include "volume.h" #include "partition.h" #include "daemon_com.h" +#include "daemon_com_inline.h" #include "fssync.h" #include "volume_inline.h" #include "salvsync.h" @@ -279,6 +280,7 @@ static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode); static int AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber); static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId); +static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId); #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL) static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId); @@ -1143,6 +1145,8 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume char *part = salvinfo->fileSysPath; char *tdir; int i; + int retcode = 0; + int deleted = 0; afs_sfsize_t st_size; /* This file used to come from vfsck; cobble it up ourselves now... */ @@ -1152,7 +1156,8 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) { if (err == -2) { Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev); - return -1; + retcode = -1; + goto error; } Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev); } @@ -1205,17 +1210,38 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume else { struct VolumeSummary *vsp; int i; + int foundSVN = 0; GetVolumeSummary(salvinfo, singleVolumeNumber); for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) { - if (vsp->fileName) + if (vsp->fileName) { + if (vsp->header.id == singleVolumeNumber) { + foundSVN = 1; + } DeleteExtraVolumeHeaderFile(salvinfo, vsp); + } + } + + if (!foundSVN) { + if (Testing) { + MaybeAskOnline(salvinfo, singleVolumeNumber); + } else { + /* make sure we get rid of stray .vol headers, even if + * they're not in our volume summary (might happen if + * e.g. something else created them and they're not in the + * fileserver VGC) */ + VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, + singleVolumeNumber, 0 /*parent*/); + AskDelete(salvinfo, singleVolumeNumber); + } } } Log("%s vice inodes on %s; not salvaged\n", singleVolumeNumber ? "No applicable" : "No", dev); - return -1; + retcode = -1; + deleted = 1; + goto error; } ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo)); if (ip == NULL) { @@ -1241,7 +1267,8 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) { Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev); OS_CLOSE(summaryFile); - return -1; + retcode = -1; + goto error; } summary.index += (summary.nInodes); nInodes -= summary.nInodes; @@ -1253,7 +1280,8 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume if (OS_SYNC(summaryFile) == -1) { Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev); OS_CLOSE(summaryFile); - return -1; + retcode = -1; + goto error; } if (canfork && !debug) { ShowLog = 0; @@ -1283,7 +1311,13 @@ GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolume } Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size); OS_CLOSE(summaryFile); - return 0; + + error: + if (retcode && singleVolumeNumber && !deleted) { + AskError(salvinfo, singleVolumeNumber); + } + + return retcode; } /* Comparison routine for volume sort. @@ -4305,6 +4339,21 @@ LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId) } #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */ +static void +AskError(struct SalvInfo *salvinfo, VolumeId volumeId) +{ +#if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL) + afs_int32 code; + code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name, + FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL); + if (code != SYNC_OK) { + Log("AskError: failed to force volume %lu into error state; " + "SYNC error code %ld (%s)\n", (long unsigned)volumeId, + (long)code, SYNC_res2string(code)); + } +#endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */ +} + void AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId) { -- 2.39.5