/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
if (!*timeout)
break;
if (!(*timeout & 1))
- Log("SYNC_connect temporary failure (will retry)\n");
+ Log("SYNC_connect: temporary failure on circuit '%s' (will retry)\n",
+ state->proto_name);
SYNC_disconnect(state);
sleep(*timeout++);
}
com.hdr.command = SYNC_COM_CHANNEL_CLOSE;
com.hdr.command_len = sizeof(SYNC_command_hdr);
+ com.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
/* in case the other end dropped, don't do any retries */
state->retry_limit = 0;
state->hard_timeout = 0;
- code = SYNC_ask(state, &com, &res);
-
- if (code == SYNC_OK) {
- if (res.hdr.response != SYNC_OK) {
- Log("SYNC_closeChannel: channel shutdown request denied; closing socket anyway\n");
- } else if (!(res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN)) {
- Log("SYNC_closeChannel: channel shutdown request mishandled by server\n");
- }
- } else {
- Log("SYNC_closeChannel: channel communications problem");
- }
-
+ SYNC_ask(state, &com, &res);
SYNC_disconnect(state);
- return code;
+ return SYNC_OK;
}
int
if (code == SYNC_OK) {
break;
} else if (code == SYNC_BAD_COMMAND) {
- Log("SYNC_ask: protocol mismatch; make sure fileserver, volserver, salvageserver and salvager are same version\n");
+ Log("SYNC_ask: protocol mismatch on circuit '%s'; make sure "
+ "fileserver, volserver, salvageserver and salvager are same "
+ "version\n", state->proto_name);
break;
- } else if (code == SYNC_COM_ERROR) {
- Log("SYNC_ask: protocol communications failure; attempting reconnect to server\n");
+ } else if ((code == SYNC_COM_ERROR) && (tries < state->retry_limit)) {
+ Log("SYNC_ask: protocol communications failure on circuit '%s'; "
+ "attempting reconnect to server\n", state->proto_name);
SYNC_reconnect(state);
/* try again */
} else {
- /* unknown (probably protocol-specific) response code, pass it up to the caller, and let them deal with it */
+ /*
+ * unknown (probably protocol-specific) response code, pass it up to
+ * the caller, and let them deal with it
+ */
break;
}
}
if (code == SYNC_COM_ERROR) {
- Log("SYNC_ask: fatal protocol error; disabling sync protocol to server running on port %d until next server restart\n",
- state->port);
+ Log("SYNC_ask: fatal protocol error on circuit '%s'; disabling sync "
+ "protocol to server running on port %d until next server restart\n",
+ state->proto_name, state->port);
state->fatal_error = 1;
}
#endif
if (state->fd == -1) {
- Log("SYNC_ask: invalid sync file descriptor\n");
+ Log("SYNC_ask: invalid sync file descriptor on circuit '%s'\n",
+ state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
if (com->hdr.command_len > SYNC_PROTO_MAX_LEN) {
- Log("SYNC_ask: internal SYNC buffer too small; please file a bug\n");
+ Log("SYNC_ask: internal SYNC buffer too small on circuit '%s'; "
+ "please file a bug\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
#ifdef AFS_NT40_ENV
n = send(state->fd, buf, com->hdr.command_len, 0);
if (n != com->hdr.command_len) {
- Log("SYNC_ask: write failed\n");
+ Log("SYNC_ask: write failed on circuit '%s'\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
+ if (com->hdr.command == SYNC_COM_CHANNEL_CLOSE) {
+ /* short circuit close channel requests */
+ res->hdr.response = SYNC_OK;
+ goto done;
+ }
+
n = recv(state->fd, buf, SYNC_PROTO_MAX_LEN, 0);
if (n == 0 || (n < 0 && WSAEINTR != WSAGetLastError())) {
- Log("SYNC_ask: No response\n");
+ Log("SYNC_ask: No response on circuit '%s'\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
#else /* !AFS_NT40_ENV */
n = write(state->fd, buf, com->hdr.command_len);
if (com->hdr.command_len != n) {
- Log("SYNC_ask: write failed\n");
+ Log("SYNC_ask: write failed on circuit '%s'\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
+ if (com->hdr.command == SYNC_COM_CHANNEL_CLOSE) {
+ /* short circuit close channel requests */
+ res->hdr.response = SYNC_OK;
+ goto done;
+ }
+
/* receive the response */
iov[0].iov_base = (char *)&res->hdr;
iov[0].iov_len = sizeof(res->hdr);
}
n = readv(state->fd, iov, iovcnt);
if (n == 0 || (n < 0 && errno != EINTR)) {
- Log("SYNC_ask: No response\n");
+ Log("SYNC_ask: No response on circuit '%s'\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
res->recv_len = n;
if (n < sizeof(res->hdr)) {
- Log("SYNC_ask: response too short\n");
+ Log("SYNC_ask: response too short on circuit '%s'\n",
+ state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
#endif
if ((n - sizeof(res->hdr)) > res->payload.len) {
- Log("SYNC_ask: response too long\n");
+ Log("SYNC_ask: response too long on circuit '%s'\n",
+ state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
#endif
if (res->hdr.response_len != n) {
- Log("SYNC_ask: length field in response inconsistent\n");
+ Log("SYNC_ask: length field in response inconsistent "
+ "on circuit '%s'\n", state->proto_name);
res->hdr.response = SYNC_COM_ERROR;
goto done;
}
if (res->hdr.response == SYNC_DENIED) {
- Log("SYNC_ask: negative response\n");
+ Log("SYNC_ask: negative response on circuit '%s'\n", state->proto_name);
}
done:
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
/* general reason codes */
#define SYNC_REASON_NONE 0
#define SYNC_REASON_MALFORMED_PACKET 1
-
+#define SYNC_REASON_NOMEM 2
/* SYNC protocol flags
*
afs_uint32 proto_version;
int retry_limit; /* max number of times for SYNC_ask to retry */
afs_int32 hard_timeout; /* upper limit on time to keep trying */
+ char * proto_name; /**< sync protocol associated with this conn */
byte fatal_error; /* fatal error on this client conn */
} SYNC_client_state;
extern int LogLevel;
-static SYNC_client_state fssync_state = { -1, 2040, FSYNC_PROTO_VERSION, 5, 120 };
+static SYNC_client_state fssync_state =
+ { -1, /* file descriptor */
+ 2040, /* port number */
+ FSYNC_PROTO_VERSION, /* protocol version */
+ 5, /* connect retry limit */
+ 120, /* hard timeout */
+ "FSSYNC", /* protocol name string */
+ };
#ifdef AFS_PTHREAD_ENV
static pthread_mutex_t vol_fsync_mutex;
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2007 Sine Nomine Associates
*/
/*
return;
}
+ if (com.recv_len < sizeof(com.hdr)) {
+ Log("FSSYNC_com: invalid protocol message length (%u)\n", com.recv_len);
+ res.hdr.response = SYNC_COM_ERROR;
+ res.hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+ res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ goto respond;
+ }
+
if (com.hdr.proto_version != FSYNC_PROTO_VERSION) {
Log("FSYNC_com: invalid protocol version (%u)\n", com.hdr.proto_version);
res.hdr.response = SYNC_COM_ERROR;
goto respond;
}
+ if (com.hdr.command == SYNC_COM_CHANNEL_CLOSE) {
+ res.hdr.response = SYNC_OK;
+ res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ goto respond;
+ }
+
+
VOL_LOCK;
switch (com.hdr.command) {
case FSYNC_VOL_ON:
case FSYNC_VOL_STATS_VLRU:
res.hdr.response = FSYNC_com_StatsOp(fd, &com, &res);
break;
- case SYNC_COM_CHANNEL_CLOSE:
- res.hdr.response = SYNC_OK;
- res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
- break;
default:
res.hdr.response = SYNC_BAD_COMMAND;
break;
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
node = SALVSYNC_getWork();
assert(node != NULL);
+ Log("dispatching child to salvage volume %u...\n",
+ node->command.sop.parent);
+
VOL_LOCK;
/* find a slot */
for (slot = 0; slot < Parallel; slot++) {
int ret;
struct DiskPartition * partP;
- VChildProcReconnectFS();
+ /* do not allow further forking inside salvager */
+ canfork = 0;
/* do not attempt to close parent's logFile handle as
* another thread may have held the lock on the FILE
ShowLog = 0;
}
- if (node->command.sop.volume <= 0) {
+ if (node->command.sop.parent <= 0) {
Log("salvageServer: invalid volume id specified; salvage aborted\n");
return 1;
}
}
/* Salvage individual volume; don't notify fs */
- SalvageFileSys1(partP, node->command.sop.volume);
-
- VDisconnectFS();
+ SalvageFileSys1(partP, node->command.sop.parent);
fclose(logFile);
return 0;
/* ok, we've reaped a child */
current_workers--;
- SALVSYNC_doneWorkByPid(pid, 0);
+ SALVSYNC_doneWorkByPid(pid, WEXITSTATUS(status));
assert(pthread_cond_broadcast(&worker_cv) == 0);
}
*pid = ret;
if (WCOREDUMP(*status))
Log("\"%s\" core dumped!\n", prog);
- if (WIFSIGNALED(*status) != 0 || WEXITSTATUS(*status) != 0)
+ if ((WIFSIGNALED(*status) != 0) ||
+ ((WEXITSTATUS(*status) != 0) &&
+ (WEXITSTATUS(*status) != SALSRV_EXIT_VOLGROUP_LINK)))
Log("\"%s\" (pid=%d) terminated abnormally!\n", prog, ret);
} else {
Log("wait returned -1\n");
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
extern int VInit;
extern pthread_mutex_t vol_salvsync_mutex;
-static SYNC_client_state salvsync_client_state = { -1, 2041, SALVSYNC_PROTO_VERSION, 5, 120 };
+static SYNC_client_state salvsync_client_state =
+ { -1, /* file descriptor */
+ 2041, /* port */
+ SALVSYNC_PROTO_VERSION, /* protocol version */
+ 5, /* connect retry limit */
+ 120, /* hard timeout */
+ "SALVSYNC", /* protocol name string */
+ };
/*
* client-side routines
SALVSYNC_askSalv(SYNC_command * com, SYNC_response * res)
{
afs_int32 code;
+ SALVSYNC_command_hdr * scom = com->payload.buf;
+
+ scom->hdr_version = SALVSYNC_PROTO_VERSION;
VSALVSYNC_LOCK;
code = SYNC_ask(&salvsync_client_state, com, res);
com.hdr.reason = reason;
com.hdr.command_len = sizeof(com.hdr) + sizeof(scom);
scom.volume = volume;
+ scom.parent = volume;
scom.prio = prio;
if (partName) {
return SALVSYNC_askSalv(&com, res);
}
+afs_int32
+SALVSYNC_LinkVolume(VolumeId parent,
+ VolumeId clone,
+ char * partName,
+ SYNC_response * res_in)
+{
+ SYNC_command com;
+ SYNC_response res_l, *res;
+ SALVSYNC_command_hdr scom;
+ SALVSYNC_response_hdr sres;
+ int n, tot;
+
+ memset(&com, 0, sizeof(com));
+ memset(&scom, 0, sizeof(scom));
+
+ if (res_in) {
+ res = res_in;
+ } else {
+ memset(&res_l, 0, sizeof(res_l));
+ memset(&sres, 0, sizeof(sres));
+ res_l.payload.buf = (void *) &sres;
+ res_l.payload.len = sizeof(sres);
+ res = &res_l;
+ }
+
+ com.payload.buf = (void *) &scom;
+ com.payload.len = sizeof(scom);
+ com.hdr.command = SALVSYNC_OP_LINK;
+ com.hdr.reason = SALVSYNC_REASON_WHATEVER;
+ com.hdr.command_len = sizeof(com.hdr) + sizeof(scom);
+ scom.volume = clone;
+ scom.parent = parent;
+
+ if (partName) {
+ strlcpy(scom.partName, partName, sizeof(scom.partName));
+ } else {
+ scom.partName[0] = '\0';
+ }
+
+ return SALVSYNC_askSalv(&com, res);
+}
+
#endif /* AFS_DEMAND_ATTACH_FS */
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
* SALVSYNC is a feature specific to the demand attach fileserver
*/
+static int AllocNode(struct SalvageQueueNode ** node);
+
static int AddToSalvageQueue(struct SalvageQueueNode * node);
static void DeleteFromSalvageQueue(struct SalvageQueueNode * node);
static void AddToPendingQueue(struct SalvageQueueNode * node);
static void DeleteFromPendingQueue(struct SalvageQueueNode * node);
static struct SalvageQueueNode * LookupPendingCommand(SALVSYNC_command_hdr * qry);
static struct SalvageQueueNode * LookupPendingCommandByPid(int pid);
-static void RaiseCommandPrio(struct SalvageQueueNode * node, SALVSYNC_command_hdr * com);
-
-static struct SalvageQueueNode * LookupNode(VolumeId vid, char * partName);
-static struct SalvageQueueNode * LookupNodeByCommand(SALVSYNC_command_hdr * qry);
+static void UpdateCommandPrio(struct SalvageQueueNode * node);
+static void HandlePrio(struct SalvageQueueNode * clone,
+ struct SalvageQueueNode * parent,
+ afs_uint32 new_prio);
+
+static int LinkNode(struct SalvageQueueNode * parent,
+ struct SalvageQueueNode * clone);
+
+static struct SalvageQueueNode * LookupNode(VolumeId vid, char * partName,
+ struct SalvageQueueNode ** parent);
+static struct SalvageQueueNode * LookupNodeByCommand(SALVSYNC_command_hdr * qry,
+ struct SalvageQueueNode ** parent);
static void AddNodeToHash(struct SalvageQueueNode * node);
static void DeleteNodeFromHash(struct SalvageQueueNode * node);
static afs_int32 SALVSYNC_com_Salvage(SALVSYNC_command * com, SALVSYNC_response * res);
static afs_int32 SALVSYNC_com_Cancel(SALVSYNC_command * com, SALVSYNC_response * res);
-static afs_int32 SALVSYNC_com_RaisePrio(SALVSYNC_command * com, SALVSYNC_response * res);
static afs_int32 SALVSYNC_com_Query(SALVSYNC_command * com, SALVSYNC_response * res);
static afs_int32 SALVSYNC_com_CancelAll(SALVSYNC_command * com, SALVSYNC_response * res);
+static afs_int32 SALVSYNC_com_Link(SALVSYNC_command * com, SALVSYNC_response * res);
extern int LogLevel;
static int AcceptSd = -1; /* Socket used by server for accepting connections */
-/* be careful about rearranging elements in this structure.
- * element placement has been optimized for locality of reference
- * in SALVSYNC_getWork() */
+/**
+ * queue of all volumes waiting to be salvaged.
+ */
struct SalvageQueue {
volatile int total_len;
- volatile afs_int32 last_insert; /* id of last partition to have a salvage node insert */
+ volatile afs_int32 last_insert; /**< id of last partition to have a salvage node inserted */
volatile int len[VOLMAXPARTS+1];
- volatile struct rx_queue part[VOLMAXPARTS+1];
+ volatile struct rx_queue part[VOLMAXPARTS+1]; /**< per-partition queues of pending salvages */
pthread_cond_t cv;
};
static struct SalvageQueue salvageQueue; /* volumes waiting to be salvaged */
+/**
+ * queue of all volumes currently being salvaged.
+ */
struct QueueHead {
- volatile struct rx_queue q;
- volatile int len;
+ volatile struct rx_queue q; /**< queue of salvages in progress */
+ volatile int len; /**< length of in-progress queue */
pthread_cond_t queue_change_cv;
};
static struct QueueHead pendingQueue; /* volumes being salvaged */
static struct QueueHead SalvageHashTable[VSHASH_SIZE];
static struct SalvageQueueNode *
-LookupNode(afs_uint32 vid, char * partName)
+LookupNode(afs_uint32 vid, char * partName,
+ struct SalvageQueueNode ** parent)
{
struct rx_queue *qp, *nqp;
struct SalvageQueueNode *vsp;
if (queue_IsEnd(&SalvageHashTable[idx], qp)) {
vsp = NULL;
}
+
+ if (parent) {
+ if (vsp) {
+ *parent = (vsp->type == SALVSYNC_VOLGROUP_CLONE) ?
+ vsp->volgroup.parent : vsp;
+ } else {
+ *parent = NULL;
+ }
+ }
+
return vsp;
}
static struct SalvageQueueNode *
-LookupNodeByCommand(SALVSYNC_command_hdr * qry)
+LookupNodeByCommand(SALVSYNC_command_hdr * qry,
+ struct SalvageQueueNode ** parent)
{
- return LookupNode(qry->volume, qry->partName);
+ return LookupNode(qry->volume, qry->partName, parent);
}
static void
return;
}
+ if (com.recv_len < sizeof(com.hdr)) {
+ Log("SALVSYNC_com: invalid protocol message length (%u)\n", com.recv_len);
+ res.hdr.response = SYNC_COM_ERROR;
+ res.hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+ res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ goto respond;
+ }
+
if (com.hdr.proto_version != SALVSYNC_PROTO_VERSION) {
Log("SALVSYNC_com: invalid protocol version (%u)\n", com.hdr.proto_version);
res.hdr.response = SYNC_COM_ERROR;
goto respond;
}
+ if (com.hdr.command == SYNC_COM_CHANNEL_CLOSE) {
+ res.hdr.response = SYNC_OK;
+ res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ goto respond;
+ }
+
if (com.recv_len != (sizeof(com.hdr) + sizeof(SALVSYNC_command_hdr))) {
Log("SALVSYNC_com: invalid protocol message length (%u)\n", com.recv_len);
res.hdr.response = SYNC_COM_ERROR;
case SALVSYNC_NOP:
break;
case SALVSYNC_SALVAGE:
+ case SALVSYNC_RAISEPRIO:
res.hdr.response = SALVSYNC_com_Salvage(&scom, &sres);
break;
case SALVSYNC_CANCEL:
/* cancel all queued salvages */
res.hdr.response = SALVSYNC_com_CancelAll(&scom, &sres);
break;
- case SALVSYNC_RAISEPRIO:
- /* raise the priority of a salvage */
- res.hdr.response = SALVSYNC_com_RaisePrio(&scom, &sres);
- break;
case SALVSYNC_QUERY:
/* query whether a volume is done salvaging */
res.hdr.response = SALVSYNC_com_Query(&scom, &sres);
break;
- case SYNC_COM_CHANNEL_CLOSE:
- res.hdr.response = SYNC_OK;
- res.hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ case SALVSYNC_OP_LINK:
+ /* link a clone to its parent in the scheduler */
+ res.hdr.response = SALVSYNC_com_Link(&scom, &sres);
break;
default:
res.hdr.response = SYNC_BAD_COMMAND;
SALVSYNC_com_Salvage(SALVSYNC_command * com, SALVSYNC_response * res)
{
afs_int32 code = SYNC_OK;
- struct SalvageQueueNode * node;
+ struct SalvageQueueNode * node, * clone;
+ int hash = 0;
if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
code = SYNC_FAILED;
goto done;
}
- node = LookupNodeByCommand(com->sop);
+ clone = LookupNodeByCommand(com->sop, &node);
- /* schedule a salvage for this volume */
- if (node != NULL) {
- switch (node->state) {
- case SALVSYNC_STATE_ERROR:
- case SALVSYNC_STATE_DONE:
- memcpy(&node->command.com, com->hdr, sizeof(SYNC_command_hdr));
- memcpy(&node->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
- node->command.sop.prio = 0;
- if (AddToSalvageQueue(node)) {
- code = SYNC_DENIED;
- }
- break;
- default:
- break;
- }
- } else {
- node = (struct SalvageQueueNode *) malloc(sizeof(struct SalvageQueueNode));
- if (node == NULL) {
+ if (node == NULL) {
+ if (AllocNode(&node)) {
code = SYNC_DENIED;
+ res->hdr->reason = SYNC_REASON_NOMEM;
goto done;
}
- memset(node, 0, sizeof(struct SalvageQueueNode));
- memcpy(&node->command.com, com->hdr, sizeof(SYNC_command_hdr));
- memcpy(&node->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
- AddNodeToHash(node);
+ clone = node;
+ hash = 1;
+ }
+
+ HandlePrio(clone, node, com->sop->prio);
+
+ switch (node->state) {
+ case SALVSYNC_STATE_QUEUED:
+ UpdateCommandPrio(node);
+ break;
+
+ case SALVSYNC_STATE_ERROR:
+ case SALVSYNC_STATE_DONE:
+ case SALVSYNC_STATE_UNKNOWN:
+ memcpy(&clone->command.com, com->hdr, sizeof(SYNC_command_hdr));
+ memcpy(&clone->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
if (AddToSalvageQueue(node)) {
- /* roll back */
- DeleteNodeFromHash(node);
- free(node);
- node = NULL;
code = SYNC_DENIED;
- goto done;
}
+ break;
+
+ default:
+ break;
+ }
+
+ if (hash) {
+ AddNodeToHash(node);
}
res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
goto done;
}
- node = LookupNodeByCommand(com->sop);
+ node = LookupNodeByCommand(com->sop, NULL);
if (node == NULL) {
res->sop->state = SALVSYNC_STATE_UNKNOWN;
res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
res->sop->prio = node->command.sop.prio;
res->sop->state = node->state;
- if (node->state == SALVSYNC_STATE_QUEUED) {
+ if ((node->type == SALVSYNC_VOLGROUP_PARENT) &&
+ (node->state == SALVSYNC_STATE_QUEUED)) {
DeleteFromSalvageQueue(node);
}
}
return SYNC_OK;
}
+/**
+ * link a queue node for a clone to its parent volume.
+ */
static afs_int32
-SALVSYNC_com_RaisePrio(SALVSYNC_command * com, SALVSYNC_response * res)
+SALVSYNC_com_Link(SALVSYNC_command * com, SALVSYNC_response * res)
{
afs_int32 code = SYNC_OK;
- struct SalvageQueueNode * node;
+ struct SalvageQueueNode * clone, * parent;
if (SYNC_verifyProtocolString(com->sop->partName, sizeof(com->sop->partName))) {
code = SYNC_FAILED;
goto done;
}
- node = LookupNodeByCommand(com->sop);
+ /* lookup clone's salvage scheduling node */
+ clone = LookupNodeByCommand(com->sop, NULL);
+ if (clone == NULL) {
+ code = SYNC_DENIED;
+ res->hdr->reason = SALVSYNC_REASON_ERROR;
+ goto done;
+ }
- /* raise the priority of a salvage */
- if (node == NULL) {
- code = SALVSYNC_com_Salvage(com, res);
- node = LookupNodeByCommand(com->sop);
- } else {
- switch (node->state) {
- case SALVSYNC_STATE_QUEUED:
- RaiseCommandPrio(node, com->sop);
- break;
- case SALVSYNC_STATE_SALVAGING:
- break;
- case SALVSYNC_STATE_ERROR:
- case SALVSYNC_STATE_DONE:
- code = SALVSYNC_com_Salvage(com, res);
- break;
- default:
- break;
+ /* lookup parent's salvage scheduling node */
+ parent = LookupNode(com->sop->parent, com->sop->partName, NULL);
+ if (parent == NULL) {
+ if (AllocNode(&parent)) {
+ code = SYNC_DENIED;
+ res->hdr->reason = SYNC_REASON_NOMEM;
+ goto done;
}
+ memcpy(&parent->command.com, com->hdr, sizeof(SYNC_command_hdr));
+ memcpy(&parent->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
+ parent->command.sop.volume = parent->command.sop.parent = com->sop->parent;
+ AddNodeToHash(parent);
}
- if (node == NULL) {
- res->sop->prio = 0;
- res->sop->state = SALVSYNC_STATE_UNKNOWN;
- } else {
- res->hdr->flags |= SALVSYNC_FLAG_VOL_STATS_VALID;
- res->sop->prio = node->command.sop.prio;
- res->sop->state = node->state;
+ if (LinkNode(parent, clone)) {
+ code = SYNC_DENIED;
+ goto done;
}
done:
goto done;
}
- node = LookupNodeByCommand(com->sop);
+ LookupNodeByCommand(com->sop, &node);
/* query whether a volume is done salvaging */
if (node == NULL) {
ReleaseReadLock(&SALVSYNC_handler_lock); /* just in case */
}
+static int
+AllocNode(struct SalvageQueueNode ** node_out)
+{
+ int code = 0;
+ struct SalvageQueueNode * node;
+
+ *node_out = node = (struct SalvageQueueNode *)
+ malloc(sizeof(struct SalvageQueueNode));
+ if (node == NULL) {
+ code = 1;
+ goto done;
+ }
+
+ memset(node, 0, sizeof(struct SalvageQueueNode));
+ node->type = SALVSYNC_VOLGROUP_PARENT;
+ node->state = SALVSYNC_STATE_UNKNOWN;
+
+ done:
+ return code;
+}
+
+static int
+LinkNode(struct SalvageQueueNode * parent,
+ struct SalvageQueueNode * clone)
+{
+ int code = 0;
+ int idx;
+
+ /* check for attaching a clone to a clone */
+ if (parent->type != SALVSYNC_VOLGROUP_PARENT) {
+ code = 1;
+ goto done;
+ }
+
+ /* check for pre-existing registration and openings */
+ for (idx = 0; idx < VOLMAXTYPES; idx++) {
+ if (parent->volgroup.children[idx] == clone) {
+ goto linked;
+ }
+ if (parent->volgroup.children[idx] == NULL) {
+ break;
+ }
+ }
+ if (idx == VOLMAXTYPES) {
+ code = 1;
+ goto done;
+ }
+
+ /* link parent and child */
+ parent->volgroup.children[idx] = clone;
+ clone->type = SALVSYNC_VOLGROUP_CLONE;
+ clone->volgroup.parent = parent;
+
+
+ linked:
+ switch (clone->state) {
+ case SALVSYNC_STATE_QUEUED:
+ DeleteFromSalvageQueue(clone);
+
+ case SALVSYNC_STATE_SALVAGING:
+ switch (parent->state) {
+ case SALVSYNC_STATE_UNKNOWN:
+ case SALVSYNC_STATE_ERROR:
+ case SALVSYNC_STATE_DONE:
+ parent->command.sop.prio = clone->command.sop.prio;
+ AddToSalvageQueue(parent);
+ break;
+
+ case SALVSYNC_STATE_QUEUED:
+ if (clone->command.sop.prio) {
+ parent->command.sop.prio += clone->command.sop.prio;
+ UpdateCommandPrio(parent);
+ }
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ done:
+ return code;
+}
+
+static void
+HandlePrio(struct SalvageQueueNode * clone,
+ struct SalvageQueueNode * node,
+ afs_uint32 new_prio)
+{
+ afs_uint32 delta;
+
+ switch (node->state) {
+ case SALVSYNC_STATE_ERROR:
+ case SALVSYNC_STATE_DONE:
+ case SALVSYNC_STATE_UNKNOWN:
+ node->command.sop.prio = 0;
+ break;
+ }
+
+ if (new_prio < clone->command.sop.prio) {
+ /* strange. let's just set our delta to 1 */
+ delta = 1;
+ } else {
+ delta = new_prio - clone->command.sop.prio;
+ }
+
+ if (clone->type == SALVSYNC_VOLGROUP_CLONE) {
+ clone->command.sop.prio = new_prio;
+ }
+
+ node->command.sop.prio += delta;
+}
+
static int
AddToSalvageQueue(struct SalvageQueueNode * node)
{
afs_int32 id;
+ struct SalvageQueueNode * last = NULL;
id = volutil_GetPartitionID(node->command.sop.partName);
if (id < 0 || id > VOLMAXPARTS) {
/* don't enqueue salvage requests for unmounted partitions */
return 1;
}
+ if (queue_IsOnQueue(node)) {
+ return 0;
+ }
+
+ if (queue_IsNotEmpty(&salvageQueue.part[id])) {
+ last = queue_Last(&salvageQueue.part[id], SalvageQueueNode);
+ }
queue_Append(&salvageQueue.part[id], node);
salvageQueue.len[id]++;
salvageQueue.total_len++;
salvageQueue.last_insert = id;
node->partition_id = id;
node->state = SALVSYNC_STATE_QUEUED;
+
+ /* reorder, if necessary */
+ if (last && last->command.sop.prio < node->command.sop.prio) {
+ UpdateCommandPrio(node);
+ }
+
assert(pthread_cond_broadcast(&salvageQueue.cv) == 0);
return 0;
}
/* raise the priority of a previously scheduled salvage */
static void
-RaiseCommandPrio(struct SalvageQueueNode * node, SALVSYNC_command_hdr * com)
+UpdateCommandPrio(struct SalvageQueueNode * node)
{
struct SalvageQueueNode *np, *nnp;
afs_int32 id;
+ afs_uint32 prio;
assert(queue_IsOnQueue(node));
- node->command.sop.prio = com->prio;
+ prio = node->command.sop.prio;
id = node->partition_id;
- if (queue_First(&salvageQueue.part[id], SalvageQueueNode)->command.sop.prio < com->prio) {
+ if (queue_First(&salvageQueue.part[id], SalvageQueueNode)->command.sop.prio < prio) {
queue_Remove(node);
queue_Prepend(&salvageQueue.part[id], node);
} else {
for (queue_ScanBackwardsFrom(&salvageQueue.part[id], node, np, nnp, SalvageQueueNode)) {
- if (np->command.sop.prio > com->prio)
+ if (np->command.sop.prio > prio)
break;
}
if (queue_IsEnd(&salvageQueue.part[id], np)) {
assert(pthread_cond_wait(&salvageQueue.cv, &vol_glock_mutex) == 0);
}
-
/*
* short circuit for simple case where only one partition has
* scheduled salvages
SALVSYNC_doneWork_r(struct SalvageQueueNode * node, int result)
{
afs_int32 partid;
+ int idx;
+
DeleteFromPendingQueue(node);
partid = node->partition_id;
if (partid >=0 && partid <= VOLMAXPARTS) {
}
if (result == 0) {
node->state = SALVSYNC_STATE_DONE;
- } else {
+ } else if (result != SALSRV_EXIT_VOLGROUP_LINK) {
node->state = SALVSYNC_STATE_ERROR;
}
+
+ if (node->type == SALVSYNC_VOLGROUP_PARENT) {
+ for (idx = 0; idx < VOLMAXTYPES; idx++) {
+ if (node->volgroup.children[idx]) {
+ node->volgroup.children[idx]->state = node->state;
+ }
+ }
+ }
}
void
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2007, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#ifndef _AFS_VOL_SALVSYNC_H
#define _AFS_VOL_SALVSYNC_H
+#define SALSRV_EXIT_VOLGROUP_LINK 10
+
+
#ifdef AFS_DEMAND_ATTACH_FS
#include "daemon_com.h"
+#include "voldefs.h"
-#define SALVSYNC_PROTO_VERSION 1
+#define SALVSYNC_PROTO_VERSION_V1 1
+#define SALVSYNC_PROTO_VERSION_V2 2
+#define SALVSYNC_PROTO_VERSION SALVSYNC_PROTO_VERSION_V2
-/* SALVSYNC command codes */
-#define SALVSYNC_NOP SYNC_COM_CODE_DECL(0) /* just return stats */
-#define SALVSYNC_SALVAGE SYNC_COM_CODE_DECL(1) /* schedule a salvage */
-#define SALVSYNC_CANCEL SYNC_COM_CODE_DECL(2) /* Cancel a salvage */
-#define SALVSYNC_RAISEPRIO SYNC_COM_CODE_DECL(3) /* move a salvage operation to
- * the head of the work queue */
-#define SALVSYNC_QUERY SYNC_COM_CODE_DECL(4) /* query the status of a salvage */
-#define SALVSYNC_CANCELALL SYNC_COM_CODE_DECL(5) /* cancel all pending salvages */
-
-/* SALVSYNC reason codes */
-#define SALVSYNC_WHATEVER SYNC_REASON_CODE_DECL(0) /* XXXX */
-#define SALVSYNC_ERROR SYNC_REASON_CODE_DECL(1) /* volume is in error state */
-#define SALVSYNC_OPERATOR SYNC_REASON_CODE_DECL(2) /* operator forced salvage */
-#define SALVSYNC_SHUTDOWN SYNC_REASON_CODE_DECL(3) /* cancel due to shutdown */
-#define SALVSYNC_NEEDED SYNC_REASON_CODE_DECL(4) /* needsSalvaged flag set */
+/**
+ * SALVSYNC protocol command codes.
+ */
+typedef enum {
+ SALVSYNC_OP_NOP = SYNC_COM_CODE_DECL(0), /**< just return stats */
+ SALVSYNC_OP_SALVAGE = SYNC_COM_CODE_DECL(1), /**< schedule a salvage */
+ SALVSYNC_OP_CANCEL = SYNC_COM_CODE_DECL(2), /**< cancel a salvage */
+ SALVSYNC_OP_RAISEPRIO = SYNC_COM_CODE_DECL(3), /**< raise salvage priority */
+ SALVSYNC_OP_QUERY = SYNC_COM_CODE_DECL(4), /**< query status of a salvage */
+ SALVSYNC_OP_CANCELALL = SYNC_COM_CODE_DECL(5), /**< cancel all pending salvages */
+ SALVSYNC_OP_LINK = SYNC_COM_CODE_DECL(6), /**< link a clone to its parent */
+ SALVSYNC_OP_MAX_ID /* must be at end of enum */
+} SALVSYNC_op_code_t;
+
+#define SALVSYNC_NOP SALVSYNC_OP_NOP
+#define SALVSYNC_SALVAGE SALVSYNC_OP_SALVAGE
+#define SALVSYNC_CANCEL SALVSYNC_OP_CANCEL
+#define SALVSYNC_RAISEPRIO SALVSYNC_OP_RAISEPRIO
+#define SALVSYNC_QUERY SALVSYNC_OP_QUERY
+#define SALVSYNC_CANCELALL SALVSYNC_OP_CANCELALL
+#define SALVSYNC_LINK SALVSYNC_OP_LINK
+
+/**
+ * SALVSYNC protocol reason codes.
+ */
+typedef enum {
+ SALVSYNC_REASON_WHATEVER = SYNC_REASON_CODE_DECL(0), /**< XXX */
+ SALVSYNC_REASON_ERROR = SYNC_REASON_CODE_DECL(1), /**< volume is in error state */
+ SALVSYNC_REASON_OPERATOR = SYNC_REASON_CODE_DECL(2), /**< operator forced salvage */
+ SALVSYNC_REASON_SHUTDOWN = SYNC_REASON_CODE_DECL(3), /**< cancel due to shutdown */
+ SALVSYNC_REASON_NEEDED = SYNC_REASON_CODE_DECL(4), /**< needsSalvaged flag set */
+ SALVSYNC_REASON_MAX_ID /* must be at end of enum */
+} SALVSYNC_reason_code_t;
+
+#define SALVSYNC_WHATEVER SALVSYNC_REASON_WHATEVER
+#define SALVSYNC_ERROR SALVSYNC_REASON_ERROR
+#define SALVSYNC_OPERATOR SALVSYNC_REASON_OPERATOR
+#define SALVSYNC_SHUTDOWN SALVSYNC_REASON_SHUTDOWN
+#define SALVSYNC_NEEDED SALVSYNC_REASON_NEEDED
/* SALVSYNC response codes */
/* SALVSYNC flags */
#define SALVSYNC_FLAG_VOL_STATS_VALID SYNC_FLAG_CODE_DECL(0) /* volume stats in response are valid */
-/* SALVSYNC command state fields */
-#define SALVSYNC_STATE_UNKNOWN 0 /* unknown state */
-#define SALVSYNC_STATE_QUEUED 1 /* salvage request on queue */
-#define SALVSYNC_STATE_SALVAGING 2 /* salvage is happening now */
-#define SALVSYNC_STATE_ERROR 3 /* salvage ended in an error */
-#define SALVSYNC_STATE_DONE 4 /* last salvage ended successfully */
+/**
+ * SALVSYNC command state.
+ */
+typedef enum {
+ SALVSYNC_STATE_UNKNOWN = 0, /**< unknown state */
+ SALVSYNC_STATE_QUEUED = 1, /**< salvage request is queued */
+ SALVSYNC_STATE_SALVAGING = 2, /**< salvage is happening now */
+ SALVSYNC_STATE_ERROR = 3, /**< salvage ended in an error */
+ SALVSYNC_STATE_DONE = 4 /**< last salvage ended successfully */
+} SALVSYNC_command_state_t;
+/**
+ * on-wire salvsync protocol payload.
+ */
typedef struct SALVSYNC_command_hdr {
- afs_uint32 prio;
- afs_uint32 volume;
- char partName[16]; /* partition name, e.g. /vicepa */
+ afs_uint32 hdr_version; /**< salvsync protocol header version */
+ afs_uint32 prio; /**< salvage priority */
+ afs_uint32 volume; /**< volume on which to operate */
+ afs_uint32 parent; /**< parent volume (for vol group linking command) */
+ char partName[16]; /**< partition name, e.g. /vicepa */
+ afs_uint32 reserved[6];
} SALVSYNC_command_hdr;
typedef struct SALVSYNC_response_hdr {
afs_int32 prio;
afs_int32 sq_len;
afs_int32 pq_len;
+ afs_uint32 reserved[4];
} SALVSYNC_response_hdr;
typedef struct SALVSYNC_command {
SALVSYNC_command_hdr sop;
} SALVSYNC_command_info;
+typedef enum {
+ SALVSYNC_VOLGROUP_PARENT,
+ SALVSYNC_VOLGROUP_CLONE
+} SalvageQueueNodeType_t;
+
struct SalvageQueueNode {
struct rx_queue q;
struct rx_queue hash_chain;
- afs_uint32 state;
+ SalvageQueueNodeType_t type;
+ union {
+ struct SalvageQueueNode * parent;
+ struct SalvageQueueNode * children[VOLMAXTYPES];
+ } volgroup;
+ SALVSYNC_command_state_t state;
struct SALVSYNC_command_info command;
afs_int32 partition_id;
int pid;
extern afs_int32 SALVSYNC_askSalv(SYNC_command * com, SYNC_response * res);
extern afs_int32 SALVSYNC_SalvageVolume(VolumeId volume, char *partName, int com, int reason,
afs_uint32 prio, SYNC_response * res);
+extern afs_int32 SALVSYNC_LinkVolume(VolumeId parent, VolumeId clone,
+ char * partName, SYNC_response * res_in);
/* salvage server interfaces */
extern void SALVSYNC_salvInit(void);
int orphans = ORPH_IGNORE; /* -orphans option */
int Showmode = 0;
+
#ifndef AFS_NT40_ENV
int useSyslog = 0; /* -syslog flag */
int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
#endif
+#ifdef AFS_NT40_ENV
+int canfork = 0;
+#else
+int canfork = 1;
+#endif
+
#define MAXPARALLEL 32
int OKToZap; /* -o flag */
DiskToVolumeHeader(&vsp->header, &diskHeader);
if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
&& vsp->header.parent != singleVolumeNumber) {
- Log("%u is a read-only volume; not salvaged\n",
- singleVolumeNumber);
- Exit(1);
+ if (programType == salvageServer) {
+#ifdef SALVSYNC_BUILD_CLIENT
+ Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
+ vsp->header.id, vsp->header.parent);
+ if (SALVSYNC_LinkVolume(vsp->header.parent,
+ vsp->header.id,
+ fileSysPartition->name,
+ NULL) != SYNC_OK) {
+ Log("schedule request failed\n");
+ }
+#endif
+ Exit(SALSRV_EXIT_VOLGROUP_LINK);
+ } else {
+ Log("%u is a read-only volume; not salvaged\n",
+ singleVolumeNumber);
+ Exit(1);
+ }
}
if (!singleVolumeNumber
|| (vsp->header.id == singleVolumeNumber
#else
f = fork();
assert(f >= 0);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if ((f == 0) && (programType == salvageServer)) {
+ /* we are a salvageserver child */
+#ifdef FSSYNC_BUILD_CLIENT
+ VChildProcReconnectFS_r();
+#endif
+#ifdef SALVSYNC_BUILD_CLIENT
+ VReconnectSALV_r();
#endif
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* !AFS_NT40_ENV */
return f;
}
{
if (ShowLog)
showlog();
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (programType == salvageServer) {
+#ifdef SALVSYNC_BUILD_CLIENT
+ VDisconnectSALV();
+#endif
+#ifdef FSSYNC_BUILD_CLIENT
+ VDisconnectFS();
+#endif
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
#ifdef AFS_NT40_ENV
if (main_thread != pthread_self())
pthread_exit((void *)code);
struct InodeSummary *svgp_inodeSummaryp;
int svgp_count;
} SVGParms_t;
-#define canfork 0
-#else /* AFS_NT40_ENV */
-#define canfork 1
#endif /* AFS_NT40_ENV */
+extern int canfork;
+
/* prototypes */
extern void Exit(int code);
#define ROVOL 1
#define BACKVOL 2
+#define VOLMAXTYPES 3 /* _current_ max number of types */
+
/* maximum numbe of Vice partitions */
#define VOLMAXPARTS 255
int
VConnectSALV_r(void)
{
- assert((programType != salvageServer) &&
- (programType != volumeUtility));
return SALVSYNC_clientInit();
}
int
VDisconnectSALV_r(void)
{
- assert((programType != salvageServer) &&
- (programType != volumeUtility));
return SALVSYNC_clientFinis();
}
int
VReconnectSALV_r(void)
{
- assert((programType != salvageServer) &&
- (programType != volumeUtility));
return SALVSYNC_clientReconnect();
}
#endif /* SALVSYNC_BUILD_CLIENT */
VOL_UNLOCK;
}
-static int
+int
VChildProcReconnectFS_r(void)
{
return FSYNC_clientChildProcReconnect();