From 246f17c6c32174d80643c61f3184ba82aa653e50 Mon Sep 17 00:00:00 2001 From: Mark Vitale Date: Fri, 4 May 2018 17:32:51 -0400 Subject: [PATCH] ubik: improve logging for database synchonizations As an aid for debugging database synchronization issues, ensure that the logging is consistent and unambiguous for both the client and server sides of DISK_GetFile and DISK_SendFile. Add new error messages as required. In addition, rework the "recovery sending version to " message in urecovery_Interact. This message is misleading because the new version database is only sent to a DB server if its version is not up to date. Instead, move this message into the version check block immediately below it. Also reword it for clarity and promote its log level from 5 to 0. Finally, remove the now-superfluous "recovery stating local database" log message. Reviewed-on: https://gerrit.openafs.org/13079 Reviewed-by: Michael Meffie Tested-by: BuildBot Reviewed-by: Andrew Deason Reviewed-by: Marcio Brito Barbosa Reviewed-by: Benjamin Kaduk (cherry picked from commit 0e1c042615d1aeb919a22568cdd2b2ea42c677ba) Change-Id: I26e876e5bcd5adc004b985ea8c3f716cb6a72b5d Reviewed-on: https://gerrit.openafs.org/13908 Reviewed-by: Michael Meffie Reviewed-by: Andrew Deason Reviewed-by: Cheyenne Wills Reviewed-by: Mark Vitale Reviewed-by: Marcio Brito Barbosa Tested-by: BuildBot Reviewed-by: Stephan Wiesand --- src/ubik/recovery.c | 37 ++++++++++++++++++++------ src/ubik/remote.c | 64 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/ubik/recovery.c b/src/ubik/recovery.c index b0644cd91..27a358866 100644 --- a/src/ubik/recovery.c +++ b/src/ubik/recovery.c @@ -607,7 +607,8 @@ urecovery_Interact(void *dummy) UBIK_ADDR_LOCK; rxcall = rx_NewCall(bestServer->disk_rxcid); - ViceLog(0, ("Ubik: Synchronize database via DISK_GetFile to server %s\n", + ViceLog(0, ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s begin\n", afs_inet_ntoa_r(bestServer->addr[0], hoststr))); UBIK_ADDR_UNLOCK; @@ -724,10 +725,17 @@ urecovery_Interact(void *dummy) ubik_dbase->version.epoch = 0; ubik_dbase->version.counter = 0; UBIK_VERSION_UNLOCK; - ViceLog(0, ("Ubik: Synchronize database failed (error = %d)\n", - code)); + ViceLog(0, + ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s failed (error = %d)\n", + afs_inet_ntoa_r(bestServer->addr[0], hoststr), code)); } else { - ViceLog(0, ("Ubik: Synchronize database completed\n")); + ViceLog(0, + ("Ubik: Synchronize database: receive (via GetFile) " + "from server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(bestServer->addr[0], hoststr), + ubik_dbase->version.epoch, ubik_dbase->version.counter)); + urecovery_state |= UBIK_RECHAVEDB; } udisk_Invalidate(ubik_dbase, 0); /* data has changed */ @@ -818,10 +826,11 @@ urecovery_Interact(void *dummy) continue; } UBIK_BEACON_UNLOCK; - ViceLog(5, ("recovery sending version to %s\n", - afs_inet_ntoa_r(inAddr.s_addr, hoststr))); + if (vcmp(ts->version, ubik_dbase->version) != 0) { - ViceLog(5, ("recovery stating local database\n")); + ViceLog(0, ("Synchronize database: send (via SendFile) " + "to server %s begin\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr))); /* Rx code to do the Bulk Store */ code = (*ubik_dbase->stat) (ubik_dbase, 0, &ubikstat); @@ -864,12 +873,24 @@ urecovery_Interact(void *dummy) StoreEndCall: code = rx_EndCall(rxcall, code); } + if (code == 0) { /* we set a new file, process its header */ ts->version = ubik_dbase->version; ts->currentDB = 1; - } else + ViceLog(0, + ("Ubik: Synchronize database: send (via SendFile) " + "to server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr), + ts->version.epoch, ts->version.counter)); + + } else { dbok = 0; + ViceLog(0, + ("Ubik: Synchronize database: send (via SendFile) " + "to server %s failed (error = %d)\n", + afs_inet_ntoa_r(inAddr.s_addr, hoststr), code)); + } } else { /* mark file up to date */ ts->currentDB = 1; diff --git a/src/ubik/remote.c b/src/ubik/remote.c index c2131abce..a7ddcc4b6 100644 --- a/src/ubik/remote.c +++ b/src/ubik/remote.c @@ -396,45 +396,72 @@ SDISK_GetFile(struct rx_call *rxcall, afs_int32 file, char tbuffer[256]; afs_int32 tlen; afs_int32 length; + struct rx_peer *tpeer; + struct rx_connection *tconn; + afs_uint32 otherHost = 0; + char hoststr[16]; if ((code = ubik_CheckAuth(rxcall))) { return code; } + + tconn = rx_ConnectionOf(rxcall); + tpeer = rx_PeerOf(tconn); + otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer)); + ViceLog(0, ("Ubik: Synchronize database: send (via GetFile) " + "to server %s begin\n", + afs_inet_ntoa_r(otherHost, hoststr))); + dbase = ubik_dbase; DBHOLD(dbase); code = (*dbase->stat) (dbase, file, &ubikstat); if (code < 0) { - DBRELE(dbase); - return code; + ViceLog(0, ("database stat() error:%d\n", code)); + goto failed; } length = ubikstat.size; tlen = htonl(length); code = rx_Write(rxcall, (char *)&tlen, sizeof(afs_int32)); if (code != sizeof(afs_int32)) { - DBRELE(dbase); ViceLog(0, ("Rx-write length error=%d\n", code)); - return BULK_ERROR; + code = BULK_ERROR; + goto failed; } offset = 0; while (length > 0) { tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length); code = (*dbase->read) (dbase, file, tbuffer, offset, tlen); if (code != tlen) { - DBRELE(dbase); ViceLog(0, ("read failed error=%d\n", code)); - return UIOERROR; + code = UIOERROR; + goto failed; } code = rx_Write(rxcall, tbuffer, tlen); if (code != tlen) { - DBRELE(dbase); - ViceLog(0, ("Rx-write length error=%d\n", code)); - return BULK_ERROR; + ViceLog(0, ("Rx-write data error=%d\n", code)); + code = BULK_ERROR; + goto failed; } length -= tlen; offset += tlen; } code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */ + if (code) + ViceLog(0, ("getlabel error=%d\n", code)); + + failed: DBRELE(dbase); + if (code) { + ViceLog(0, + ("Ubik: Synchronize database: send (via GetFile) to " + "server %s failed (error = %d)\n", + afs_inet_ntoa_r(otherHost, hoststr), code)); + } else { + ViceLog(0, + ("Ubik: Synchronize database: send (via GetFile) to " + "server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(otherHost, hoststr), version->epoch, version->counter)); + } return code; } @@ -495,7 +522,7 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, /* abort any active trans that may scribble over the database */ urecovery_AbortAll(dbase); - ViceLog(0, ("Ubik: Synchronize database via DISK_SendFile from server %s\n", + ViceLog(0, ("Ubik: Synchronize database: receive (via SendFile) from server %s begin\n", afs_inet_ntoa_r(otherHost, hoststr))); offset = 0; @@ -507,12 +534,14 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, (file<0)?-file:file); fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600); if (fd < 0) { + ViceLog(0, ("Open error=%d\n", errno)); code = errno; ViceLog(0, ("Open error=%d\n", code)); goto failed_locked; } code = lseek(fd, HDRSIZE, 0); if (code != HDRSIZE) { + ViceLog(0, ("lseek error=%d\n", code)); close(fd); goto failed_locked; } @@ -544,8 +573,10 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file, length -= tlen; } code = close(fd); - if (code) + if (code) { + ViceLog(0, ("close failed error=%d\n", code)); goto failed; + } /* sync data first, then write label and resync (resync done by setlabel call). * This way, good label is only on good database. */ @@ -595,11 +626,16 @@ failed: (*dbase->setlabel) (dbase, file, &tversion); UBIK_VERSION_UNLOCK; } - ViceLog(0, ("Ubik: Synchronize database with server %s failed (error = %d)\n", - afs_inet_ntoa_r(otherHost, hoststr), code)); + ViceLog(0, + ("Ubik: Synchronize database: receive (via SendFile) from " + "server %s failed (error = %d)\n", + afs_inet_ntoa_r(otherHost, hoststr), code)); } else { uvote_set_dbVersion(*avers); - ViceLog(0, ("Ubik: Synchronize database completed\n")); + ViceLog(0, + ("Ubik: Synchronize database: receive (via SendFile) from " + "server %s complete, version: %d.%d\n", + afs_inet_ntoa_r(otherHost, hoststr), avers->epoch, avers->counter)); } DBRELE(dbase); return code; -- 2.39.5