]> git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
ubik: improve logging for database synchonizations
authorMark Vitale <mvitale@sinenomine.net>
Fri, 4 May 2018 21:32:51 +0000 (17:32 -0400)
committerStephan Wiesand <stephan.wiesand@desy.de>
Sun, 9 Feb 2020 22:26:50 +0000 (17:26 -0500)
As an aid for debugging database synchronization issues, ensure that the
logging is consistent and unambiguous for both the client and server
sides of DISK_GetFile and DISK_SendFile.  Add new error messages as
required.

In addition, rework the "recovery sending version to <IP>" message in
urecovery_Interact.  This message is misleading because the new version
database is only sent to a DB server if its version is not up to date.
Instead, move this message into the version check block immediately
below it.  Also reword it for clarity and promote its log level from 5
to 0.  Finally, remove the now-superfluous "recovery stating local
database" log message.

Reviewed-on: https://gerrit.openafs.org/13079
Reviewed-by: Michael Meffie <mmeffie@sinenomine.net>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Andrew Deason <adeason@sinenomine.net>
Reviewed-by: Marcio Brito Barbosa <mbarbosa@sinenomine.net>
Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
(cherry picked from commit 0e1c042615d1aeb919a22568cdd2b2ea42c677ba)

Change-Id: I26e876e5bcd5adc004b985ea8c3f716cb6a72b5d
Reviewed-on: https://gerrit.openafs.org/13908
Reviewed-by: Michael Meffie <mmeffie@sinenomine.net>
Reviewed-by: Andrew Deason <adeason@sinenomine.net>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
Reviewed-by: Mark Vitale <mvitale@sinenomine.net>
Reviewed-by: Marcio Brito Barbosa <mbarbosa@sinenomine.net>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Stephan Wiesand <stephan.wiesand@desy.de>
src/ubik/recovery.c
src/ubik/remote.c

index b0644cd914a13c469e0fe76267631374424133e7..27a358866afea528ab255cd1f42a6dad6bdfa2bd 100644 (file)
@@ -607,7 +607,8 @@ urecovery_Interact(void *dummy)
            UBIK_ADDR_LOCK;
            rxcall = rx_NewCall(bestServer->disk_rxcid);
 
-           ViceLog(0, ("Ubik: Synchronize database via DISK_GetFile to server %s\n",
+           ViceLog(0, ("Ubik: Synchronize database: receive (via GetFile) "
+                       "from server %s begin\n",
                       afs_inet_ntoa_r(bestServer->addr[0], hoststr)));
            UBIK_ADDR_UNLOCK;
 
@@ -724,10 +725,17 @@ urecovery_Interact(void *dummy)
                ubik_dbase->version.epoch = 0;
                ubik_dbase->version.counter = 0;
                UBIK_VERSION_UNLOCK;
-               ViceLog(0, ("Ubik: Synchronize database failed (error = %d)\n",
-                          code));
+               ViceLog(0,
+                   ("Ubik: Synchronize database: receive (via GetFile) "
+                   "from server %s failed (error = %d)\n",
+                   afs_inet_ntoa_r(bestServer->addr[0], hoststr), code));
            } else {
-               ViceLog(0, ("Ubik: Synchronize database completed\n"));
+               ViceLog(0,
+                   ("Ubik: Synchronize database: receive (via GetFile) "
+                   "from server %s complete, version: %d.%d\n",
+                   afs_inet_ntoa_r(bestServer->addr[0], hoststr),
+                   ubik_dbase->version.epoch, ubik_dbase->version.counter));
+
                urecovery_state |= UBIK_RECHAVEDB;
            }
            udisk_Invalidate(ubik_dbase, 0);    /* data has changed */
@@ -818,10 +826,11 @@ urecovery_Interact(void *dummy)
                    continue;
                }
                UBIK_BEACON_UNLOCK;
-               ViceLog(5, ("recovery sending version to %s\n",
-                           afs_inet_ntoa_r(inAddr.s_addr, hoststr)));
+
                if (vcmp(ts->version, ubik_dbase->version) != 0) {
-                   ViceLog(5, ("recovery stating local database\n"));
+                   ViceLog(0, ("Synchronize database: send (via SendFile) "
+                               "to server %s begin\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr)));
 
                    /* Rx code to do the Bulk Store */
                    code = (*ubik_dbase->stat) (ubik_dbase, 0, &ubikstat);
@@ -864,12 +873,24 @@ urecovery_Interact(void *dummy)
                      StoreEndCall:
                        code = rx_EndCall(rxcall, code);
                    }
+
                    if (code == 0) {
                        /* we set a new file, process its header */
                        ts->version = ubik_dbase->version;
                        ts->currentDB = 1;
-                   } else
+                       ViceLog(0,
+                           ("Ubik: Synchronize database: send (via SendFile) "
+                           "to server %s complete, version: %d.%d\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr),
+                           ts->version.epoch, ts->version.counter));
+
+                   } else {
                        dbok = 0;
+                       ViceLog(0,
+                           ("Ubik: Synchronize database: send (via SendFile) "
+                            "to server %s failed (error = %d)\n",
+                           afs_inet_ntoa_r(inAddr.s_addr, hoststr), code));
+                   }
                } else {
                    /* mark file up to date */
                    ts->currentDB = 1;
index c2131abced2205d49c6646d926894bce213b10e6..a7ddcc4b6973b8fa4e928d01b7c51e63afddaa9e 100644 (file)
@@ -396,45 +396,72 @@ SDISK_GetFile(struct rx_call *rxcall, afs_int32 file,
     char tbuffer[256];
     afs_int32 tlen;
     afs_int32 length;
+    struct rx_peer *tpeer;
+    struct rx_connection *tconn;
+    afs_uint32 otherHost = 0;
+    char hoststr[16];
 
     if ((code = ubik_CheckAuth(rxcall))) {
        return code;
     }
+
+    tconn = rx_ConnectionOf(rxcall);
+    tpeer = rx_PeerOf(tconn);
+    otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
+    ViceLog(0, ("Ubik: Synchronize database: send (via GetFile) "
+               "to server %s begin\n",
+              afs_inet_ntoa_r(otherHost, hoststr)));
+
     dbase = ubik_dbase;
     DBHOLD(dbase);
     code = (*dbase->stat) (dbase, file, &ubikstat);
     if (code < 0) {
-       DBRELE(dbase);
-       return code;
+       ViceLog(0, ("database stat() error:%d\n", code));
+       goto failed;
     }
     length = ubikstat.size;
     tlen = htonl(length);
     code = rx_Write(rxcall, (char *)&tlen, sizeof(afs_int32));
     if (code != sizeof(afs_int32)) {
-       DBRELE(dbase);
        ViceLog(0, ("Rx-write length error=%d\n", code));
-       return BULK_ERROR;
+       code = BULK_ERROR;
+       goto failed;
     }
     offset = 0;
     while (length > 0) {
        tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
        code = (*dbase->read) (dbase, file, tbuffer, offset, tlen);
        if (code != tlen) {
-           DBRELE(dbase);
            ViceLog(0, ("read failed error=%d\n", code));
-           return UIOERROR;
+           code = UIOERROR;
+           goto failed;
        }
        code = rx_Write(rxcall, tbuffer, tlen);
        if (code != tlen) {
-           DBRELE(dbase);
-           ViceLog(0, ("Rx-write length error=%d\n", code));
-           return BULK_ERROR;
+           ViceLog(0, ("Rx-write data error=%d\n", code));
+           code = BULK_ERROR;
+           goto failed;
        }
        length -= tlen;
        offset += tlen;
     }
     code = (*dbase->getlabel) (dbase, file, version);  /* return the dbase, too */
+    if (code)
+       ViceLog(0, ("getlabel error=%d\n", code));
+
+ failed:
     DBRELE(dbase);
+    if (code) {
+       ViceLog(0,
+           ("Ubik: Synchronize database: send (via GetFile) to "
+            "server %s failed (error = %d)\n",
+            afs_inet_ntoa_r(otherHost, hoststr), code));
+    } else {
+       ViceLog(0,
+           ("Ubik: Synchronize database: send (via GetFile) to "
+            "server %s complete, version: %d.%d\n",
+           afs_inet_ntoa_r(otherHost, hoststr), version->epoch, version->counter));
+    }
     return code;
 }
 
@@ -495,7 +522,7 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file,
     /* abort any active trans that may scribble over the database */
     urecovery_AbortAll(dbase);
 
-    ViceLog(0, ("Ubik: Synchronize database via DISK_SendFile from server %s\n",
+    ViceLog(0, ("Ubik: Synchronize database: receive (via SendFile) from server %s begin\n",
               afs_inet_ntoa_r(otherHost, hoststr)));
 
     offset = 0;
@@ -507,12 +534,14 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file,
             (file<0)?-file:file);
     fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
     if (fd < 0) {
+       ViceLog(0, ("Open error=%d\n", errno));
        code = errno;
        ViceLog(0, ("Open error=%d\n", code));
        goto failed_locked;
     }
     code = lseek(fd, HDRSIZE, 0);
     if (code != HDRSIZE) {
+       ViceLog(0, ("lseek error=%d\n", code));
        close(fd);
        goto failed_locked;
     }
@@ -544,8 +573,10 @@ SDISK_SendFile(struct rx_call *rxcall, afs_int32 file,
        length -= tlen;
     }
     code = close(fd);
-    if (code)
+    if (code) {
+       ViceLog(0, ("close failed error=%d\n", code));
        goto failed;
+    }
 
     /* sync data first, then write label and resync (resync done by setlabel call).
      * This way, good label is only on good database. */
@@ -595,11 +626,16 @@ failed:
            (*dbase->setlabel) (dbase, file, &tversion);
            UBIK_VERSION_UNLOCK;
        }
-       ViceLog(0, ("Ubik: Synchronize database with server %s failed (error = %d)\n",
-            afs_inet_ntoa_r(otherHost, hoststr), code));
+       ViceLog(0,
+           ("Ubik: Synchronize database: receive (via SendFile) from "
+            "server %s failed (error = %d)\n",
+           afs_inet_ntoa_r(otherHost, hoststr), code));
     } else {
        uvote_set_dbVersion(*avers);
-       ViceLog(0, ("Ubik: Synchronize database completed\n"));
+       ViceLog(0,
+           ("Ubik: Synchronize database: receive (via SendFile) from "
+            "server %s complete, version: %d.%d\n",
+           afs_inet_ntoa_r(otherHost, hoststr), avers->epoch, avers->counter));
     }
     DBRELE(dbase);
     return code;