git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
STABLE14-ubik-avoid-truncating-live-database-during-recovery-20071210
author Derrick Brashear <shadow@dementia.org>
Mon, 10 Dec 2007 22:46:07 +0000 (22:46 +0000)
committer Derrick Brashear <shadow@dementia.org>
Mon, 10 Dec 2007 22:46:07 +0000 (22:46 +0000)
LICENSE IPL10
FIXES 77183

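Currently we can truncate the live database while we are doing recovery.
Address that by receiving the incoming database into a temporary file
(.DB0.TMP) and renaming it over the live .DB0 file only after the transfer
has completed.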

(cherry picked from commit 0f9529171c37c4ef1e76156da111d83d04b38505)

src/ubik/phys.c
src/ubik/recovery.c
src/ubik/remote.c

index 2611056831cc29475177ec816344cbb90cf992d0..69153a7bf96fdcf1cb69037a82c9ba712aff4a14 100644 (file)
@@ -78,15 +78,8 @@ uphys_open(register struct ubik_dbase *adbase, afs_int32 afid)
     }
 
     /* not found, open it and try to enter in cache */
-    strcpy(pbuffer, adbase->pathName);
-    strcat(pbuffer, ".DB");
-    if (afid < 0) {
-       i = -afid;
-       strcat(pbuffer, "SYS");
-    } else
-       i = afid;
-    sprintf(temp, "%d", i);
-    strcat(pbuffer, temp);
+    afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d", adbase->pathName, 
+                (afid<0)?"SYS":"", (afid<0)?-afid:afid);
     fd = open(pbuffer, O_CREAT | O_RDWR, 0600);
     if (fd < 0) {
        /* try opening read-only */
index 389a214913e3badb36954cc94beee66392b20724..47259be73595b8bd2f9406c10453f56ccaa2fe18 100644 (file)
@@ -17,6 +17,7 @@ RCSID
 #ifdef AFS_NT40_ENV
 #include <winsock2.h>
 #include <time.h>
+#include <fcntl.h>
 #else
 #include <sys/file.h>
 #include <netinet/in.h>
@@ -425,9 +426,13 @@ urecovery_Interact(void)
     struct timeval tv;
     int length, tlen, offset, file, nbytes;
     struct rx_call *rxcall;
-    char tbuffer[256];
+    char tbuffer[1024];
     struct ubik_stat ubikstat;
     struct in_addr inAddr;
+#ifndef OLD_URECOVERY
+    char pbuffer[1028];
+    int flen, fd = -1;
+#endif
 
     /* otherwise, begin interaction */
     urecovery_state = 0;
@@ -530,11 +535,7 @@ urecovery_Interact(void)
            urecovery_state |= UBIK_RECHAVEDB;
        } else {
            /* we don't have the best version; we should fetch it. */
-#if defined(UBIK_PAUSE)
            DBHOLD(ubik_dbase);
-#else
-           ObtainWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
            urecovery_AbortAll(ubik_dbase);
 
            /* Rx code to do the Bulk fetch */
@@ -558,7 +559,8 @@ urecovery_Interact(void)
                goto FetchEndCall;
            }
 
-           /* Truncate the file firest */
+#ifdef OLD_URECOVERY
+           /* Truncate the file first */
            code = (*ubik_dbase->truncate) (ubik_dbase, file, 0);
            if (code) {
                ubik_dprint("truncate io error=%d\n", code);
@@ -573,6 +575,20 @@ urecovery_Interact(void)
                ubik_dprint("setlabel io error=%d\n", code);
                goto FetchEndCall;
            }
+#else
+           flen = length;
+           afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
+           fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
+           if (fd < 0) {
+               code = errno;
+               goto FetchEndCall;
+           }
+           code = lseek(fd, HDRSIZE, 0);
+           if (code != HDRSIZE) {
+               close(fd);
+               goto FetchEndCall;
+           }
+#endif
 
            while (length > 0) {
                tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
@@ -580,18 +596,29 @@ urecovery_Interact(void)
                if (nbytes != tlen) {
                    ubik_dprint("Rx-read bulk error=%d\n", code = BULK_ERROR);
                    code = EIO;
+                   close(fd);
                    goto FetchEndCall;
                }
+#ifdef OLD_URECOVERY
                nbytes =
                    (*ubik_dbase->write) (ubik_dbase, file, tbuffer, offset,
                                          tlen);
+#else
+               nbytes = write(fd, tbuffer, tlen);
+#endif
                if (nbytes != tlen) {
                    code = UIOERROR;
+                   close(fd);
                    goto FetchEndCall;
                }
                offset += tlen;
                length -= tlen;
            }
+#ifndef OLD_URECOVERY
+           code = close(fd);
+           if (code)
+               goto FetchEndCall;
+#endif     
            code = EndDISK_GetFile(rxcall, &tversion);
          FetchEndCall:
            tcode = rx_EndCall(rxcall, code);
@@ -602,13 +629,36 @@ urecovery_Interact(void)
                urecovery_state |= UBIK_RECHAVEDB;
                memcpy(&ubik_dbase->version, &tversion,
                       sizeof(struct ubik_version));
+#ifdef OLD_URECOVERY
                (*ubik_dbase->sync) (ubik_dbase, 0);    /* get data out first */
+#else
+               afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
+#ifdef AFS_NT40_ENV
+               afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
+               code = unlink(pbuffer);
+               if (!code)
+                   code = rename(tbuffer, pbuffer);
+               afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
+#endif
+               if (!code) 
+                   code = rename(pbuffer, tbuffer);
+               if (!code)
+#endif
                /* after data is good, sync disk with correct label */
                code =
                    (*ubik_dbase->setlabel) (ubik_dbase, 0,
                                             &ubik_dbase->version);
+#ifndef OLD_URECOVERY
+#ifdef AFS_NT40_ENV
+               afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
+               unlink(pbuffer);
+#endif
+#endif
            }
            if (code) {
+#ifndef OLD_URECOVERY
+               unlink(pbuffer);
+#endif
                ubik_dbase->version.epoch = 0;
                ubik_dbase->version.counter = 0;
                ubik_print("Ubik: Synchronize database failed (error = %d)\n",
@@ -618,11 +668,7 @@ urecovery_Interact(void)
            }
            udisk_Invalidate(ubik_dbase, 0);    /* data has changed */
            LWP_NoYieldSignal(&ubik_dbase->version);
-#if defined(UBIK_PAUSE)
            DBRELE(ubik_dbase);
-#else
-           ReleaseWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
        }
 #if defined(UBIK_PAUSE)
        if (!(urecovery_state & UBIK_RECSYNCSITE))
@@ -637,11 +683,7 @@ urecovery_Interact(void)
         * database and overwrite this one.
         */
        if (ubik_dbase->version.epoch == 1) {
-#if defined(UBIK_PAUSE)
            DBHOLD(ubik_dbase);
-#else
-           ObtainWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
            urecovery_AbortAll(ubik_dbase);
            ubik_epochTime = 2;
            ubik_dbase->version.epoch = ubik_epochTime;
@@ -650,11 +692,7 @@ urecovery_Interact(void)
                (*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version);
            udisk_Invalidate(ubik_dbase, 0);    /* data may have changed */
            LWP_NoYieldSignal(&ubik_dbase->version);
-#if defined(UBIK_PAUSE)
            DBRELE(ubik_dbase);
-#else
-           ReleaseWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
        }
 
        /* Check the other sites and send the database to them if they
@@ -664,11 +702,7 @@ urecovery_Interact(void)
            /* now propagate out new version to everyone else */
            dbok = 1;           /* start off assuming they all worked */
 
-#if defined(UBIK_PAUSE)
            DBHOLD(ubik_dbase);
-#else
-           ObtainWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
            /*
             * Check if a write transaction is in progress. We can't send the
             * db when a write is in progress here because the db would be
@@ -684,20 +718,12 @@ urecovery_Interact(void)
                tv.tv_sec = 0;
                tv.tv_usec = 50000;
                while ((ubik_dbase->flags & DBWRITING) && (safety < 500)) {
-#if defined(UBIK_PAUSE)
                    DBRELE(ubik_dbase);
-#else
-                   ReleaseWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
                    /* sleep for a little while */
                    IOMGR_Select(0, 0, 0, 0, &tv);
                    tv.tv_usec += 10000;
                    safety++;
-#if defined(UBIK_PAUSE)
                    DBHOLD(ubik_dbase);
-#else
-                   ObtainWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
                }
            }
 
@@ -764,11 +790,7 @@ urecovery_Interact(void)
                    ts->currentDB = 1;
                }
            }
-#if defined(UBIK_PAUSE)
            DBRELE(ubik_dbase);
-#else
-           ReleaseWriteLock(&ubik_dbase->versionLock);
-#endif /* UBIK_PAUSE */
            if (dbok)
                urecovery_state |= UBIK_RECSENTDB;
        }
index 1f57cdd78fe4eda46592fae831943156a239b040..2d4f0e3886cedd3f6796d0ebfc1bb97b7aad6d81 100644 (file)
@@ -16,6 +16,7 @@ RCSID
 #include <sys/types.h>
 #ifdef AFS_NT40_ENV
 #include <winsock2.h>
+#include <fcntl.h>
 #else
 #include <sys/file.h>
 #include <netinet/in.h>
@@ -24,6 +25,7 @@ RCSID
 #include <lock.h>
 #include <rx/xdr.h>
 #include <rx/rx.h>
+#include <errno.h>
 #include <afs/afsutil.h>
 
 #define UBIK_INTERNALS
@@ -488,13 +490,17 @@ SDISK_SendFile(rxcall, file, length, avers)
 {
     register afs_int32 code;
     register struct ubik_dbase *dbase;
-    char tbuffer[256];
+    char tbuffer[1024];
     afs_int32 offset;
     struct ubik_version tversion;
     register int tlen;
     struct rx_peer *tpeer;
     struct rx_connection *tconn;
     afs_uint32 otherHost;
+#ifndef OLD_URECOVERY
+    char pbuffer[1028];
+    int flen, fd = -1;
+#endif
 
     /* send the file back to the requester */
 
@@ -532,10 +538,25 @@ SDISK_SendFile(rxcall, file, length, avers)
               afs_inet_ntoa(otherHost));
 
     offset = 0;
+#ifdef OLD_URECOVERY
     (*dbase->truncate) (dbase, file, 0);       /* truncate first */
     tversion.epoch = 0;                /* start off by labelling in-transit db as invalid */
     tversion.counter = 0;
     (*dbase->setlabel) (dbase, file, &tversion);       /* setlabel does sync */
+#else
+    flen = length;
+    afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
+    fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
+    if (fd < 0) {
+       code = errno;
+       goto failed;
+    }
+    code = lseek(fd, HDRSIZE, 0);
+    if (code != HDRSIZE) {
+       close(fd);
+       goto failed;
+    }
+#endif
     memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
     while (length > 0) {
        tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
@@ -544,29 +565,63 @@ SDISK_SendFile(rxcall, file, length, avers)
            DBRELE(dbase);
            ubik_dprint("Rx-read length error=%d\n", code);
            code = BULK_ERROR;
+           close(fd);
            goto failed;
        }
+#ifdef OLD_URECOVERY
        code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
+#else
+       code = write(fd, tbuffer, tlen);
+#endif
        if (code != tlen) {
            DBRELE(dbase);
            ubik_dprint("write failed error=%d\n", code);
            code = UIOERROR;
+           close(fd);
            goto failed;
        }
        offset += tlen;
        length -= tlen;
     }
+#ifndef OLD_URECOVERY
+    code = close(fd);
+    if (code)
+       goto failed;
+#endif     
 
     /* sync data first, then write label and resync (resync done by setlabel call).
      * This way, good label is only on good database. */
+#ifdef OLD_URECOVERY
     (*ubik_dbase->sync) (dbase, file);
+#else
+    afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
+#ifdef AFS_NT40_ENV
+    afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
+    code = unlink(pbuffer);
+    if (!code)
+       code = rename(tbuffer, pbuffer);
+    afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
+#endif
+    if (!code) 
+       code = rename(pbuffer, tbuffer);
+    if (!code)
+#endif
     code = (*ubik_dbase->setlabel) (dbase, file, avers);
+#ifndef OLD_URECOVERY
+#ifdef AFS_NT40_ENV
+    afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
+    unlink(pbuffer);
+#endif
+#endif
     memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
     udisk_Invalidate(dbase, file);     /* new dbase, flush disk buffers */
     LWP_NoYieldSignal(&dbase->version);
     DBRELE(dbase);
   failed:
     if (code) {
+#ifndef OLD_URECOVERY
+       unlink(pbuffer);
+#endif
        ubik_print
            ("Ubik: Synchronize database with server %s failed (error = %d)\n",
             afs_inet_ntoa(otherHost), code);