]> git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
DEVEL15-windows-vnovol-20080912
authorJeffrey Altman <jaltman@secure-endpoints.com>
Sat, 13 Sep 2008 05:20:48 +0000 (05:20 +0000)
committerJeffrey Altman <jaltman@secure-endpoints.com>
Sat, 13 Sep 2008 05:20:48 +0000 (05:20 +0000)
LICENSE MIT

The cm_serverRef_t list reference counts were undercounting and
prematurely freeing the server lists for volumes that experienced
VNOVOL and VMOVED errors.  cm_Analyze() must release the server
list before forcibly updating the volume location info.  Otherwise,
the list that gets freed is the old one concatenated with the new
one.

Add more trace messages.

(cherry picked from commit 1456a67c5ca024c523e0fc3edcba720780d4be9e)

src/WINNT/afsd/cm_callback.c
src/WINNT/afsd/cm_conn.c
src/WINNT/afsd/cm_ioctl.c
src/WINNT/afsd/cm_server.c
src/WINNT/afsd/cm_volume.c

index 899c9b1733d222f9f3b4d5632ac9fc0293dc8094..1ce5b001dcd84b73f8828ee915f340a876a38842 100644 (file)
@@ -1867,6 +1867,8 @@ long cm_CBServersUp(cm_scache_t *scp, time_t * downTime)
         return 1;
 
     for (found = 0,tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (tsrp->server == scp->cbServerp)
             found = 1;
         if (tsrp->server->downTime > *downTime)
index 81b17ec862fad721d791a3d1f2c48993e90a19d2..a09c41fffd523ddb3aaede73df6d464684829fa6 100644 (file)
@@ -232,6 +232,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         if (cellp == NULL && serversp) {
             struct cm_serverRef * refp;
             for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) {
+                if (refp->status == srv_deleted)
+                    continue;
                 if ( refp->server )
                     cellp = refp->server->cellp;
             }
@@ -335,6 +337,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                         }
                         lock_ObtainWrite(&cm_serverLock);
                         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+                            if (tsrp->status == srv_deleted)
+                                continue;
                             if (tsrp->status == srv_busy) {
                                 tsrp->status = srv_not_busy;
                             }       
@@ -342,7 +346,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                         lock_ReleaseWrite(&cm_serverLock);
                         if (free_svr_list) {
                             cm_FreeServerList(&serversp, 0);
-                            *serverspp = serversp;
+                            *serverspp = serversp = NULL;
+                            free_svr_list = 0;
                         }
 
                         cm_UpdateVolumeStatus(volp, fidp->volume);
@@ -364,6 +369,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                 if (serversp) {
                     lock_ObtainWrite(&cm_serverLock);
                     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+                        if (tsrp->status == srv_deleted)
+                            continue;
                         if (tsrp->status == srv_busy) {
                             tsrp->status = srv_not_busy;
                         }
@@ -386,6 +393,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         }
         lock_ObtainWrite(&cm_serverLock);
         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+            if (tsrp->status == srv_deleted)
+                continue;
             if (tsrp->server == serverp && tsrp->status == srv_not_busy) {
                 tsrp->status = srv_busy;
                 if (fidp) { /* File Server query */
@@ -412,7 +421,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
 
         if (free_svr_list) {
             cm_FreeServerList(&serversp, 0);
-            *serverspp = serversp;
+            *serverspp = serversp = NULL;
+            free_svr_list = 0;
         }
         retry = 1;
     }
@@ -467,13 +477,15 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         if (!serversp && fidp) {
             code = cm_GetServerList(fidp, userp, reqp, &serverspp);
             if (code == 0) {
-                serversp = *serverspp;
+                serversp = *serverspp = NULL;
                 free_svr_list = 1;
             }
         }
 
         lock_ObtainWrite(&cm_serverLock);
         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+            if (tsrp->status == srv_deleted)
+                continue;
             if (tsrp->server == serverp) {
                 /* REDIRECT */
                 if (errorCode == VMOVED || errorCode == VNOVOL) {
@@ -483,38 +495,46 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                 } else {
                     tsrp->status = srv_offline;
                 }
-
-                if (fidp) { /* File Server query */
-                    lock_ReleaseWrite(&cm_serverLock);
-                    code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, 
-                                             CM_GETVOL_FLAG_NO_LRU_UPDATE, 
-                                             &volp);
-                    if (code == 0)
-                        cm_VolumeStateByID(volp, fidp->volume);
-                    lock_ObtainWrite(&cm_serverLock);
-                }   
             }
         }   
         lock_ReleaseWrite(&cm_serverLock);
 
-        if (fidp && (errorCode == VMOVED || errorCode == VNOVOL)) {
-            code = cm_ForceUpdateVolume(fidp, userp, reqp);
-            if (code) 
-                timeLeft = 0;   /* prevent a retry on failure */
+        /* Free the server list before cm_ForceUpdateVolume is called */
+        if (free_svr_list) {
+            cm_FreeServerList(&serversp, 0);
+            *serverspp = serversp = NULL;
+            free_svr_list = 0;
         }
 
-        if (statep) {
-            cm_UpdateVolumeStatus(volp, statep->ID);
-            lock_ObtainRead(&cm_volumeLock);
-            cm_PutVolume(volp);
-            lock_ReleaseRead(&cm_volumeLock);
-            volp = NULL;
-        }
+        if (fidp) { /* File Server query */
+            code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, 
+                                      CM_GETVOL_FLAG_NO_LRU_UPDATE, 
+                                      &volp);
+            if (code == 0)
+                statep = cm_VolumeStateByID(volp, fidp->volume);
+
+            if (errorCode == VMOVED || errorCode == VNOVOL) {
+                code = cm_ForceUpdateVolume(fidp, userp, reqp);
+                if (code) 
+                    timeLeft = 0;   /* prevent a retry on failure */
+                osi_Log3(afsd_logp, "cm_Analyze called cm_ForceUpdateVolume cell %u vol %u code 0x%x",
+                        fidp->cell, fidp->volume, code);
+            }
 
-        if (free_svr_list) {
-            cm_FreeServerList(&serversp, 0);
-            *serverspp = serversp;
+            if (statep) {
+                cm_UpdateVolumeStatus(volp, statep->ID);
+                osi_Log3(afsd_logp, "cm_Analyze NewVolState cell %u vol %u state %u", 
+                         fidp->cell, fidp->volume, statep->state);
+            }
+
+            if (volp) {
+                lock_ObtainRead(&cm_volumeLock);
+                cm_PutVolume(volp);
+                lock_ReleaseRead(&cm_volumeLock);
+                volp = NULL;
+            }
         }
+
         if ( timeLeft > 2 )
             retry = 1;
     } else if ( errorCode == VNOVNODE ) {
@@ -798,6 +818,9 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
+
         tsp = tsrp->server;
         if (reqp->tokenIdleErrorServp) {
             /* 
@@ -813,7 +836,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
         if (tsp) {
             cm_GetServerNoLock(tsp);
             lock_ReleaseRead(&cm_serverLock);
-            if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
                 allDown = 0;
                 if (tsrp->status == srv_busy) {
                     allOffline = 0;
@@ -1042,8 +1065,10 @@ long cm_ServerAvailable(struct cm_fid *fidp, struct cm_user *userp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = *serverspp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         tsp = tsrp->server;
-        if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+        if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
            allDown = 0;
             if (tsrp->status == srv_busy) {
                allOffline = 0;
index 0bda7f65b126ee85549055e1eda153d57a720aed..46dcf80f5f5bb5ec26098c279a4bac22567c3d8e 100644 (file)
@@ -3049,10 +3049,12 @@ cm_CheckServersStatus(cm_serverRef_t *serversp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (tsp = tsrp->server) {
             cm_GetServerNoLock(tsp);
             lock_ReleaseRead(&cm_serverLock);
-            if ((tsrp->status == srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
                 allDown = 0;
                 if (tsrp->status == srv_busy) {
                     allOffline = 0;
index eba1fd246e404727521da30fe605ae29848c965d..a2748d5f155f3b7d50113451a76675d744c1c8a0 100644 (file)
@@ -1034,6 +1034,8 @@ LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (first)
             first = 0;
         else
@@ -1259,6 +1261,9 @@ void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
     cm_serverRef_t  **nextp = 0;
     cm_serverRef_t  * next = 0;
 
+       if (*list == NULL)
+               return;
+
     lock_ObtainWrite(&cm_serverLock);
 
     while (*current)
index 5af199377c35b383812c5aa4f011a4d0a2094dfb..d0dd1d6014af6b9b81ce5fdb68a23b7512a18bf8 100644 (file)
@@ -227,7 +227,9 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             cm_UpdateCell(cellp, 0);
 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, volp->namep);
+       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", 
+                  osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                  osi_LogSaveString(afsd_logp,volp->namep));
         do {
             struct rx_connection * rxconnp;
 
@@ -254,10 +256,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         code = cm_MapVLRPCError(code, reqp);
        if ( code )
            osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", 
-                     volp->cellp->name, volp->namep, code);
+                     osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                      osi_LogSaveString(afsd_logp,volp->namep), code);
        else
            osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", 
-                     volp->cellp->name, volp->namep);
+                     osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                      osi_LogSaveString(afsd_logp,volp->namep));
     }
 
     /* We can end up here with code == CM_ERROR_NOSUCHVOLUME if the base volume name
@@ -271,7 +275,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         snprintf(name, VL_MAXNAMELEN, "%s.readonly", volp->namep);
                 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, 
+       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", 
+                 osi_LogSaveString(afsd_logp,volp->cellp->name),
                  osi_LogSaveString(afsd_logp,name));
         do {
             struct rx_connection * rxconnp;
@@ -299,10 +304,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         code = cm_MapVLRPCError(code, reqp);
        if ( code )
            osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", 
-                     volp->cellp->name, osi_LogSaveString(afsd_logp,name), code);
+                    osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                     osi_LogSaveString(afsd_logp,name), code);
        else
            osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", 
-                     volp->cellp->name, osi_LogSaveString(afsd_logp,name));
+                    osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                     osi_LogSaveString(afsd_logp,name));
     }
     
     lock_ObtainWrite(&volp->rw);
@@ -427,7 +434,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 name[len - 9] = '\0';
             }
             
-            osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", volp->namep, name);
+            osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", 
+                     osi_LogSaveString(afsd_logp,volp->namep), osi_LogSaveString(afsd_logp,name));
 
             if (volp->flags & CM_VOLUMEFLAG_IN_HASH)
                 cm_RemoveVolumeFromNameHashTable(volp);
@@ -558,6 +566,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             cm_RandomizeServer(&volp->vol[ROVOL].serversp);
         }
 
+
         rwNewstate = rwServers_alldown ? vl_alldown : vl_online;
         roNewstate = roServers_alldown ? vl_alldown : vl_online;
         bkNewstate = bkServers_alldown ? vl_alldown : vl_online;
@@ -604,7 +613,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
 
     volp->flags &= ~CM_VOLUMEFLAG_UPDATING_VL;
     osi_Log4(afsd_logp, "cm_UpdateVolumeLocation done, waking others name %s:%s flags 0x%x code 0x%x", 
-             volp->cellp->name, volp->namep, volp->flags, code);
+             osi_LogSaveString(afsd_logp,volp->cellp->name), 
+             osi_LogSaveString(afsd_logp,volp->namep), volp->flags, code);
     osi_Wakeup((LONG_PTR) &volp->flags);
 
     return code;
@@ -846,6 +856,7 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep,
                     cm_VolumeStatusNotification(volp, volp->vol[volType].ID, volp->vol[volType].state, vl_unknown);
                 volp->vol[volType].ID = 0;
                 cm_SetFid(&volp->vol[volType].dotdotFid, 0, 0, 0, 0);
+                cm_FreeServerList(&volp->vol[volType].serversp, CM_FREESERVERLIST_DELETE);
             }
        } else {
            volp = &cm_data.volumeBaseAddress[cm_data.currentVolumes++];
@@ -999,7 +1010,7 @@ long cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp)
 cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume)
 {
     cm_serverRef_t **serverspp;
-    cm_serverRef_t *current;;
+    cm_serverRef_t *current;
 
     lock_ObtainWrite(&cm_serverLock);
 
@@ -1012,7 +1023,11 @@ cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume)
     else 
         osi_panic("bad volume ID in cm_GetVolServers", __FILE__, __LINE__);
         
-    for (current = *serverspp; current; current = current->next)
+    /* 
+     * Increment the refCount on deleted items as well.
+     * They will be freed by cm_FreeServerList when they get to zero 
+     */
+    for (current = *serverspp; current; current = current->next) 
         current->refCount++;
 
     lock_ReleaseWrite(&cm_serverLock);
@@ -1120,12 +1135,14 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
             alldown = 1;
             alldeleted = 1;
             for (serversp = statep->serversp; serversp; serversp = serversp->next) {
-                if (serversp->status != srv_deleted) {
-                    alldeleted = 0;
-                    *onlinep = 1;
-                    alldown = 0;
-                }
-                if (serversp->status == srv_busy || serversp->status == srv_offline) 
+                if (serversp->status == srv_deleted)
+                    continue;
+
+                alldeleted = 0;
+                *onlinep = 1;
+                alldown = 0;
+                
+                if (serversp->status == srv_busy || serversp->status == srv_offline)
                     serversp->status = srv_not_busy;
             }
 
@@ -1240,6 +1257,8 @@ cm_UpdateVolumeStatusInt(cm_volume_t *volp, struct cm_vol_state *statep)
 
     lock_ObtainWrite(&cm_serverLock);
     for (tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         tsp = tsrp->server;
         if (tsp) {
             cm_GetServerNoLock(tsp);