]> git.michaelhowe.org Git - packages/o/openafs.git/commitdiff
windows-vnovol-20080912
authorJeffrey Altman <jaltman@secure-endpoints.com>
Sat, 13 Sep 2008 05:20:02 +0000 (05:20 +0000)
committerJeffrey Altman <jaltman@secure-endpoints.com>
Sat, 13 Sep 2008 05:20:02 +0000 (05:20 +0000)
LICENSE MIT

The cm_serverRef_t list reference counts were undercounting and
prematurely freeing the server lists for volumes that experienced
VNOVOL and VMOVED errors.  cm_Analyze() must release the server
list before forcibly updating the volume location info.  Otherwise,
the list that gets freed is the old one concatenated with the new
one.

Add more trace messages.

src/WINNT/afsd/cm_callback.c
src/WINNT/afsd/cm_conn.c
src/WINNT/afsd/cm_ioctl.c
src/WINNT/afsd/cm_server.c
src/WINNT/afsd/cm_volume.c

index 899c9b1733d222f9f3b4d5632ac9fc0293dc8094..1ce5b001dcd84b73f8828ee915f340a876a38842 100644 (file)
@@ -1867,6 +1867,8 @@ long cm_CBServersUp(cm_scache_t *scp, time_t * downTime)
         return 1;
 
     for (found = 0,tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (tsrp->server == scp->cbServerp)
             found = 1;
         if (tsrp->server->downTime > *downTime)
index c919ebb9c0af4e4c90a2571d8f952bac86a6e02d..79e3294fc4cb88dfa2a46afec765503e3f9de73f 100644 (file)
@@ -232,6 +232,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         if (cellp == NULL && serversp) {
             struct cm_serverRef * refp;
             for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) {
+                if (refp->status == srv_deleted)
+                    continue;
                 if ( refp->server )
                     cellp = refp->server->cellp;
             }
@@ -335,6 +337,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                         }
                         lock_ObtainWrite(&cm_serverLock);
                         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+                            if (tsrp->status == srv_deleted)
+                                continue;
                             if (tsrp->status == srv_busy) {
                                 tsrp->status = srv_not_busy;
                             }       
@@ -342,7 +346,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                         lock_ReleaseWrite(&cm_serverLock);
                         if (free_svr_list) {
                             cm_FreeServerList(&serversp, 0);
-                            *serverspp = serversp;
+                            *serverspp = serversp = NULL;
+                            free_svr_list = 0;
                         }
 
                         cm_UpdateVolumeStatus(volp, fidp->volume);
@@ -364,6 +369,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                 if (serversp) {
                     lock_ObtainWrite(&cm_serverLock);
                     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+                        if (tsrp->status == srv_deleted)
+                            continue;
                         if (tsrp->status == srv_busy) {
                             tsrp->status = srv_not_busy;
                         }
@@ -386,6 +393,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         }
         lock_ObtainWrite(&cm_serverLock);
         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+            if (tsrp->status == srv_deleted)
+                continue;
             if (tsrp->server == serverp && tsrp->status == srv_not_busy) {
                 tsrp->status = srv_busy;
                 if (fidp) { /* File Server query */
@@ -412,7 +421,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
 
         if (free_svr_list) {
             cm_FreeServerList(&serversp, 0);
-            *serverspp = serversp;
+            *serverspp = serversp = NULL;
+            free_svr_list = 0;
         }
         retry = 1;
     }
@@ -467,13 +477,15 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
         if (!serversp && fidp) {
             code = cm_GetServerList(fidp, userp, reqp, &serverspp);
             if (code == 0) {
-                serversp = *serverspp;
+                serversp = *serverspp = NULL;
                 free_svr_list = 1;
             }
         }
 
         lock_ObtainWrite(&cm_serverLock);
         for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+            if (tsrp->status == srv_deleted)
+                continue;
             if (tsrp->server == serverp) {
                 /* REDIRECT */
                 if (errorCode == VMOVED || errorCode == VNOVOL) {
@@ -483,38 +495,46 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                 } else {
                     tsrp->status = srv_offline;
                 }
-
-                if (fidp) { /* File Server query */
-                    lock_ReleaseWrite(&cm_serverLock);
-                    code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, 
-                                             CM_GETVOL_FLAG_NO_LRU_UPDATE, 
-                                             &volp);
-                    if (code == 0)
-                        cm_VolumeStateByID(volp, fidp->volume);
-                    lock_ObtainWrite(&cm_serverLock);
-                }   
             }
         }   
         lock_ReleaseWrite(&cm_serverLock);
 
-        if (fidp && (errorCode == VMOVED || errorCode == VNOVOL)) {
-            code = cm_ForceUpdateVolume(fidp, userp, reqp);
-            if (code) 
-                timeLeft = 0;   /* prevent a retry on failure */
+        /* Free the server list before cm_ForceUpdateVolume is called */
+        if (free_svr_list) {
+            cm_FreeServerList(&serversp, 0);
+            *serverspp = serversp = NULL;
+            free_svr_list = 0;
         }
 
-        if (statep) {
-            cm_UpdateVolumeStatus(volp, statep->ID);
-            lock_ObtainRead(&cm_volumeLock);
-            cm_PutVolume(volp);
-            lock_ReleaseRead(&cm_volumeLock);
-            volp = NULL;
-        }
+        if (fidp) { /* File Server query */
+            code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, 
+                                      CM_GETVOL_FLAG_NO_LRU_UPDATE, 
+                                      &volp);
+            if (code == 0)
+                statep = cm_VolumeStateByID(volp, fidp->volume);
+
+            if (errorCode == VMOVED || errorCode == VNOVOL) {
+                code = cm_ForceUpdateVolume(fidp, userp, reqp);
+                if (code) 
+                    timeLeft = 0;   /* prevent a retry on failure */
+                osi_Log3(afsd_logp, "cm_Analyze called cm_ForceUpdateVolume cell %u vol %u code 0x%x",
+                        fidp->cell, fidp->volume, code);
+            }
 
-        if (free_svr_list) {
-            cm_FreeServerList(&serversp, 0);
-            *serverspp = serversp;
+            if (statep) {
+                cm_UpdateVolumeStatus(volp, statep->ID);
+                osi_Log3(afsd_logp, "cm_Analyze NewVolState cell %u vol %u state %u", 
+                         fidp->cell, fidp->volume, statep->state);
+            }
+
+            if (volp) {
+                lock_ObtainRead(&cm_volumeLock);
+                cm_PutVolume(volp);
+                lock_ReleaseRead(&cm_volumeLock);
+                volp = NULL;
+            }
         }
+
         if ( timeLeft > 2 )
             retry = 1;
     } else if ( errorCode == VNOVNODE ) {
@@ -798,6 +818,9 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
+
         tsp = tsrp->server;
         if (reqp->tokenIdleErrorServp) {
             /* 
@@ -813,7 +836,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
         if (tsp) {
             cm_GetServerNoLock(tsp);
             lock_ReleaseRead(&cm_serverLock);
-            if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
                 allDown = 0;
                 if (tsrp->status == srv_busy) {
                     allOffline = 0;
@@ -1042,8 +1065,10 @@ long cm_ServerAvailable(struct cm_fid *fidp, struct cm_user *userp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = *serverspp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         tsp = tsrp->server;
-        if ((tsrp->status != srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+        if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
            allDown = 0;
             if (tsrp->status == srv_busy) {
                allOffline = 0;
index 9f5bee81d2a34b15fbbc93e8ef7956022a9a09e4..3086bb3c694f5bc9534e552357c0f4087c9f706a 100644 (file)
@@ -3048,10 +3048,12 @@ cm_CheckServersStatus(cm_serverRef_t *serversp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (tsp = tsrp->server) {
             cm_GetServerNoLock(tsp);
             lock_ReleaseRead(&cm_serverLock);
-            if ((tsrp->status == srv_deleted) && !(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
                 allDown = 0;
                 if (tsrp->status == srv_busy) {
                     allOffline = 0;
index eba1fd246e404727521da30fe605ae29848c965d..a2748d5f155f3b7d50113451a76675d744c1c8a0 100644 (file)
@@ -1034,6 +1034,8 @@ LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
 
     lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (first)
             first = 0;
         else
@@ -1259,6 +1261,9 @@ void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
     cm_serverRef_t  **nextp = 0;
     cm_serverRef_t  * next = 0;
 
+       if (*list == NULL)
+               return;
+
     lock_ObtainWrite(&cm_serverLock);
 
     while (*current)
index 5af199377c35b383812c5aa4f011a4d0a2094dfb..d0dd1d6014af6b9b81ce5fdb68a23b7512a18bf8 100644 (file)
@@ -227,7 +227,9 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             cm_UpdateCell(cellp, 0);
 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, volp->namep);
+       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", 
+                  osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                  osi_LogSaveString(afsd_logp,volp->namep));
         do {
             struct rx_connection * rxconnp;
 
@@ -254,10 +256,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         code = cm_MapVLRPCError(code, reqp);
        if ( code )
            osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", 
-                     volp->cellp->name, volp->namep, code);
+                     osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                      osi_LogSaveString(afsd_logp,volp->namep), code);
        else
            osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", 
-                     volp->cellp->name, volp->namep);
+                     osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                      osi_LogSaveString(afsd_logp,volp->namep));
     }
 
     /* We can end up here with code == CM_ERROR_NOSUCHVOLUME if the base volume name
@@ -271,7 +275,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         snprintf(name, VL_MAXNAMELEN, "%s.readonly", volp->namep);
                 
         /* now we have volume structure locked and held; make RPC to fill it */
-       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", volp->cellp->name, 
+       osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s", 
+                 osi_LogSaveString(afsd_logp,volp->cellp->name),
                  osi_LogSaveString(afsd_logp,name));
         do {
             struct rx_connection * rxconnp;
@@ -299,10 +304,12 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
         code = cm_MapVLRPCError(code, reqp);
        if ( code )
            osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x", 
-                     volp->cellp->name, osi_LogSaveString(afsd_logp,name), code);
+                    osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                     osi_LogSaveString(afsd_logp,name), code);
        else
            osi_Log2(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s SUCCESS", 
-                     volp->cellp->name, osi_LogSaveString(afsd_logp,name));
+                    osi_LogSaveString(afsd_logp,volp->cellp->name), 
+                     osi_LogSaveString(afsd_logp,name));
     }
     
     lock_ObtainWrite(&volp->rw);
@@ -427,7 +434,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
                 name[len - 9] = '\0';
             }
             
-            osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", volp->namep, name);
+            osi_Log2(afsd_logp, "cm_UpdateVolume name %s -> %s", 
+                     osi_LogSaveString(afsd_logp,volp->namep), osi_LogSaveString(afsd_logp,name));
 
             if (volp->flags & CM_VOLUMEFLAG_IN_HASH)
                 cm_RemoveVolumeFromNameHashTable(volp);
@@ -558,6 +566,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
             cm_RandomizeServer(&volp->vol[ROVOL].serversp);
         }
 
+
         rwNewstate = rwServers_alldown ? vl_alldown : vl_online;
         roNewstate = roServers_alldown ? vl_alldown : vl_online;
         bkNewstate = bkServers_alldown ? vl_alldown : vl_online;
@@ -604,7 +613,8 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
 
     volp->flags &= ~CM_VOLUMEFLAG_UPDATING_VL;
     osi_Log4(afsd_logp, "cm_UpdateVolumeLocation done, waking others name %s:%s flags 0x%x code 0x%x", 
-             volp->cellp->name, volp->namep, volp->flags, code);
+             osi_LogSaveString(afsd_logp,volp->cellp->name), 
+             osi_LogSaveString(afsd_logp,volp->namep), volp->flags, code);
     osi_Wakeup((LONG_PTR) &volp->flags);
 
     return code;
@@ -846,6 +856,7 @@ long cm_FindVolumeByName(struct cm_cell *cellp, char *volumeNamep,
                     cm_VolumeStatusNotification(volp, volp->vol[volType].ID, volp->vol[volType].state, vl_unknown);
                 volp->vol[volType].ID = 0;
                 cm_SetFid(&volp->vol[volType].dotdotFid, 0, 0, 0, 0);
+                cm_FreeServerList(&volp->vol[volType].serversp, CM_FREESERVERLIST_DELETE);
             }
        } else {
            volp = &cm_data.volumeBaseAddress[cm_data.currentVolumes++];
@@ -999,7 +1010,7 @@ long cm_ForceUpdateVolume(cm_fid_t *fidp, cm_user_t *userp, cm_req_t *reqp)
 cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume)
 {
     cm_serverRef_t **serverspp;
-    cm_serverRef_t *current;;
+    cm_serverRef_t *current;
 
     lock_ObtainWrite(&cm_serverLock);
 
@@ -1012,7 +1023,11 @@ cm_serverRef_t **cm_GetVolServers(cm_volume_t *volp, afs_uint32 volume)
     else 
         osi_panic("bad volume ID in cm_GetVolServers", __FILE__, __LINE__);
         
-    for (current = *serverspp; current; current = current->next)
+    /* 
+     * Increment the refCount on deleted items as well.
+     * They will be freed by cm_FreeServerList when they get to zero 
+     */
+    for (current = *serverspp; current; current = current->next) 
         current->refCount++;
 
     lock_ReleaseWrite(&cm_serverLock);
@@ -1120,12 +1135,14 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
             alldown = 1;
             alldeleted = 1;
             for (serversp = statep->serversp; serversp; serversp = serversp->next) {
-                if (serversp->status != srv_deleted) {
-                    alldeleted = 0;
-                    *onlinep = 1;
-                    alldown = 0;
-                }
-                if (serversp->status == srv_busy || serversp->status == srv_offline) 
+                if (serversp->status == srv_deleted)
+                    continue;
+
+                alldeleted = 0;
+                *onlinep = 1;
+                alldown = 0;
+                
+                if (serversp->status == srv_busy || serversp->status == srv_offline)
                     serversp->status = srv_not_busy;
             }
 
@@ -1240,6 +1257,8 @@ cm_UpdateVolumeStatusInt(cm_volume_t *volp, struct cm_vol_state *statep)
 
     lock_ObtainWrite(&cm_serverLock);
     for (tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         tsp = tsrp->server;
         if (tsp) {
             cm_GetServerNoLock(tsp);