From: Jeffrey Altman Date: Wed, 18 May 2005 23:03:02 +0000 (+0000) Subject: STABLE14-rx-makecall-race-fix-20050518 X-Git-Tag: openafs-devel-1_3_83~40 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=95c5005b388c4ed670c5db9c9ed3c129098824ca;p=packages%2Fo%2Fopenafs.git STABLE14-rx-makecall-race-fix-20050518 On at least one system it was noticed that threads waiting in rx_NewCall would starve forever (aka deadlock). This was the result of one out of two problems related to a race condition on the RX_CONN_MAKECALL_WAITING bit flag. This flag was set once in rx_NewCall and cleared in rx_EndCall. However, it was possible for the flag to be cleared even though there were additional flags waiting in rx_NewCall. This was due to a failure to check the value of makeCallWaiters before clearing the flag and also due to a failure to properly lock the access to the makeCallWaiters field. The second problem was an ability to destroy a connection on which threads are waiting within rx_NewCall. (cherry picked from commit 10f6e5d6e2960469eb4d0e75f62fa9b33629b132) --- diff --git a/src/rx/rx.c b/src/rx/rx.c index e2c474d6a..abe659151 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -1073,14 +1073,32 @@ rx_NewCall(register struct rx_connection *conn) * If so, let them go first to avoid starving them. * This is a fairly simple scheme, and might not be * a complete solution for large numbers of waiters. + * + * makeCallWaiters keeps track of the number of + * threads waiting to make calls and the + * RX_CONN_MAKECALL_WAITING flag bit is used to + * indicate that there are indeed calls waiting. + * The flag is set when the waiter is incremented. + * It is only cleared in rx_EndCall when + * makeCallWaiters is 0. This prevents us from + * accidently destroying the connection while it + * is potentially about to be used. */ + MUTEX_ENTER(&conn->conn_data_lock); if (conn->makeCallWaiters) { + conn->flags |= RX_CONN_MAKECALL_WAITING; + conn->makeCallWaiters++; + MUTEX_EXIT(&conn->conn_data_lock); + #ifdef RX_ENABLE_LOCKS - CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock); + CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock); #else - osi_rxSleep(conn); + osi_rxSleep(conn); #endif - } + MUTEX_ENTER(&conn->conn_data_lock); + conn->makeCallWaiters--; + } + MUTEX_EXIT(&conn->conn_data_lock); for (;;) { for (i = 0; i < RX_MAXCALLS; i++) { @@ -1103,15 +1121,17 @@ rx_NewCall(register struct rx_connection *conn) } MUTEX_ENTER(&conn->conn_data_lock); conn->flags |= RX_CONN_MAKECALL_WAITING; + conn->makeCallWaiters++; MUTEX_EXIT(&conn->conn_data_lock); - conn->makeCallWaiters++; #ifdef RX_ENABLE_LOCKS CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock); #else osi_rxSleep(conn); #endif + MUTEX_ENTER(&conn->conn_data_lock); conn->makeCallWaiters--; + MUTEX_EXIT(&conn->conn_data_lock); } /* * Wake up anyone else who might be giving us a chance to @@ -1876,6 +1896,9 @@ rx_EndCall(register struct rx_call *call, afs_int32 rc) * rx_NewCall is in a stable state. Otherwise, rx_NewCall may * have checked this call, found it active and by the time it * goes to sleep, will have missed the signal. + * + * Do not clear the RX_CONN_MAKECALL_WAITING flag as long as + * there are threads waiting to use the conn object. */ MUTEX_EXIT(&call->lock); MUTEX_ENTER(&conn->conn_call_lock); @@ -1883,7 +1906,8 @@ rx_EndCall(register struct rx_call *call, afs_int32 rc) MUTEX_ENTER(&conn->conn_data_lock); conn->flags |= RX_CONN_BUSY; if (conn->flags & RX_CONN_MAKECALL_WAITING) { - conn->flags &= (~RX_CONN_MAKECALL_WAITING); + if (conn->makeCallWaiters == 0) + conn->flags &= (~RX_CONN_MAKECALL_WAITING); MUTEX_EXIT(&conn->conn_data_lock); #ifdef RX_ENABLE_LOCKS CV_BROADCAST(&conn->conn_call_cv); @@ -2169,7 +2193,7 @@ rxi_FreeCall(register struct rx_call *call) * If someone else destroys a connection, they either have no * call lock held or are going through this section of code. */ - if (conn->flags & RX_CONN_DESTROY_ME) { + if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) { MUTEX_ENTER(&conn->conn_data_lock); conn->refCount++; MUTEX_EXIT(&conn->conn_data_lock);