From 036c01d2f129b7118a77ecd6d89fbc779d91c224 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Thu, 2 Nov 2017 16:41:52 -0500 Subject: [PATCH] rx: Convert rxinit_status to rx_IsRunning() Currently, all rx code examines the atomic rxinit_status to determine if rx is running (that is, if rx_InitHost has been called, and rx_Finalize/shutdown_rx hasn't been called). This is used in rx.c to see if we're redundantly calling our setup/teardown functions, and outside of rx.c in a couple of places to see if rx-related resources have been initialized. The usage of rxinit_status is a little confusing, since setting bit 0 indicates that rx is not running, and clearing bit 0 indicates rx is running. Since using rxinit_status requires atomic functions, this makes code checking or setting rxinit_status a little verbose, and it can be hard to see what it is checking for. (For example, does 'if (!rx_atomic_test_and_clear_bit(&rxinit_status, 0))' succeed when rx running, or when rx is not running?) The current usage of rxinit_status in rx_InitHost also does not handle initialization errors correctly. rx_InitHost clears rxinit_status near the beginning of the function, but does not set rxinit_status if an error is encountered. This means that any code that checks rxinit_status (such as another rx_InitHost call) will think that rx was initialized successfully, but various resources aren't actually setup. This can cause segfaults and other errors as the code tries to actually use rx. This can easily be seen in bosserver, if bosserver is started up while the local host/port is in use by someone else. bosserver will try to rx_InitHost, which will fail, and then we'll try to rx_InitHost again, which will immediately succeed without doing any init. We then segfault quickly afterwards as we try to use unitialized rx resources. To fix all of this, refactor code using rxinit_status to use a new function, called rx_IsRunning(), to make it a little clearer what we're checking for. We also re-introduce the LOCK_RX_INIT locks to prevent functions like rx_InitHost and rx_Finalize from running in parallel. Note that non-init/shutdown code (such as rx_upcall or rx_GetIFInfo) does not need to wait for LOCK_RX_INIT to check if rx is running or not. These functions only care if rx is currently setup enough to be used, so we can immediately return a 'yes' or 'no' answer. That is, if rx_InitHost is in the middle of running, rx_IsRunning returns 0, since some resouces may not be fully initialized. Reviewed-on: https://gerrit.openafs.org/12761 Reviewed-by: Michael Meffie Reviewed-by: Benjamin Kaduk Tested-by: BuildBot (cherry picked from commit 5ced6025b9f11fadbdf2e092bf40cc87499ed277) Change-Id: I38ef9e3aea8a1f20e9db488a44da4535f76432d1 Reviewed-on: https://gerrit.openafs.org/13416 Tested-by: BuildBot Reviewed-by: Michael Meffie Reviewed-by: Cheyenne Wills Reviewed-by: Andrew Deason Reviewed-by: Stephan Wiesand --- src/rx/DARWIN/rx_knet.c | 4 +- src/rx/rx.c | 87 +++++++++++++++++++++++++++++++---------- src/rx/rx_internal.h | 1 + src/rx/rx_user.c | 7 ++-- 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/src/rx/DARWIN/rx_knet.c b/src/rx/DARWIN/rx_knet.c index 8cc24c8e9..ca7f2c6fd 100644 --- a/src/rx/DARWIN/rx_knet.c +++ b/src/rx/DARWIN/rx_knet.c @@ -22,8 +22,6 @@ #endif #ifdef RXK_UPCALL_ENV -extern rx_atomic_t rxinit_status; - void rx_upcall(socket_t so, void *arg, __unused int waitflag) { @@ -41,7 +39,7 @@ rx_upcall(socket_t so, void *arg, __unused int waitflag) size_t nbytes, resid, noffset; /* we stopped rx but the socket isn't closed yet */ - if (rx_atomic_test_bit(&rxinit_status, 0)) + if (!rxi_IsRunning()) return; /* See if a check for additional packets was issued */ diff --git a/src/rx/rx.c b/src/rx/rx.c index 651136a2a..43f094bc0 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -160,6 +160,12 @@ static void rxi_CancelDelayedAbortEvent(struct rx_call *call); static void rxi_CancelGrowMTUEvent(struct rx_call *call); static void update_nextCid(void); +#ifndef KERNEL +static void rxi_Finalize_locked(void); +#elif defined(UKERNEL) +# define rxi_Finalize_locked() do { } while (0) +#endif + #ifdef RX_ENABLE_LOCKS struct rx_tq_debug { rx_atomic_t rxi_start_aborted; /* rxi_start awoke after rxi_Send in error.*/ @@ -442,19 +448,35 @@ static int rxdb_fileID = RXDB_FILE_RX; #endif /* RX_ENABLE_LOCKS */ struct rx_serverQueueEntry *rx_waitForPacket = 0; +/* + * This mutex serializes calls to our initialization and shutdown routines + * (rx_InitHost, rx_Finalize and shutdown_rx). Only one thread can be running + * these at any time; all other threads must wait for it to finish running, and + * then examine the value of rxi_running afterwards. + */ +#ifdef AFS_PTHREAD_ENV +# define LOCK_RX_INIT MUTEX_ENTER(&rx_init_mutex) +# define UNLOCK_RX_INIT MUTEX_EXIT(&rx_init_mutex) +#else +# define LOCK_RX_INIT +# define UNLOCK_RX_INIT +#endif + /* ------------Exported Interfaces------------- */ +static rx_atomic_t rxi_running = RX_ATOMIC_INIT(0); +int +rxi_IsRunning(void) +{ + return rx_atomic_read(&rxi_running); +} + /* Initialize rx. A port number may be mentioned, in which case this * becomes the default port number for any service installed later. * If 0 is provided for the port number, a random port will be chosen * by the kernel. Whether this will ever overlap anything in * /etc/services is anybody's guess... Returns 0 on success, -1 on * error. */ -#if !(defined(AFS_NT40_ENV) || defined(RXK_UPCALL_ENV)) -static -#endif -rx_atomic_t rxinit_status = RX_ATOMIC_INIT(1); - int rx_InitHost(u_int host, u_int port) { @@ -468,15 +490,17 @@ rx_InitHost(u_int host, u_int port) SPLVAR; INIT_PTHREAD_LOCKS; - if (!rx_atomic_test_and_clear_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (rxi_IsRunning()) { + UNLOCK_RX_INIT; return 0; /* already started */ - + } #ifdef RXDEBUG rxi_DebugInit(); #endif #ifdef AFS_NT40_ENV if (afs_winsockInit() < 0) - return -1; + goto error; #endif #ifndef KERNEL @@ -492,7 +516,7 @@ rx_InitHost(u_int host, u_int port) rx_socket = rxi_GetHostUDPSocket(host, (u_short) port); if (rx_socket == OSI_NULLSOCKET) { - return RX_ADDRINUSE; + goto addrinuse; } #if defined(RX_ENABLE_LOCKS) && defined(KERNEL) #ifdef RX_LOCKS_DB @@ -580,19 +604,19 @@ rx_InitHost(u_int host, u_int port) socklen_t addrlen = sizeof(addr); #endif if (getsockname((intptr_t)rx_socket, (struct sockaddr *)&addr, &addrlen)) { - rx_Finalize(); + rxi_Finalize_locked(); osi_Free(htable, rx_hashTableSize * sizeof(struct rx_connection *)); - return -1; + goto error; } rx_port = addr.sin_port; #endif } rx_stats.minRtt.sec = 9999999; if (RAND_bytes(&rx_epoch, sizeof(rx_epoch)) != 1) - return -1; + goto error; rx_epoch = (rx_epoch & ~0x40000000) | 0x80000000; if (RAND_bytes(&rx_nextCid, sizeof(rx_nextCid)) != 1) - return -1; + goto error; rx_nextCid &= RX_CIDMASK; MUTEX_ENTER(&rx_quota_mutex); rxi_dataQuota += rx_extraQuota; /* + extra pkts caller asked to rsrv */ @@ -623,8 +647,19 @@ rx_InitHost(u_int host, u_int port) rxi_StartListener(); USERPRI; - rx_atomic_clear_bit(&rxinit_status, 0); + + rx_atomic_set(&rxi_running, 1); + UNLOCK_RX_INIT; + return 0; + + addrinuse: + UNLOCK_RX_INIT; + return RX_ADDRINUSE; + + error: + UNLOCK_RX_INIT; + return -1; } int @@ -2492,12 +2527,21 @@ rx_EndCall(struct rx_call *call, afs_int32 rc) void rx_Finalize(void) { - struct rx_connection **conn_ptr, **conn_end; - INIT_PTHREAD_LOCKS; - if (rx_atomic_test_and_set_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (!rxi_IsRunning()) { + UNLOCK_RX_INIT; return; /* Already shutdown. */ + } + rxi_Finalize_locked(); + UNLOCK_RX_INIT; +} +static void +rxi_Finalize_locked(void) +{ + struct rx_connection **conn_ptr, **conn_end; + rx_atomic_set(&rxi_running, 0); rxi_DeleteCachedConnections(); if (rx_connHashTable) { MUTEX_ENTER(&rx_connHashTable_lock); @@ -2534,7 +2578,6 @@ rx_Finalize(void) #ifdef AFS_NT40_ENV afs_winsockCleanup(); #endif - } #endif @@ -7843,9 +7886,12 @@ shutdown_rx(void) struct rx_serverQueueEntry *sq; #endif /* KERNEL */ - if (rx_atomic_test_and_set_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (!rxi_IsRunning()) { + UNLOCK_RX_INIT; return; /* Already shutdown. */ - + } + rx_atomic_set(&rxi_running, 0); #ifndef KERNEL rx_port = 0; #ifndef AFS_PTHREAD_ENV @@ -7967,6 +8013,7 @@ shutdown_rx(void) rxi_dataQuota = RX_MAX_QUOTA; rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0; MUTEX_EXIT(&rx_quota_mutex); + UNLOCK_RX_INIT; } #ifndef KERNEL diff --git a/src/rx/rx_internal.h b/src/rx/rx_internal.h index 800732e50..8d5fd79c6 100644 --- a/src/rx/rx_internal.h +++ b/src/rx/rx_internal.h @@ -20,6 +20,7 @@ extern rx_atomic_t rx_nWaited; /* Prototypes for internal functions */ /* rx.c */ +extern int rxi_IsRunning(void); extern void rxi_CancelDelayedAckEvent(struct rx_call *); extern void rxi_PacketsUnWait(void); extern void rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, diff --git a/src/rx/rx_user.c b/src/rx/rx_user.c index 479349404..0ba068b5a 100644 --- a/src/rx/rx_user.c +++ b/src/rx/rx_user.c @@ -352,7 +352,6 @@ rx_getAllAddrMaskMtu(afs_uint32 addrBuffer[], afs_uint32 maskBuffer[], #endif #ifdef AFS_NT40_ENV -extern rx_atomic_t rxinit_status; void rxi_InitMorePackets(void) { int npackets, ncbufs; @@ -373,7 +372,7 @@ rx_GetIFInfo(void) LOCK_IF_INIT; if (Inited) { - if (Inited < 2 && !rx_atomic_test_bit(&rxinit_status, 0)) { + if (Inited < 2 && rxi_IsRunning()) { /* We couldn't initialize more packets earlier. * Do it now. */ rxi_InitMorePackets(); @@ -407,11 +406,11 @@ rx_GetIFInfo(void) UNLOCK_IF; /* - * If rxinit_status is still set, rx_InitHost() has yet to be called + * If rxi_IsRunning is false, rx_InitHost() has yet to be called * and we therefore do not have any mutex locks initialized. As a * result we cannot call rxi_MorePackets() without crashing. */ - if (rx_atomic_test_bit(&rxinit_status, 0)) + if (!rxi_IsRunning()) return; rxi_InitMorePackets(); -- 2.39.5