From 920a00e075b679f59e31b9fcbe7f5db15e345a95 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Tue, 29 Mar 2011 12:28:46 -0500 Subject: [PATCH] vol: Add timeouts to SYNC server select() calls Normally *SYNC server processes wait indefinitely for activity to occur on one of the SYNC sockets. On some Linux kernels, there exists a race condition where data can come in on a socket, but the select() call continues to wait. To ensure that we do not hang forever in such a scenario, add a timeout to the select() call, which will ensure we notice the new data within 10 seconds. Raise the timeout on non-Linux to reduce impact elsewhere. The Linux kernel bug is tracked in an external bug report (its URL is missing from this copy of the message), though that bug report may not represent all affected kernels. Change-Id: I3250eb53d59610ccbcffe9e8e283984d5ae0e2b4 Reviewed-on: http://gerrit.openafs.org/4377 Tested-by: BuildBot Reviewed-by: Derrick Brashear --- src/vol/daemon_com.h | 9 +++++++++ src/vol/fssync-server.c | 5 ++++- src/vol/salvsync-server.c | 5 ++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/vol/daemon_com.h b/src/vol/daemon_com.h index 81fd38c21..249c270da 100644 --- a/src/vol/daemon_com.h +++ b/src/vol/daemon_com.h @@ -87,6 +87,15 @@ enum SYNCReasonCode { afs_int64 _##buf##_l[SYNC_PROTO_MAX_LEN/sizeof(afs_int64)]; \ char * buf = (char *)(_##buf##_l) +#ifdef AFS_LINUX26_ENV +/* Some Linux kernels have a bug where we are not woken up immediately from a + * select() when data is available. Work around this by having a low select() + * timeout, so we don't hang in those situations. 
*/ +# define SYNC_SELECT_TIMEOUT 10 +#else +# define SYNC_SELECT_TIMEOUT 86400 +#endif + #ifdef USE_UNIX_SOCKETS #include #include diff --git a/src/vol/fssync-server.c b/src/vol/fssync-server.c index 0d2aa7e06..0b50bde51 100644 --- a/src/vol/fssync-server.c +++ b/src/vol/fssync-server.c @@ -310,12 +310,15 @@ FSYNC_sync(void * args) CallHandler(FSYNC_readfds, nfds, POLLIN|POLLPRI); #else int maxfd; + struct timeval s_timeout; GetHandler(&FSYNC_readfds, &maxfd); + s_timeout.tv_sec = SYNC_SELECT_TIMEOUT; + s_timeout.tv_usec = 0; /* Note: check for >= 1 below is essential since IOMGR_select * doesn't have exactly same semantics as select. */ #ifdef AFS_PTHREAD_ENV - if (select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1) + if (select(maxfd + 1, &FSYNC_readfds, NULL, NULL, &s_timeout) >= 1) #else /* AFS_PTHREAD_ENV */ if (IOMGR_Select(maxfd + 1, &FSYNC_readfds, NULL, NULL, NULL) >= 1) #endif /* AFS_PTHREAD_ENV */ diff --git a/src/vol/salvsync-server.c b/src/vol/salvsync-server.c index f6b530a22..f054dd6c6 100644 --- a/src/vol/salvsync-server.c +++ b/src/vol/salvsync-server.c @@ -342,11 +342,14 @@ SALVSYNC_syncThread(void * args) for (;;) { int maxfd; + struct timeval s_timeout; GetHandler(&SALVSYNC_readfds, &maxfd); + s_timeout.tv_sec = SYNC_SELECT_TIMEOUT; + s_timeout.tv_usec = 0; /* Note: check for >= 1 below is essential since IOMGR_select * doesn't have exactly same semantics as select. */ - if (select(maxfd + 1, &SALVSYNC_readfds, NULL, NULL, NULL) >= 1) + if (select(maxfd + 1, &SALVSYNC_readfds, NULL, NULL, &s_timeout) >= 1) CallHandler(&SALVSYNC_readfds); } -- 2.39.5