From: Simon Wilkinson Date: Fri, 17 Jun 2011 21:06:54 +0000 (+0100) Subject: rx: Compute smoothed RTT per call, not per peer. X-Git-Tag: upstream/1.8.0_pre1^2~3621 X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=39484c6e57cf993a713b4a989d1c0c227e6f496c;p=packages%2Fo%2Fopenafs.git rx: Compute smoothed RTT per call, not per peer. RX uses the TCP RTT smoothing algorithm as described in RFC2988. However, the TCP algorithm is designed to accept samples from a single connection, accepting a new sample once per RTT. RFC2988 suggests that "when multiple samples are taken per RTT the [ algorithm ] may keep an inadequate RTT history." In RX's implementation, we use a single instance of this algorithm per peer, and input all of the samples from all of the active calls and connections into this same instance. This leads to us taking a significantly (potentially many magnitudes) larger number of samples per RTT, and rapidly losing the RTT history. With RX's implementation, short lived network events may easily bias the RTT, and cause large numbers of packets to time out. This change fixes this by moving the RTT calculation onto a per call basis. We still update the peer with our calculated value, so that new calls may be created with an RTT corresponding to the current value for the connection, rather than having to start high and converge downwards. 
Change-Id: I2ed6bce63adf160c03518686ec25cbecc5084f5f Reviewed-on: http://gerrit.openafs.org/4861 Reviewed-by: Jeffrey Altman Reviewed-by: Derrick Brashear Tested-by: Derrick Brashear --- diff --git a/src/rx/rx.c b/src/rx/rx.c index 5c0873646..0131f76cd 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -98,7 +98,8 @@ int (*swapNameProgram) (PROCESS, const char *, char *) = 0; /* Local static routines */ static void rxi_DestroyConnectionNoLock(struct rx_connection *conn); static void rxi_ComputeRoundTripTime(struct rx_packet *, struct rx_ackPacket *, - struct rx_peer *, struct clock *); + struct rx_call *, struct rx_peer *, + struct clock *); #ifdef RX_ENABLE_LOCKS static void rxi_SetAcksInTransmitQueue(struct rx_call *call); @@ -614,6 +615,17 @@ rx_Init(u_int port) return rx_InitHost(htonl(INADDR_ANY), port); } +/** + * Set an initial round trip timeout for a peer connection + * + * @param[in] secs The timeout to set in seconds + */ + +void +rx_rto_setPeerTimeoutSecs(struct rx_peer *peer, int secs) { + peer->rtt = secs * 8000; +} + /** * Sets the error generated when a busy call channel is detected. 
* @@ -4252,7 +4264,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np, if (!(tp->flags & RX_PKTFLAG_ACKED)) { newAckCount++; - rxi_ComputeRoundTripTime(tp, ap, call->conn->peer, &now); + rxi_ComputeRoundTripTime(tp, ap, call, peer, &now); } #ifdef ADAPT_WINDOW @@ -4324,8 +4336,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np, if (!(tp->flags & RX_PKTFLAG_ACKED)) { newAckCount++; tp->flags |= RX_PKTFLAG_ACKED; - - rxi_ComputeRoundTripTime(tp, ap, call->conn->peer, &now); + rxi_ComputeRoundTripTime(tp, ap, call, peer, &now); #ifdef ADAPT_WINDOW rxi_ComputeRate(call->conn->peer, call, tp, np, ap->reason); #endif @@ -4347,7 +4358,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np, if (!(tp->flags & RX_PKTFLAG_ACKED) && !clock_IsZero(&tp->retryTime)) { tp->retryTime = tp->timeSent; - clock_Add(&tp->retryTime, &peer->timeout); + clock_Add(&tp->retryTime, &call->rto); /* shift by eight because one quarter-sec ~ 256 milliseconds */ clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8); } @@ -4364,7 +4375,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np, while (!queue_IsEnd(&call->tq, tp) && !clock_IsZero(&tp->retryTime)) { tp->retryTime = tp->timeSent; - clock_Add(&tp->retryTime, &peer->timeout); + clock_Add(&tp->retryTime, &call->rto); clock_Addmsec(&tp->retryTime, ((afs_uint32) tp->backoff) << 8); tp = queue_Next(tp, rx_packet); } @@ -5196,6 +5207,11 @@ rxi_ResetCall(struct rx_call *call, int newcall) call->ssthresh = rx_maxSendWindow; call->nDgramPackets = peer->nDgramPackets; call->congestSeq = peer->congestSeq; + call->rtt = peer->rtt; + call->rtt_dev = peer->rtt_dev; + clock_Zero(&call->rto); + clock_Addmsec(&call->rto, + MAX(((call->rtt >> 3) + call->rtt_dev), rx_minPeerTimeout) + 200); MUTEX_EXIT(&peer->peer_lock); flags = call->flags; @@ -5599,7 +5615,7 @@ rxi_SendList(struct rx_call *call, struct xmitlist *xmit, peer->nSent += xmit->len; if (xmit->resending) peer->reSends += 
xmit->len; - retryTime = peer->timeout; + retryTime = call->rto; MUTEX_EXIT(&peer->peer_lock); if (rx_stats_active) { @@ -6166,8 +6182,8 @@ rxi_CheckCall(struct rx_call *call) } #endif /* RTT + 8*MDEV, rounded up to the next second. */ - fudgeFactor = (((afs_uint32) conn->peer->rtt >> 3) + - ((afs_uint32) conn->peer->rtt_dev << 1) + 1023) >> 10; + fudgeFactor = (((afs_uint32) call->rtt >> 3) + + ((afs_uint32) call->rtt_dev << 1) + 1023) >> 10; deadTime = conn->secondsUntilDead + fudgeFactor; now = clock_Sec(); @@ -6675,6 +6691,7 @@ rxi_ChallengeOn(struct rx_connection *conn) static void rxi_ComputeRoundTripTime(struct rx_packet *p, struct rx_ackPacket *ack, + struct rx_call *call, struct rx_peer *peer, struct clock *now) { @@ -6752,11 +6769,11 @@ rxi_ComputeRoundTripTime(struct rx_packet *p, /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */ /* Apply VanJacobson round-trip estimations */ - if (peer->rtt) { + if (call->rtt) { int delta; /* - * srtt (peer->rtt) is in units of one-eighth-milliseconds. + * srtt (call->rtt) is in units of one-eighth-milliseconds. * srtt is stored as fixed point with 3 bits after the binary * point (i.e., scaled by 8). The following magic is * equivalent to the smoothing algorithm in rfc793 with an @@ -6767,8 +6784,8 @@ rxi_ComputeRoundTripTime(struct rx_packet *p, * srtt' = srtt + (rtt - srtt)/8 */ - delta = _8THMSEC(&thisRtt) - peer->rtt; - peer->rtt += (delta >> 3); + delta = _8THMSEC(&thisRtt) - call->rtt; + call->rtt += (delta >> 3); /* * We accumulate a smoothed rtt variance (actually, a smoothed @@ -6791,8 +6808,8 @@ rxi_ComputeRoundTripTime(struct rx_packet *p, if (delta < 0) delta = -delta; - delta -= (peer->rtt_dev << 1); - peer->rtt_dev += (delta >> 3); + delta -= (call->rtt_dev << 1); + call->rtt_dev += (delta >> 3); } else { /* I don't have a stored RTT so I start with this value. 
Since I'm * probably just starting a call, and will be pushing more data down @@ -6800,8 +6817,8 @@ rxi_ComputeRoundTripTime(struct rx_packet *p, * little, and I set deviance to half the rtt. In practice, * deviance tends to approach something a little less than * half the smoothed rtt. */ - peer->rtt = _8THMSEC(&thisRtt) + 8; - peer->rtt_dev = peer->rtt >> 2; /* rtt/2: they're scaled differently */ + call->rtt = _8THMSEC(&thisRtt) + 8; + call->rtt_dev = call->rtt >> 2; /* rtt/2: they're scaled differently */ } /* the smoothed RTT time is RTT + 4*MDEV * @@ -6811,13 +6828,17 @@ rxi_ComputeRoundTripTime(struct rx_packet *p, * add on a fixed 200ms to account for that timer expiring. */ - rtt_timeout = MAX(((peer->rtt >> 3) + peer->rtt_dev), + rtt_timeout = MAX(((call->rtt >> 3) + call->rtt_dev), rx_minPeerTimeout) + 200; - clock_Zero(&(peer->timeout)); - clock_Addmsec(&(peer->timeout), rtt_timeout); + clock_Zero(&call->rto); + clock_Addmsec(&call->rto, rtt_timeout); + + /* Update the peer, so any new calls start with our values */ + peer->rtt_dev = call->rtt_dev; + peer->rtt = call->rtt; dpf(("rxi_ComputeRoundTripTime(call=%d packet=%"AFS_PTR_FMT" rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%06d sec)\n", - p->header.callNumber, p, MSEC(&thisRtt), peer->rtt >> 3, peer->rtt_dev >> 2, (peer->timeout.sec), (peer->timeout.usec))); + p->header.callNumber, p, MSEC(&thisRtt), call->rtt >> 3, call->rtt_dev >> 2, (call->rto.sec), (call->rto.usec))); } @@ -7085,7 +7106,7 @@ rxi_ComputeRate(struct rx_peer *peer, struct rx_call *call, case RX_ACK_REQUESTED: xferSize = p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize; - xferMs = peer->rtt; + xferMs = call->rtt; break; case RX_ACK_PING_RESPONSE: @@ -7391,9 +7412,8 @@ rx_PrintPeerStats(FILE * file, struct rx_peer *peer) (int)peer->burstWait.sec, (int)peer->burstWait.usec); fprintf(file, - " Rtt %d, " "retry time %u.%06d, " "total sent %d, " - "resent %d\n", peer->rtt, (int)peer->timeout.sec, - 
(int)peer->timeout.usec, peer->nSent, peer->reSends); + " Rtt %d, " "total sent %d, " "resent %d\n", + peer->rtt, peer->nSent, peer->reSends); fprintf(file, " Packet size %d, " "max in packet skew %d, " @@ -7771,8 +7791,8 @@ rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr, peer->burstWait.usec = ntohl(peer->burstWait.usec); peer->rtt = ntohl(peer->rtt); peer->rtt_dev = ntohl(peer->rtt_dev); - peer->timeout.sec = ntohl(peer->timeout.sec); - peer->timeout.usec = ntohl(peer->timeout.usec); + peer->timeout.sec = 0; + peer->timeout.usec = 0; peer->nSent = ntohl(peer->nSent); peer->reSends = ntohl(peer->reSends); peer->inPacketSkew = ntohl(peer->inPacketSkew); @@ -7830,8 +7850,8 @@ rx_GetLocalPeers(afs_uint32 peerHost, afs_uint16 peerPort, peerStats->burstWait.usec = tp->burstWait.usec; peerStats->rtt = tp->rtt; peerStats->rtt_dev = tp->rtt_dev; - peerStats->timeout.sec = tp->timeout.sec; - peerStats->timeout.usec = tp->timeout.usec; + peerStats->timeout.sec = 0; + peerStats->timeout.usec = 0; peerStats->nSent = tp->nSent; peerStats->reSends = tp->reSends; peerStats->inPacketSkew = tp->inPacketSkew; diff --git a/src/rx/rx.h b/src/rx/rx.h index 1372bcb99..e18aa16bf 100644 --- a/src/rx/rx.h +++ b/src/rx/rx.h @@ -393,7 +393,6 @@ struct rx_peer { struct rx_queue congestionQueue; /* Calls that are waiting for non-zero burst value */ int rtt; /* Smoothed round trip time, measured in milliseconds/8 */ int rtt_dev; /* Smoothed rtt mean difference, in milliseconds/4 */ - struct clock timeout; /* Current retransmission delay */ int nSent; /* Total number of distinct data packets sent, not including retransmissions */ int reSends; /* Total number of retransmissions for this peer, since this structure was created */ @@ -527,6 +526,9 @@ struct rx_call { u_short nSoftAcks; /* The number of delayed soft acks */ u_short nHardAcks; /* The number of delayed hard acks */ u_short congestSeq; /* Peer's congestion sequence counter */ + int rtt; + int rtt_dev; + struct clock rto; /* 
The round trip timeout calculated for this call */ struct rxevent *resendEvent; /* If this is non-Null, there is a retransmission event pending */ struct rxevent *timeoutEvent; /* If this is non-Null, then there is an overall timeout for this call */ struct rxevent *keepAliveEvent; /* Scheduled periodically in active calls to keep call alive */ diff --git a/src/rx/rx_kcommon.c b/src/rx/rx_kcommon.c index 2f5b05fef..a381c50c4 100644 --- a/src/rx/rx_kcommon.c +++ b/src/rx/rx_kcommon.c @@ -370,12 +370,10 @@ rxi_InitPeerParams(struct rx_peer *pp) i = rxi_Findcbi(pp->host); if (i == -1) { - pp->timeout.sec = 3; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 3); pp->ifMTU = MIN(RX_REMOTE_PACKET_SIZE, rx_MyMaxSendSize); } else { - pp->timeout.sec = 2; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 2); pp->ifMTU = MIN(RX_MAX_PACKET_SIZE, rx_MyMaxSendSize); mtu = ntohl(afs_cb_interface.mtu[i]); /* Diminish the packet size to one based on the MTU given by @@ -396,13 +394,12 @@ rxi_InitPeerParams(struct rx_peer *pp) ifn = rxi_FindIfnet(pp->host, NULL); if (ifn) { - pp->timeout.sec = 2; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 2); pp->ifMTU = MIN(RX_MAX_PACKET_SIZE, rx_MyMaxSendSize); # ifdef IFF_POINTOPOINT if (rx_ifnet_flags(ifn) & IFF_POINTOPOINT) { /* wish we knew the bit rate and the chunk size, sigh. 
*/ - pp->timeout.sec = 4; + rx_rto_setPeerTimeoutSecs(pp, 4); pp->ifMTU = RX_PP_PACKET_SIZE; } # endif /* IFF_POINTOPOINT */ @@ -414,8 +411,7 @@ rxi_InitPeerParams(struct rx_peer *pp) pp->ifMTU = rxmtu; } } else { /* couldn't find the interface, so assume the worst */ - pp->timeout.sec = 3; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 3); pp->ifMTU = MIN(RX_REMOTE_PACKET_SIZE, rx_MyMaxSendSize); } # endif /* else AFS_USERSPACE_IP_ADDR */ @@ -425,12 +421,10 @@ rxi_InitPeerParams(struct rx_peer *pp) mtu = rxi_FindIfMTU(pp->host); if (mtu <= 0) { - pp->timeout.sec = 3; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 3); pp->ifMTU = MIN(RX_REMOTE_PACKET_SIZE, rx_MyMaxSendSize); } else { - pp->timeout.sec = 2; - /* pp->timeout.usec = 0; */ + rx_rto_setPeerTimeoutSecs(pp, 2); pp->ifMTU = MIN(RX_MAX_PACKET_SIZE, rx_MyMaxSendSize); /* Diminish the packet size to one based on the MTU given by @@ -444,7 +438,7 @@ rxi_InitPeerParams(struct rx_peer *pp) # endif /* AFS_SUN5_ENV */ #else /* ADAPT_MTU */ pp->rateFlag = 2; /* start timing after two full packets */ - pp->timeout.sec = 2; + rx_rto_setPeerTimeoutSecs(pp, 2); pp->ifMTU = OLD_MAX_PACKET_SIZE; #endif /* else ADAPT_MTU */ pp->ifMTU = rxi_AdjustIfMTU(pp->ifMTU); diff --git a/src/rx/rx_packet.c b/src/rx/rx_packet.c index 6f10329f1..7f1b85cfd 100644 --- a/src/rx/rx_packet.c +++ b/src/rx/rx_packet.c @@ -1986,8 +1986,6 @@ rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket, tpeer.burstWait.usec = htonl(tp->burstWait.usec); tpeer.rtt = htonl(tp->rtt); tpeer.rtt_dev = htonl(tp->rtt_dev); - tpeer.timeout.sec = htonl(tp->timeout.sec); - tpeer.timeout.usec = htonl(tp->timeout.usec); tpeer.nSent = htonl(tp->nSent); tpeer.reSends = htonl(tp->reSends); tpeer.inPacketSkew = htonl(tp->inPacketSkew); diff --git a/src/rx/rx_prototypes.h b/src/rx/rx_prototypes.h index eabc87da8..fb3169ef0 100644 --- a/src/rx/rx_prototypes.h +++ b/src/rx/rx_prototypes.h @@ -20,6 +20,7 @@ extern int 
(*swapNameProgram) (PROCESS, const char *, char *); extern int (*rx_justReceived) (struct rx_packet *, struct sockaddr_in *); extern int (*rx_almostSent) (struct rx_packet *, struct sockaddr_in *); +extern void rx_rto_setPeerTimeoutSecs(struct rx_peer *, int secs); extern void rx_SetEpoch(afs_uint32 epoch); extern int rx_Init(u_int port); diff --git a/src/rx/rx_user.c b/src/rx/rx_user.c index e4b59bba5..7f0b5fa1a 100644 --- a/src/rx/rx_user.c +++ b/src/rx/rx_user.c @@ -668,8 +668,6 @@ rxi_InitPeerParams(struct rx_peer *pp) struct sockaddr_in addr; #endif - - LOCK_IF_INIT; if (!Inited) { UNLOCK_IF_INIT; @@ -688,19 +686,20 @@ rxi_InitPeerParams(struct rx_peer *pp) ppaddr = ntohl(pp->host); pp->ifMTU = 0; - pp->timeout.sec = 2; + rx_rto_setPeerTimeoutSecs(pp, 2); pp->rateFlag = 2; /* start timing after two full packets */ /* I don't initialize these, because I presume they are bzero'd... * pp->burstSize pp->burst pp->burstWait.sec pp->burstWait.usec - * pp->timeout.usec */ + */ LOCK_IF; for (ix = 0; ix < rxi_numNetAddrs; ++ix) { if ((rxi_NetAddrs[ix] & myNetMasks[ix]) == (ppaddr & myNetMasks[ix])) { #ifdef IFF_POINTOPOINT if (myNetFlags[ix] & IFF_POINTOPOINT) - pp->timeout.sec = 4; + rx_rto_setPeerTimeoutSecs(pp, 4); #endif /* IFF_POINTOPOINT */ + rxmtu = myNetMTUs[ix] - RX_IPUDP_SIZE; if (rxmtu < RX_MIN_PACKET_SIZE) rxmtu = RX_MIN_PACKET_SIZE; @@ -710,12 +709,12 @@ rxi_InitPeerParams(struct rx_peer *pp) } UNLOCK_IF; if (!pp->ifMTU) { /* not local */ - pp->timeout.sec = 3; + rx_rto_setPeerTimeoutSecs(pp, 3); pp->ifMTU = MIN(rx_MyMaxSendSize, RX_REMOTE_PACKET_SIZE); } #else /* ADAPT_MTU */ pp->rateFlag = 2; /* start timing after two full packets */ - pp->timeout.sec = 2; + rx_rto_setPeerTimeoutSecs(pp, 2); pp->ifMTU = MIN(rx_MyMaxSendSize, OLD_MAX_PACKET_SIZE); #endif /* ADAPT_MTU */ #if defined(ADAPT_PMTU) && defined(IP_MTU) diff --git a/src/rx/test/testclient.c b/src/rx/test/testclient.c index 1e1f0c894..4a2978bbf 100644 --- a/src/rx/test/testclient.c +++ 
b/src/rx/test/testclient.c @@ -241,8 +241,7 @@ main(int argc, char **argv) if (!clock_IsZero(&burstTime)) conn->peer->burstWait = burstTime; if (!clock_IsZero(&retryTime)) - conn->peer->timeout = retryTime; - + conn->peer->rtt = _8THMSEC(&retryTime); if (sendFile) SendFile(sendFile, conn); else {