From e8c4fc4f37912a5d30694caef45d27374e9201e7 Mon Sep 17 00:00:00 2001 From: Simon Wilkinson Date: Sat, 18 Jun 2011 12:43:44 +0100 Subject: [PATCH] rx: Enter loss recovery when we retransmit Since I mistakenly wrote commit 36e2d13b, RX hasn't entered congestion avoidance when a loss event occurs. This is bad, because on today's networks the majority of packet losses are due to some form of congestion. Now that the timeout code has been restructured, the chances of entering the retransmit routine in error are much, much smaller, so this code needs to be restored. This change reverts 36e2d13b55085c996d38b30d003296c602ef8ee3. However, the original RX code has the problem that it assumes that all forms of fast recovery are the same - in particular, that the call settings that result from entering fast recovery due to a fast retransmit are identical to those resulting from a timeout. This is not the case, and this will be fixed in a later change. Change-Id: Iedb34437db9fcfbc90307b01e566a8d089eef4bb Reviewed-on: http://gerrit.openafs.org/4866 Reviewed-by: Derrick Brashear Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman Tested-by: BuildBot --- src/rx/rx.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/rx/rx.c b/src/rx/rx.c index ade91cd93..6e50c223d 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -5918,10 +5918,14 @@ static void rxi_Resend(struct rxevent *event, void *arg0, void *arg1, int istack) { struct rx_call *call = arg0; + struct rx_peer *peer; struct rx_packet *p, *nxp; struct clock maxTimeout = { 60, 0 }; MUTEX_ENTER(&call->lock); + + peer = call->conn->peer; + /* Make sure that the event pointer is removed from the call * structure, since there is no longer a per-call retransmission * event pending. 
*/ @@ -5943,6 +5947,21 @@ rxi_Resend(struct rxevent *event, void *arg0, void *arg1, int istack) goto out; } +#ifdef AFS_GLOBAL_RXLOCK_KERNEL + if (call->flags & RX_CALL_FAST_RECOVER_WAIT) { + /* Someone else is waiting to start recovery */ + goto out; + } + call->flags |= RX_CALL_FAST_RECOVER_WAIT; + rxi_WaitforTQBusy(call); + call->flags &= ~RX_CALL_FAST_RECOVER_WAIT; + if (call->error) + goto out; +#endif + + /* We're in loss recovery */ + call->flags |= RX_CALL_FAST_RECOVER; + /* Mark all of the pending packets in the queue as being lost */ for (queue_Scan(&call->tq, p, nxp, rx_packet)) { if (!(p->flags & RX_PKTFLAG_ACKED)) @@ -5952,12 +5971,32 @@ rxi_Resend(struct rxevent *event, void *arg0, void *arg1, int istack) /* We're resending, so we double the timeout of the call. This will be * dropped back down by the first successful ACK that we receive. * - * We apply a maximum value here of 60 second + * We apply a maximum value here of 60 seconds */ clock_Add(&call->rto, &call->rto); if (clock_Gt(&call->rto, &maxTimeout)) call->rto = maxTimeout; + /* Packet loss is most likely due to congestion, so drop our window size + * and start again from the beginning */ + if (peer->maxDgramPackets >1) { + call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE; + call->MTU = MIN(peer->natMTU, peer->maxMTU); + } + call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1; + call->nDgramPackets = 1; + call->cwind = 1; + call->nextCwind = 1; + call->nAcks = 0; + call->nNacks = 0; + MUTEX_ENTER(&peer->peer_lock); + peer->MTU = call->MTU; + peer->cwind = call->cwind; + peer->nDgramPackets = 1; + peer->congestSeq++; + call->congestSeq = peer->congestSeq; + MUTEX_EXIT(&peer->peer_lock); + rxi_Start(call, istack); out: -- 2.39.5