From c7f9307c35c0c89f7ec8ada315c81ebc47517f86 Mon Sep 17 00:00:00 2001 From: Derrick Brashear Date: Thu, 10 Jun 2010 14:37:39 -0400 Subject: [PATCH] rx mtu ping handling Engage MTU pings. They occur only while a call is active, but the results are cached per-peer. Idle dead time must be in use; otherwise MTU pings do not activate. (Idle dead time is needed to time out the call; otherwise, keepalives will succeed and the call will thus not hit the regular "down server" timeout.) Change-Id: Ic29b1b5713fb6debde14f4adfd5019ee80cc9e3c Reviewed-on: http://gerrit.openafs.org/2115 Reviewed-by: Derrick Brashear Tested-by: Derrick Brashear --- src/rx/rx.c | 139 ++++++++++++++++++++++++++++++++++++++--- src/rx/rx.h | 4 +- src/rx/rx_packet.c | 22 +++++-- src/rx/rx_prototypes.h | 5 ++ 4 files changed, 156 insertions(+), 14 deletions(-) diff --git a/src/rx/rx.c b/src/rx/rx.c index 161e3e611..ea383f640 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -1301,6 +1301,9 @@ rx_NewCall(struct rx_connection *conn) /* Turn on busy protocol. */ rxi_KeepAliveOn(call); + /* Attempt MTU discovery */ + rxi_GrowMTUOn(call); + /* * We are no longer the active thread in rx_NewCall */ @@ -3855,15 +3858,30 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np, if (conn->lastPacketSizeSeq) { MUTEX_ENTER(&conn->conn_data_lock); - if (first >= conn->lastPacketSizeSeq) { + if ((first > conn->lastPacketSizeSeq) && (conn->lastPacketSize)) { pktsize = conn->lastPacketSize; conn->lastPacketSize = conn->lastPacketSizeSeq = 0; } MUTEX_EXIT(&conn->conn_data_lock); + } + if ((ap->reason == RX_ACK_PING_RESPONSE) && (conn->lastPingSizeSer)) { + MUTEX_ENTER(&conn->conn_data_lock); + if ((conn->lastPingSizeSer == serial) && (conn->lastPingSize)) { + /* process mtu ping ack */ + pktsize = conn->lastPingSize; + conn->lastPingSizeSer = conn->lastPingSize = 0; + } + MUTEX_EXIT(&conn->conn_data_lock); + } + + if (pktsize) { MUTEX_ENTER(&peer->peer_lock); - /* start somewhere */ + /* + * Start somewhere. 
Can't assume we can send what we can receive, + * but we are clearly receiving. + */ if (!peer->maxPacketSize) - peer->maxPacketSize = np->length+RX_IPUDP_SIZE; + peer->maxPacketSize = RX_MIN_PACKET_SIZE+RX_IPUDP_SIZE; if (pktsize > peer->maxPacketSize) { peer->maxPacketSize = pktsize; @@ -4997,6 +5015,7 @@ rxi_SendAck(struct rx_call *call, struct rx_packet *p; u_char offset; afs_int32 templ; + afs_uint32 padbytes = 0; #ifdef RX_ENABLE_TSFPQ struct rx_ts_info_t * rx_ts_info; #endif @@ -5008,6 +5027,28 @@ rxi_SendAck(struct rx_call *call, call->conn->rwind[call->channel] = call->rwind = rx_maxReceiveWindow; } + /* Don't attempt to grow MTU if this is a critical ping */ + if ((reason == RX_ACK_PING) && !(call->conn->flags & RX_CONN_ATTACHWAIT) + && ((clock_Sec() - call->lastSendTime) < call->conn->secondsUntilPing)) + { + /* keep track of per-call attempts, if we're over max, do in small + * otherwise in larger? set a size to increment by, decrease + * on failure, here? + */ + if (call->conn->peer->maxPacketSize && + (call->conn->peer->maxPacketSize < OLD_MAX_PACKET_SIZE + +RX_IPUDP_SIZE)) + padbytes = call->conn->peer->maxPacketSize+16; + else + padbytes = call->conn->peer->maxMTU + 128; + + /* do always try a minimum size ping */ + padbytes = MAX(padbytes, RX_MIN_PACKET_SIZE+RX_IPUDP_SIZE+4); + + /* subtract the ack payload */ + padbytes -= (rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32)); + } + call->nHardAcks = 0; call->nSoftAcks = 0; if (call->rnext > call->lastAcked) @@ -5035,7 +5076,7 @@ rxi_SendAck(struct rx_call *call, } #endif - templ = + templ = padbytes + rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) - rx_GetDataSize(p); if (templ > 0) { @@ -5135,6 +5176,18 @@ rxi_SendAck(struct rx_call *call, #ifdef ADAPT_WINDOW clock_GetTime(&call->pingRequestTime); #endif + if (padbytes) { + p->length = padbytes + + rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32); + + while (padbytes--) + /* not fast but we can potentially use this if truncated + * 
fragments are delivered to figure out the mtu. + */ + rx_packetwrite(p, rx_AckDataSize(offset) + 4 * + sizeof(afs_int32), sizeof(afs_int32), + &padbytes); + } } if (call->conn->type == RX_CLIENT_CONNECTION) p->header.flags |= RX_CLIENT_INITIATED; @@ -5899,15 +5952,15 @@ rxi_CheckCall(struct rx_call *call) && ((call->startWait + conn->idleDeadTime) < now) && (call->flags & RX_CALL_READER_WAIT)) { if (call->state == RX_STATE_ACTIVE) { - rxi_CallError(call, RX_CALL_TIMEOUT); - return -1; + cerror = RX_CALL_TIMEOUT; + goto mtuout; } } if (call->lastSendData && conn->idleDeadTime && (conn->idleDeadErr != 0) && ((call->lastSendData + conn->idleDeadTime) < now)) { if (call->state == RX_STATE_ACTIVE) { - rxi_CallError(call, conn->idleDeadErr); - return -1; + cerror = conn->idleDeadErr; + goto mtuout; } } /* see if we have a hard timeout */ @@ -6076,6 +6129,47 @@ rxi_KeepAliveEvent(struct rxevent *event, void *arg1, void *dummy) MUTEX_EXIT(&call->lock); } +/* Does what's on the nameplate. */ +void +rxi_GrowMTUEvent(struct rxevent *event, void *arg1, void *dummy) +{ + struct rx_call *call = arg1; + struct rx_connection *conn; + + MUTEX_ENTER(&call->lock); + CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE); + if (event == call->growMTUEvent) + call->growMTUEvent = NULL; + +#ifdef RX_ENABLE_LOCKS + if (rxi_CheckCall(call, 0)) { + MUTEX_EXIT(&call->lock); + return; + } +#else /* RX_ENABLE_LOCKS */ + if (rxi_CheckCall(call)) + return; +#endif /* RX_ENABLE_LOCKS */ + + /* Don't bother with dallying calls */ + if (call->state == RX_STATE_DALLY) { + MUTEX_EXIT(&call->lock); + return; + } + + conn = call->conn; + + /* + * keep being scheduled, just don't do anything if we're at peak, + * or we're not set up to be properly handled (idle timeout required) + */ + if ((conn->peer->maxPacketSize != 0) && + (conn->peer->natMTU < RX_MAX_PACKET_SIZE) && + (conn->idleDeadErr)) + (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0); + rxi_ScheduleGrowMTUEvent(call); + MUTEX_EXIT(&call->lock); +} void 
rxi_ScheduleKeepAliveEvent(struct rx_call *call) @@ -6091,6 +6185,25 @@ rxi_ScheduleKeepAliveEvent(struct rx_call *call) } } +void +rxi_ScheduleGrowMTUEvent(struct rx_call *call) +{ + if (!call->growMTUEvent) { + struct clock when, now; + clock_GetTime(&now); + when = now; + if ((call->conn->peer->maxPacketSize != 0) && + (call->conn->peer->ifMTU < OLD_MAX_PACKET_SIZE)) { /*was nat */ + when.sec += MAX(60, MIN(1+6*call->conn->secondsUntilPing, + 1+call->conn->secondsUntilDead)); + } else + when.sec += call->conn->secondsUntilPing - 1; + CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE); + call->growMTUEvent = + rxevent_PostNow(&when, &now, rxi_GrowMTUEvent, call, 0); + } +} + /* N.B. rxi_KeepAliveOff: is defined earlier as a macro */ void rxi_KeepAliveOn(struct rx_call *call) @@ -6104,6 +6217,16 @@ rxi_KeepAliveOn(struct rx_call *call) rxi_ScheduleKeepAliveEvent(call); } +void +rxi_GrowMTUOn(struct rx_call *call) +{ + struct rx_connection *conn = call->conn; + MUTEX_ENTER(&conn->conn_data_lock); + conn->lastPingSizeSer = conn->lastPingSize = 0; + MUTEX_EXIT(&conn->conn_data_lock); + rxi_ScheduleGrowMTUEvent(call); +} + /* This routine is called to send connection abort messages * that have been delayed to throttle looping clients. 
*/ void diff --git a/src/rx/rx.h b/src/rx/rx.h index 81e419017..727be01e1 100644 --- a/src/rx/rx.h +++ b/src/rx/rx.h @@ -249,7 +249,8 @@ struct rx_connection { afs_int32 maxSerial; /* largest serial number seen on incoming packets */ afs_int32 lastPacketSize; /* last >max attempt */ afs_int32 lastPacketSizeSeq; /* seq number of attempt */ - + afs_int32 lastPingSize; /* last MTU ping attempt */ + afs_int32 lastPingSizeSer; /* serial of last MTU ping attempt */ struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */ struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */ struct rxevent *checkReachEvent; /* Scheduled when checking reachability */ @@ -521,6 +522,7 @@ struct rx_call { struct rxevent *resendEvent; /* If this is non-Null, there is a retransmission event pending */ struct rxevent *timeoutEvent; /* If this is non-Null, then there is an overall timeout for this call */ struct rxevent *keepAliveEvent; /* Scheduled periodically in active calls to keep call alive */ + struct rxevent *growMTUEvent; /* Scheduled periodically in active calls to discover true maximum MTU */ struct rxevent *delayedAckEvent; /* Scheduled after all packets are received to send an ack if a reply or new call is not generated soon */ struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */ int abortCode; /* error code from last RPC */ diff --git a/src/rx/rx_packet.c b/src/rx/rx_packet.c index 5ec04d29b..9562b2c74 100644 --- a/src/rx/rx_packet.c +++ b/src/rx/rx_packet.c @@ -2218,7 +2218,11 @@ rxi_SendPacket(struct rx_call *call, struct rx_connection *conn, MUTEX_ENTER(&conn->conn_data_lock); p->header.serial = ++conn->serial; if (p->length > conn->peer->maxPacketSize) { - if (p->header.seq != 0) { + if ((p->header.type == RX_PACKET_TYPE_ACK) && + (p->header.flags & RX_REQUEST_ACK)) { + conn->lastPingSize = p->length; + conn->lastPingSizeSer = p->header.serial; + } else if (p->header.seq != 0) { conn->lastPacketSize = 
p->length; conn->lastPacketSizeSeq = p->header.seq; } @@ -2375,10 +2379,18 @@ rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn, for (i = 0; i < len; i++) { p = list[i]; if (p->length > conn->peer->maxPacketSize) { - if ((p->header.seq != 0) && - ((i == 0) || (p->length >= conn->lastPacketSize))) { - conn->lastPacketSize = p->length; - conn->lastPacketSizeSeq = p->header.seq; + /* a ping *or* a sequenced packet can count */ + if ((p->length > conn->peer->maxPacketSize)) { + if (((p->header.type == RX_PACKET_TYPE_ACK) && + (p->header.flags & RX_REQUEST_ACK)) && + ((i == 0) || (p->length >= conn->lastPingSize))) { + conn->lastPingSize = p->length; + conn->lastPingSizeSer = serial + i; + } else if ((p->header.seq != 0) && + ((i == 0) || (p->length >= conn->lastPacketSize))) { + conn->lastPacketSize = p->length; + conn->lastPacketSizeSeq = p->header.seq; + } } } } diff --git a/src/rx/rx_prototypes.h b/src/rx/rx_prototypes.h index cf2d8ac26..594237dde 100644 --- a/src/rx/rx_prototypes.h +++ b/src/rx/rx_prototypes.h @@ -168,10 +168,15 @@ extern int rxi_CheckCall(struct rx_call *call); extern void rxi_KeepAliveEvent(struct rxevent *event, void *call /* struct rx_call *call */, void *dummy); +extern void rxi_GrowMTUEvent(struct rxevent *event, + void *call /* struct rx_call *call */, + void *dummy); extern void rxi_ScheduleKeepAliveEvent(struct rx_call *call); extern void rxi_ScheduleNatKeepAliveEvent(struct rx_connection *conn); +extern void rxi_ScheduleGrowMTUEvent(struct rx_call *call); extern void rxi_KeepAliveOn(struct rx_call *call); extern void rxi_NatKeepAliveOn(struct rx_connection *conn); +extern void rxi_GrowMTUOn(struct rx_call *call); extern void rx_SetConnSecondsUntilNatPing(struct rx_connection *conn, afs_int32 seconds); extern void rxi_SendDelayedConnAbort(struct rxevent *event, void *conn, /* struct rx_connection *conn */ -- 2.39.5