From 35285dad3e20e50f592afa95d994b68ded5c8b2b Mon Sep 17 00:00:00 2001 From: Tom Keiser Date: Mon, 25 Apr 2005 21:52:59 +0000 Subject: [PATCH] rx-fpq-optimize-20050425 FIXES 17805 here's a patch that reduces the overhead of transfers between the local and global free packet queues. The old algorithm was O(n) in the number of store instructions -- 7 per rx_packet. I've added some bulk transfer macros to the rx_queue package. Now, the number of store instructions is O(1) -- 6 total. This should help reduce bus contention and cache line invalidates on SMPs. --- src/rx/rx_globals.h | 36 +++++++++++++++--------------------- src/rx/rx_queue.h | 24 +++++++++++++++++++++++- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/rx/rx_globals.h b/src/rx/rx_globals.h index e3040bca4..2cb72941c 100644 --- a/src/rx/rx_globals.h +++ b/src/rx/rx_globals.h @@ -253,11 +253,9 @@ EXT void rxi_FlushLocalPacketsTSFPQ(void); /* flush all thread-local packets to register int i; \ register struct rx_packet * p; \ register int tsize = (rx_ts_info_p)->_FPQ.len - rx_TSFPQLocalMax + rx_TSFPQGlobSize; \ - for (i=0; i < tsize; i++) { \ - p = queue_Last(&((rx_ts_info_p)->_FPQ), rx_packet); \ - queue_Remove(p); \ - queue_Prepend(&rx_freePacketQueue,p); \ - } \ + for (i=0,p=queue_Last(&((rx_ts_info_p)->_FPQ), rx_packet); \ + i < tsize; i++,p=queue_Prev(p, rx_packet)); \ + queue_SplitAfterPrepend(&((rx_ts_info_p)->_FPQ),&rx_freePacketQueue,p); \ (rx_ts_info_p)->_FPQ.len -= tsize; \ rx_nFreePackets += tsize; \ (rx_ts_info_p)->_FPQ.ltog_ops++; \ @@ -277,11 +275,9 @@ EXT void rxi_FlushLocalPacketsTSFPQ(void); /* flush all thread-local packets to do { \ register int i; \ register struct rx_packet * p; \ - for (i=0; i < (num_transfer); i++) { \ - p = queue_Last(&((rx_ts_info_p)->_FPQ), rx_packet); \ - queue_Remove(p); \ - queue_Prepend(&rx_freePacketQueue,p); \ - } \ + for (i=0,p=queue_Last(&((rx_ts_info_p)->_FPQ), rx_packet); \ + i < (num_transfer); i++,p=queue_Prev(p, rx_packet)); \ + queue_SplitAfterPrepend(&((rx_ts_info_p)->_FPQ),&rx_freePacketQueue,p); \ (rx_ts_info_p)->_FPQ.len -= (num_transfer); \ rx_nFreePackets += (num_transfer); \ (rx_ts_info_p)->_FPQ.ltog_ops++; \ @@ -300,13 +296,13 @@ EXT void rxi_FlushLocalPacketsTSFPQ(void); /* flush all thread-local packets to rx_freePktQ_lock must be held. */ #define RX_TS_FPQ_GTOL(rx_ts_info_p) \ do { \ - register int i; \ + register int i, tsize; \ register struct rx_packet * p; \ - for (i=0; (i < rx_TSFPQGlobSize) && queue_IsNotEmpty(&rx_freePacketQueue); i++) { \ - p = queue_First(&rx_freePacketQueue, rx_packet); \ - queue_Remove(p); \ - queue_Append(&((rx_ts_info_p)->_FPQ),p); \ - } \ + tsize = (rx_TSFPQGlobSize <= rx_nFreePackets) ? \ + rx_TSFPQGlobSize : rx_nFreePackets; \ + for (i=0,p=queue_First(&rx_freePacketQueue, rx_packet); \ + i < tsize; i++,p=queue_Next(p, rx_packet)); \ + queue_SplitBeforeAppend(&rx_freePacketQueue,&((rx_ts_info_p)->_FPQ),p); \ (rx_ts_info_p)->_FPQ.len += i; \ rx_nFreePackets -= i; \ (rx_ts_info_p)->_FPQ.gtol_ops++; \ @@ -317,11 +313,9 @@ EXT void rxi_FlushLocalPacketsTSFPQ(void); /* flush all thread-local packets to do { \ register int i; \ register struct rx_packet * p; \ - for (i=0; i < (num_transfer); i++) { \ - p = queue_First(&rx_freePacketQueue, rx_packet); \ - queue_Remove(p); \ - queue_Append(&((rx_ts_info_p)->_FPQ),p); \ - } \ + for (i=0,p=queue_First(&rx_freePacketQueue, rx_packet); \ + i < (num_transfer); i++,p=queue_Next(p, rx_packet)); \ + queue_SplitBeforeAppend(&rx_freePacketQueue,&((rx_ts_info_p)->_FPQ),p); \ (rx_ts_info_p)->_FPQ.len += i; \ rx_nFreePackets -= i; \ (rx_ts_info_p)->_FPQ.gtol_ops++; \ diff --git a/src/rx/rx_queue.h b/src/rx/rx_queue.h index 3e4096afb..3fbc1861d 100644 --- a/src/rx/rx_queue.h +++ b/src/rx/rx_queue.h @@ -64,10 +64,20 @@ for (n=0, queue_Scan(&myqueue, qe, nqe, myelement), n++) {} /* N.B. I don't think it is possible to write this expression, correctly, with less than one comma (you can easily write an alternative expression with no commas that works with most or all compilers, but it's not clear that it really is un-ambiguous, legal C-code). */ #define _QA(q,i,a,b) (((i->a=q->a)->b=i)->b=q, q->a=i) -/* These ones splice two queues together. If (a,b) is (next,prev) then (*q2) is appended to (*q1), otherwise (*q2) is prepended to (*q1). */ +/* These ones splice two queues together. If (a,b) is (next,prev) then (*q2) is prepended to (*q1), otherwise (*q2) is appended to (*q1). */ #define _QS(q1,q2,a,b) if (queue_IsEmpty(q2)); else \ ((((q2->a->b=q1)->a->b=q2->b)->a=q1->a, q1->a=q2->a), queue_Init(q2)) +/* This one removes part of queue (*q1) and attaches it to queue (*q2). + * If (a,b) is (next,prev) then the subchain is prepended to (*q2), + * otherwise the subchain is appended to (*q2). + * If (c,d) is (prev,next) then the subchain is the elements in (*q1) before (i), + * otherwise the subchain is the elements in (*q1) after (i). + * If (x,y) is (q1,i) then operation is either BeforePrepend of AfterAppend. + * If (x,y) is (i,q1) then operation is either BeforeAppend or AfterPrepend. */ +#define _QSP(q1,q2,i,a,b,c,d,x,y) if (!queue_IsEnd(q1,i->c)) \ + (((y->b->a=q2->a)->b=y->b), ((x->a->b=q2)->a=x->a), ((i->c=q1)->d=i)) + /* Basic remove operation. Doesn't update the queue item to indicate it's been removed */ #define _QR(i) ((_Q(i)->prev->next=_Q(i)->next)->prev=_Q(i)->prev) @@ -94,6 +104,18 @@ for (n=0, queue_Scan(&myqueue, qe, nqe, myelement), n++) {} /* Splice the members of queue (*q2) to the end of (*q1), re-initialize (*q2) */ #define queue_SpliceAppend(q1,q2) _QS(_Q(q1),_Q(q2),prev,next) +/* split the members after i off of queue (*q1), and append them onto queue (*q2) */ +#define queue_SplitAfterAppend(q1,q2,i) _QSP(_Q(q1),_Q(q2),_Q(i),prev,next,next,prev,_Q(q1),_Q(i)) + +/* split the members after i off of queue (*q1), and prepend them onto queue (*q2) */ +#define queue_SplitAfterPrepend(q1,q2,i) _QSP(_Q(q1),_Q(q2),_Q(i),next,prev,next,prev,_Q(i),_Q(q1)) + +/* split the members before i off of queue (*q1), and append them onto queue (*q2) */ +#define queue_SplitBeforeAppend(q1,q2,i) _QSP(_Q(q1),_Q(q2),_Q(i),prev,next,prev,next,_Q(i),_Q(q1)) + +/* split the members before i off of queue (*q1), and prepend them onto queue (*q2) */ +#define queue_SplitBeforePrepend(q1,q2,i) _QSP(_Q(q1),_Q(q2),_Q(i),next,prev,prev,next,_Q(q1),_Q(i)) + /* Replace the queue (*q1) with the contents of the queue (*q2), re-initialize (*q2) */ #define queue_Replace(q1,q2) if (queue_IsEmpty(q2)) queue_Init(q1); else \ (*_Q(q1) = *_Q(q2), _Q(q1)->next->prev = _Q(q1)->prev->next = _Q(q1), queue_Init(q2)) -- 2.39.5