--- /dev/null
+
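+# Select the target architecture: one of INTEL, PPC, IA64, MIPS, SPARC
+# or ALPHA (see the ifeq blocks below).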
+ARCH := SPARC
+DEBUGGING := -DNDEBUG
+
+ifeq ($(ARCH),INTEL)
+CC := gcc
+CFLAGS := -O3 -DINTEL -fomit-frame-pointer -march=i686
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),PPC)
+CC := cc_r
+CFLAGS := -O3 -DPPC -q64 -w
+LDFLAGS := -lpthread -q64
+ASFLAGS := -a64
+endif
+
+ifeq ($(ARCH),IA64)
+CC := gcc
+CFLAGS := -O3 -DIA64 -fomit-frame-pointer
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),MIPS)
+CC := gcc
+CFLAGS := -O3 -DMIPS -fomit-frame-pointer
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),SPARC)
+CC := /opt/SUNWspro/bin/cc
+CFLAGS := -xO3 -DSPARC sparc_mcas.il -xarch=v9b
+LDFLAGS := -DSPARC sparc_mcas.il -xarch=v9b -lthread -lrt
+endif
+
+ifeq ($(ARCH),ALPHA)
+CC := cc
+CFLAGS := -accept vaxc_keywords -O3 -DALPHA
+CFLAGS += -fomit-frame-pointer -DWEAK_MEM_ORDER
+LDFLAGS := -lpthread
+endif
+
+CFLAGS += $(DEBUGGING)
+COMMON_DEPS += Makefile $(wildcard *.h)
+
+GC_HARNESS_TARGETS := skip_lock_perlist skip_lock_pernode skip_lock_perpointer
+GC_HARNESS_TARGETS += skip_cas skip_mcas
+
+GC_HARNESS_TARGETS += bst_lock_fraser bst_lock_manber bst_lock_kung
+GC_HARNESS_TARGETS += bst_mcas
+
+GC_HARNESS_TARGETS += rb_lock_concurrentwriters rb_lock_serialisedwriters
+GC_HARNESS_TARGETS += rb_lock_mutex
+
+TARGETS := $(GC_HARNESS_TARGETS)
+TARGETS += rb_stm_fraser rb_stm_herlihy rb_stm_lock
+TARGETS += skip_stm_fraser skip_stm_herlihy skip_stm_lock
+
+all: $(TARGETS) replay
+
+clean:
+ rm -f $(TARGETS) replay *~ core *.o *.a
+
+replay: %: %.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -c -o $(patsubst %.c,%.o,$<) $<
+ $(CC) -o $@ $(patsubst %.c,%.o,$<) $(LDFLAGS)
+
+tree_mcas.o: tree_mcas.c mcas.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+skip_lock_perpointer.o: skip_lock.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -DTINY_MTX -c -o $@ $<
+skip_lock_pernode.o: skip_lock.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+skip_lock_perlist.o: skip_lock.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -DFAT_MTX -c -o $@ $<
+skip_mcas.o: skip_mcas.c mcas.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+%.o: %.c $(COMMON_DEPS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+skip_stm_lock: skip_stm.o stm_lock.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+skip_stm_fraser: skip_stm.o stm_fraser.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+skip_stm_herlihy: skip_stm.o stm_herlihy.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+
+rb_stm_lock: rb_stm.o stm_lock.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+rb_stm_fraser: rb_stm.o stm_fraser.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+rb_stm_herlihy: rb_stm.o stm_herlihy.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+
+$(GC_HARNESS_TARGETS): %: %.o set_harness.o ptst.o gc.o
+ $(CC) -o $@ $^ $(LDFLAGS)
--- /dev/null
+ The Lock-Free Library
+ =====================
+
+
+1. Building
+-----------
+Edit the Makefile and set ARCH to the appropriate value (one of INTEL,
+PPC, IA64, MIPS, SPARC or ALPHA).
+Type 'make'.
+
+
+2. What you get
+---------------
+'stm_fraser.c' is an object-based STM with the programming API defined
+in 'stm.h'. 'mcas.c' is an implementation of multi-word
+compare-and-swap.
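+
+As a rough illustration (based on how it is invoked in 'bst_mcas.c'; the
+variable names here are hypothetical), an MCAS call takes a location count
+followed by (address, expected-value, new-value) triples, and returns
+non-zero only if every location still held its expected value and all of
+them were updated atomically:
+
+   r = mcas(2, (void **)&n->l, old_l, new_l,
+               (void **)&n->r, old_r, new_r);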
+
+These are used to build a number of search structures: skip lists,
+binary search trees, and red-black trees. The executables are named as
+follows:
+
+ bst_lock_fraser --- BST implementation using per-node locks
+ (Fraser's threaded-tree algorithm).
+ No locking for read operations.
+ bst_lock_kung --- BST implementation using per-node locks
+ (Kung & Lehman's algorithm).
+ No locking for read operations.
+ bst_lock_manber --- BST implementation using per-node locks
+ (Manber & Ladner's algorithm).
+ No locking for read operations.
+ bst_mcas --- BST implementation based on MCAS.
+
+ rb_lock_concurrentwriters --- Red-black trees with concurrent writers.
+ Based on MCS multi-reader locks.
+ rb_lock_serialisedwriters --- Red-black trees with serialised writers.
+ Based on MCS multi-reader locks.
+ rb_lock_mutex --- Red-black trees with concurrent writers, and
+ no locking for read operations. Very fast!
+ rb_stm_fraser --- Red-black trees using Fraser's STM.
+ rb_stm_herlihy --- Red-black trees using Herlihy et al's STM.
+ rb_stm_lock --- Red-black trees using 2-phase-locking STM.
+
+ skip_lock_perlist --- Skip lists with a single global lock.
+ No locking for read operations.
+ skip_lock_pernode --- Skip lists with a lock per node.
+ No locking for read operations.
+ skip_lock_perpointer --- Skip lists with a lock per pointer.
+ No locking for read operations.
+ skip_cas --- Skip lists built directly from CAS.
+ skip_mcas --- Skip lists based on MCAS.
+ skip_stm_fraser --- Skip lists using Fraser's STM.
+ skip_stm_herlihy --- Skip lists using Herlihy et al's STM.
+ skip_stm_lock --- Skip lists using 2-phase-locking STM.
+
+Each executable is run as:
+ <executable> <num_threads> <read_proportion> <key_power>
+
+'executable' is one of the above implementations.
+
+'num_threads' indicates the degree of parallelism.
+
+'read_proportion' determines what proportion of the random workload is
+lookups as opposed to updates or removals. The proportion is out of 256.
+
+'key_power' indicates the key range. Key range is 2 ^ 'key_power'.
+Since updates and removals are equally probable, the mean set size
+will be 2 ^ ('key_power' - 1).
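+
+For example, 'skip_cas 4 200 10' runs the CAS-based skip list with 4
+threads, roughly 78% lookups (200/256), keys drawn from a range of
+2 ^ 10 = 1024, and hence a mean set size of about 512.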
+
+
+3. Verifying correctness
+------------------------
+To check that each implementation correctly behaves as a 'set' ought
+to, you can define DO_WRITE_LOG in 'set_harness.c'. This will cause
+each implementation to produce a log describing each operation that
+was executed, and its result.
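+
+For example, add '#define DO_WRITE_LOG' near the top of 'set_harness.c'
+(or pass -DDO_WRITE_LOG via CFLAGS in the Makefile) and rebuild with 'make'.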
+
+This log can be run through 'replay', which will search for a linearisable
+schedule.
+
+
+4. Distribution license
+-----------------------
+The license is GPL. See the file COPYING for details.
+
+
+ -- Keir Fraser, 25th September 2003
+
+
+****
+
+This software has been released by its original author, Keir Fraser,
+with permission from his advisors, under a BSD license. For details,
+please see README.LICENSE.
+
+ -- Matt Benjamin, 07/24/2009
--- /dev/null
+A note on BSD licensing of the software contained herein.
+
+This software includes software previously released in 2003 under a
+GPL license, but released by the original copyright holder, Keir
+Fraser, under a BSD license, on 5/28/2008.
+
+The chain of electronic mails by which I, on behalf of the OpenAFS
+project, requested and secured the grant of license (BSD terms, as
+stated above) is included below.
+
+--Matt Benjamin <matt@linuxbox.com>
+5/31/2008
+
+--------------------GRANT OF LICENSE--------------------
+
+Return-Path: <Keir.Fraser@eu.citrix.com>
+X-Original-To: matt@linuxbox.com
+Delivered-To: matt@linuxbox.com
+Received: by trosper.private.linuxbox.com (Postfix, from userid 65534)
+ id CA3F6D9CAB01; Wed, 28 May 2008 10:21:08 -0400 (EDT)
+X-Spam-Checker-Version: SpamAssassin 3.1.7 (2006-10-05) on
+ trosper.private.linuxbox.com
+X-Spam-Level:
+X-Spam-Status: No, score=-2.2 required=4.0 tests=BAYES_00,HTML_30_40,
+ HTML_MESSAGE autolearn=disabled version=3.1.7
+Received: from aa.linuxbox.com (linuxbox.com [10.1.1.1])
+ by trosper.private.linuxbox.com (Postfix) with ESMTP id BD4EEC0A1BE1
+ for <matt@linuxbox.com>; Wed, 28 May 2008 10:20:59 -0400 (EDT)
+Received: from SMTP.EU.CITRIX.COM (smtp.eu.citrix.com [62.200.22.115])
+ by aa.linuxbox.com (8.13.1/8.13.1/SuSE Linux 0.7) with ESMTP id m4SEKIT2032434
+ for <matt@linuxbox.com>; Wed, 28 May 2008 10:20:59 -0400
+X-IronPort-AV: E=Sophos;i="4.27,555,1204520400";
+ d="scan'208,217";a="355906"
+Received: from lonpexchmx01.citrite.net ([10.30.224.191])
+ by LONPIPO01.EU.CITRIX.COM with ESMTP; 28 May 2008 10:19:40 -0400
+Received: from [10.80.3.247] ([10.80.3.247]) by lonpexchmx01.citrite.net with Microsoft SMTPSVC(6.0.3790.3959);
+ Wed, 28 May 2008 15:19:40 +0100
+User-Agent: Microsoft-Entourage/11.4.0.080122
+Date: Wed, 28 May 2008 15:19:24 +0100
+Subject: Re: MCAS licensing
+From: Keir Fraser <keir.fraser@eu.citrix.com>
+To: Tim Harris <tim.harris@gmail.com>,
+ Matt Benjamin <matt@linuxbox.com>
+Message-ID: <C463287C.2157D%keir.fraser@eu.citrix.com>
+Thread-Topic: MCAS licensing
+Thread-Index: AcjAzdQjEv1TZSzBEd2wxgAX8io7RQ==
+In-Reply-To: <bf2c48aa0805280707j5009e77cj8a13b3dddf45df3a@mail.gmail.com>
+Mime-version: 1.0
+Content-type: multipart/alternative;
+ boundary="B_3294832767_9324180"
+X-OriginalArrivalTime: 28 May 2008 14:19:40.0162 (UTC) FILETIME=[DDC5DE20:01C8C0CD]
+X-Greylist: Sender is SPF-compliant, not delayed by milter-greylist-2.0.2 (aa.linuxbox.com [134.215.213.37]); Wed, 28 May 2008 10:20:59 -0400 (EDT)
+
+> This message is in MIME format. Since your mail reader does not understand
+this format, some or all of this message may not be legible.
+
+--B_3294832767_9324180
+Content-type: text/plain;
+ charset="ISO-8859-1"
+Content-transfer-encoding: quoted-printable
+
+On 28/5/08 15:07, "Tim Harris" <tim.harris@gmail.com> wrote:
+
+> I'm personally happy making it available under a different license. Howe=
+ver,
+> most of this work (and almost all the actual implementation) was by Keir
+> Fraser, so he'll need to OK it as well.
+> =20
+> Keir, if its OK with you then can we replace the copy of lock-free-lib at
+> http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar=
+.gz
+> with one under a BSD-derived license?
+
+I=B9d be happy to relicense, however I=B9m unlikely to take the time to go
+through the tarball changing all references to GPL to refer to a BSD-alike
+license. Add to this the fact I do not have access to my CL account any
+more. However I=B9m happy for the OpenAFS project to take the tarball and do
+anything you like to it. The question then is simply what degree of
+assurance/provability do you require that we will not renege on this
+agreement later. Perhaps this email will suffice? :-)
+
+ -- Keir
+
+
+
+--B_3294832767_9324180
+Content-type: text/html;
+ charset="ISO-8859-1"
+Content-transfer-encoding: quoted-printable
+
+<HTML>
+<HEAD>
+<TITLE>Re: MCAS licensing</TITLE>
+</HEAD>
+<BODY>
+<FONT FACE=3D"Verdana, Helvetica, Arial"><SPAN STYLE=3D'font-size:12.0px'>On 28=
+/5/08 15:07, "Tim Harris" <tim.harris@gmail.com> wrote:<BR>
+<BR>
+</SPAN></FONT><BLOCKQUOTE><FONT FACE=3D"Verdana, Helvetica, Arial"><SPAN STYL=
+E=3D'font-size:12.0px'>I'm personally happy making it available under a differ=
+ent license. However,<BR>
+most of this work (and almost all the actual implementation) was by Keir<BR=
+>
+Fraser, so he'll need to OK it as well.<BR>
+ <BR>
+Keir, if its OK with you then can we replace the copy of lock-free-lib at<B=
+R>
+<FONT COLOR=3D"#0000FF"><U><a href=3D"http://www.cl.cam.ac.uk/research/srg/neto=
+s/lock-free/src/lockfree-lib.tar.gz">http://www.cl.cam.ac.uk/research/srg/ne=
+tos/lock-free/src/lockfree-lib.tar.gz</a><BR>
+</U></FONT>with one under a BSD-derived license?<BR>
+</SPAN></FONT></BLOCKQUOTE><FONT FACE=3D"Verdana, Helvetica, Arial"><SPAN STY=
+LE=3D'font-size:12.0px'><BR>
+I’d be happy to relicense, however I’m unlikely to take the tim=
+e to go through the tarball changing all references to GPL to refer to a BSD=
+-alike license. Add to this the fact I do not have access to my CL account a=
+ny more. However I’m happy for the OpenAFS project to take the tarball=
+ and do anything you like to it. The question then is simply what degree of =
+assurance/provability do you require that we will not renege on this agreeme=
+nt later. Perhaps this email will suffice? :-)<BR>
+<BR>
+ -- Keir<BR>
+<BR>
+</SPAN></FONT>
+</BODY>
+</HTML>
+
+
+--B_3294832767_9324180--
+
+
+Return-Path: <tim.harris@gmail.com>
+X-Original-To: matt@linuxbox.com
+Delivered-To: matt@linuxbox.com
+Received: by trosper.private.linuxbox.com (Postfix, from userid 65534)
+ id ACE2DD9CAAFA; Wed, 28 May 2008 10:08:33 -0400 (EDT)
+X-Spam-Checker-Version: SpamAssassin 3.1.7 (2006-10-05) on
+ trosper.private.linuxbox.com
+X-Spam-Level:
+X-Spam-Status: No, score=-2.2 required=4.0 tests=BAYES_00,HTML_30_40,
+ HTML_MESSAGE autolearn=disabled version=3.1.7
+Received: from aa.linuxbox.com (linuxbox.com [10.1.1.1])
+ by trosper.private.linuxbox.com (Postfix) with ESMTP id 4A423C0A1BE1
+ for <matt@linuxbox.com>; Wed, 28 May 2008 10:08:19 -0400 (EDT)
+Received: from rv-out-0506.google.com (rv-out-0506.google.com [209.85.198.236])
+ by aa.linuxbox.com (8.13.1/8.13.1/SuSE Linux 0.7) with ESMTP id m4SE81ps000627
+ for <matt@linuxbox.com>; Wed, 28 May 2008 10:08:18 -0400
+Received: by rv-out-0506.google.com with SMTP id f6so3345779rvb.53
+ for <matt@linuxbox.com>; Wed, 28 May 2008 07:07:48 -0700 (PDT)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
+ d=gmail.com; s=gamma;
+ h=domainkey-signature:received:received:message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:references;
+ bh=WbCNQe38GAVns49oEFgubKUCi6HmETUXNog1K8qsoNo=;
+ b=oQe0Aw3vaqCldpLw+jf3wyBz8Hi6U6JzIH+7ZT9FzFDbp43+NAZoGNWGJH3VR1greg6LrHXEFwpS37c2GuiHhs6y9l5EDYfoJ40eLZxQZmmHDio4NgX+lhSNFQ68CXIr+sbG4kmiLdFjPLrRzFIbUTvGpiXSNirTBNcWgdgmCyI=
+DomainKey-Signature: a=rsa-sha1; c=nofws;
+ d=gmail.com; s=gamma;
+ h=message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:references;
+ b=nKi6rs9htpokBrXzI1sGNFdBJe1q8HhxHiBnR241rmGUFnRokBnulqktV3STx+pHJpS4xQj4fcCWyIUAWrbmHjziiz8j5k7E5gWTieDP8MHtZqM049INLp8IxtqT3Rgjp6YJASlWpwFOYwYO5I/CadqhcU3IsD7xCEtiPjIQ1ss=
+Received: by 10.141.20.7 with SMTP id x7mr1206096rvi.82.1211983668495;
+ Wed, 28 May 2008 07:07:48 -0700 (PDT)
+Received: by 10.140.158.5 with HTTP; Wed, 28 May 2008 07:07:48 -0700 (PDT)
+Message-ID: <bf2c48aa0805280707j5009e77cj8a13b3dddf45df3a@mail.gmail.com>
+Date: Wed, 28 May 2008 15:07:48 +0100
+From: "Tim Harris" <tim.harris@gmail.com>
+To: "Matt Benjamin" <matt@linuxbox.com>
+Subject: Re: MCAS licensing
+Cc: keir.fraser@cl.cam.ac.uk
+In-Reply-To: <483D640E.5090502@linuxbox.com>
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+ boundary="----=_Part_9134_21622973.1211983668503"
+References: <483D640E.5090502@linuxbox.com>
+X-Greylist: Sender is SPF-compliant, not delayed by milter-greylist-2.0.2 (aa.linuxbox.com [134.215.213.37]); Wed, 28 May 2008 10:08:19 -0400 (EDT)
+
+------=_Part_9134_21622973.1211983668503
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+Content-Disposition: inline
+
+Hi,
+
+I'm personally happy making it available under a different license.
+However,
+most of this work (and almost all the actual implementation) was by Keir
+Fraser, so he'll need to OK it as well.
+
+Keir, if its OK with you then can we replace the copy of lock-free-lib at
+http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar.gz
+with one under a BSD-derived license?
+
+Thanks,
+
+Tim
+
+
+
+
+On Wed, May 28, 2008 at 2:54 PM, Matt Benjamin <matt@linuxbox.com> wrote:
+
+> -----BEGIN PGP SIGNED MESSAGE-----
+> Hash: SHA256
+>
+> Hi Tim,
+>
+> Thank you (and colleagues) for your work.
+>
+> I work on a large open-source project (OpenAFS) whose license is,
+> unfortunately, not GPL, and not viral. It seems like an outside chance,
+> but we're experimenting with lock-free data structures in some
+> subsystems, and it would be interesting to us if there were a possiblity
+> of getting a product/project-specific license exemption allowing us to
+> use MCAS. (It would be nice to be using a vetted, free library.)
+>
+> Thanks for your consideration,
+>
+> Matt
+>
+> - --
+>
+> Matt Benjamin
+>
+> The Linux Box
+> 206 South Fifth Ave. Suite 150
+> Ann Arbor, MI 48104
+>
+> http://linuxbox.com
+>
+> tel. 734-761-4689
+> fax. 734-769-8938
+> cel. 734-216-5309
+>
+> -----BEGIN PGP SIGNATURE-----
+> Version: GnuPG v1.4.7 (GNU/Linux)
+> Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
+>
+> iD8DBQFIPWQOJiSUUSaRdSURCAMDAKCH4PPrl5TXtQj20oIZtwYs8p49qwCbBsXp
+> Ha1vEHYNNIW5dxiet2bSFNo=
+> =0ArW
+> -----END PGP SIGNATURE-----
+>
+
+------=_Part_9134_21622973.1211983668503
+Content-Type: text/html; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+Content-Disposition: inline
+
+<div>Hi,</div>
+<div> </div>
+<div>I'm personally happy making it available under a different license. However,</div>
+<div>most of this work (and almost all the actual implementation) was by Keir</div>
+<div>Fraser, so he'll need to OK it as well.</div>
+<div> </div>
+<div>Keir, if its OK with you then can we replace the copy of lock-free-lib at</div>
+<div><a href="http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar.gz">http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar.gz</a></div>
+<div>with one under a BSD-derived license?</div>
+<div> </div>
+<div>Thanks,</div>
+<div> </div>
+<div>Tim</div>
+<div> </div>
+<div><br><br> </div>
+<div class="gmail_quote">On Wed, May 28, 2008 at 2:54 PM, Matt Benjamin <<a href="mailto:matt@linuxbox.com">matt@linuxbox.com</a>> wrote:<br>
+<blockquote class="gmail_quote" style="PADDING-LEFT: 1ex; MARGIN: 0px 0px 0px 0.8ex; BORDER-LEFT: #ccc 1px solid">-----BEGIN PGP SIGNED MESSAGE-----<br>Hash: SHA256<br><br>Hi Tim,<br><br>Thank you (and colleagues) for your work.<br>
+<br>I work on a large open-source project (OpenAFS) whose license is,<br>unfortunately, not GPL, and not viral. It seems like an outside chance,<br>but we're experimenting with lock-free data structures in some<br>subsystems, and it would be interesting to us if there were a possiblity<br>
+of getting a product/project-specific license exemption allowing us to<br>use MCAS. (It would be nice to be using a vetted, free library.)<br><br>Thanks for your consideration,<br><br>Matt<br><br>- --<br><br>Matt Benjamin<br>
+<br>The Linux Box<br>206 South Fifth Ave. Suite 150<br>Ann Arbor, MI 48104<br><br><a href="http://linuxbox.com/" target="_blank">http://linuxbox.com</a><br><br>tel. 734-761-4689<br>fax. 734-769-8938<br>cel. 734-216-5309<br>
+<br>-----BEGIN PGP SIGNATURE-----<br>Version: GnuPG v1.4.7 (GNU/Linux)<br>Comment: Using GnuPG with Mozilla - <a href="http://enigmail.mozdev.org/" target="_blank">http://enigmail.mozdev.org</a><br><br>iD8DBQFIPWQOJiSUUSaRdSURCAMDAKCH4PPrl5TXtQj20oIZtwYs8p49qwCbBsXp<br>
+Ha1vEHYNNIW5dxiet2bSFNo=<br>=0ArW<br>-----END PGP SIGNATURE-----<br></blockquote></div><br>
+
+------=_Part_9134_21622973.1211983668503--
--- /dev/null
+#ifndef __ALPHA_DEFNS_H__
+#define __ALPHA_DEFNS_H__
+
+#include <c_asm.h>
+#include <alpha/builtins.h>
+#include <pthread.h>
+
+#ifndef ALPHA
+#define ALPHA
+#endif
+
+#define CACHE_LINE_SIZE 64
+
+
+/*
+ * I. Compare-and-swap, fetch-and-store.
+ */
+
+#define FAS32(_x,_n) asm ( \
+ "1: ldl_l %v0, 0(%a0);" \
+ " bis %a1, 0, %t0;" \
+ " stl_c %t0, 0(%a0);" \
+ " beq %t0, 1b;", (_x), (_n))
+#define FAS64(_x,_n) asm ( \
+ "1: ldq_l %v0, 0(%a0);" \
+ " bis %a1, 0, %t0;" \
+ " stq_c %t0, 0(%a0);" \
+ " beq %t0, 1b;", (_x), (_n))
+#define CAS32(_x,_o,_n) asm ( \
+ "1: ldl_l %v0, 0(%a0);" \
+ " cmpeq %v0, %a1, %t0;" \
+ " beq %t0, 3f;" \
+ " bis %a2, 0, %t0;" \
+ " stl_c %t0, 0(%a0);" \
+ " beq %t0, 1b;" \
+ "3:", (_x), (_o), (_n))
+#define CAS64(_x,_o,_n) asm ( \
+ "1: ldq_l %v0, 0(%a0);" \
+ " cmpeq %v0, %a1, %t0;" \
+ " beq %t0, 3f;" \
+ " bis %a2, 0, %t0;" \
+ " stq_c %t0, 0(%a0);" \
+ " beq %t0, 1b;" \
+ "3:", (_x), (_o), (_n))
+#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n))
+#define FAS(_x,_n) ((sizeof (*_x) == 4)?FAS32(_x,_n) :FAS64(_x,_n))
+/* Update Integer location, return Old value. */
+#define CASIO(_x,_o,_n) CAS(_x,_o,_n)
+#define FASIO(_x,_n) FAS(_x,_n)
+/* Update Pointer location, return Old value. */
+#define CASPO(_x,_o,_n) (void*)CAS((_x),(void*)(_o),(void*)(_n))
+#define FASPO(_x,_n) (void*)FAS((_x),(void*)(_n))
+#define CAS32O CAS32
+#define CAS64O CAS64
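+
+/*
+ * Illustrative usage only (the pointer names are hypothetical): CASPO
+ * returns the old value at the location, so a typical lock-free update
+ * retries until the returned value matches the value it expected, e.g.
+ *
+ *   do { old = *pp; new->next = old; } while ( CASPO(pp, old, new) != old );
+ */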
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+#define MB() asm("mb")
+#define WMB() asm("wmb")
+#define RMB() (MB())
+#define VOLATILE /*volatile*/
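+
+/*
+ * Illustrative pattern (as used by the BST implementations): initialise a
+ * new node's fields, issue WMB(), and only then link the node into the
+ * shared structure, so that readers never observe a half-initialised node:
+ *
+ *   new->l = ...; new->r = ...;
+ *   WMB();
+ *   p->r = new;
+ */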
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+#include <sys/time.h>
+typedef unsigned long tick_t;
+#define RDTICK() asm("rpcc %v0")
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long _u64;
+
+#endif /* __ALPHA_DEFNS_H__ */
--- /dev/null
+/******************************************************************************
+ * bst_lock_fraser.c
+ *
+ * Lock-based binary search trees (BSTs), based on per-node spinlocks.
+ * Uses the threaded tree representation described in my PhD dissertation:
+ * "Practical Lock-Freedom", University of Cambridge, 2003.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <ucontext.h>
+#include <signal.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define MARK_THREAD 1
+#define THREAD(_p) ((node_t *)((int_addr_t)(_p)|(MARK_THREAD)))
+#define UNTHREAD(_p) ((node_t *)((int_addr_t)(_p)&~MARK_THREAD))
+#define IS_THREAD(_p) ((int)((int_addr_t)(_p)&MARK_THREAD))
+
+#define IS_GARBAGE(_n) ((_n)->v == NULL)
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r;
+ mcs_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+ node_t sentinel;
+};
+
+static int gc_id;
+
+/* We use these flags to determine which nodes are currently locked. */
+#define P_LOCKED 0x01
+#define N_LOCKED 0x02
+#define PAL_LOCKED 0x04
+#define PAR_LOCKED 0x08
+#define AL_LOCKED 0x10
+#define AR_LOCKED 0x20
+
+#define LOCK(_n, _qn, _flag) \
+ do { \
+ mcs_lock(&(_n)->lock, &(_qn)); \
+ if ( IS_GARBAGE(_n) ) { \
+ mcs_unlock(&(_n)->lock, &(_qn)); \
+ goto retry; \
+ } \
+ lock_flags |= (_flag); \
+ } while ( 0 )
+
+#define UNLOCK(_n, _qn, _flag) \
+ do { \
+ if ( (lock_flags & (_flag)) ) \
+ mcs_unlock(&(_n)->lock, &(_qn)); \
+ } while ( 0 )
+
+
+/*
+ * Search for node with key == k. Returns a thread-marked pointer if there
+ * is no matching node, else an (unmarked) pointer to the matching node.
+ * @ppn is filled in with parent node, or closest leaf if no match.
+ * p and n will both be unmarked and adjacent on return.
+ */
+static node_t *search(set_t *s, setkey_t k, node_t **ppn)
+{
+ node_t *p, *n, *c;
+
+ retry:
+ p = &s->root;
+ n = p->r;
+
+ while ( !IS_THREAD(n) )
+ {
+ if ( k < n->k ) {
+ c = n->l;
+ assert(UNTHREAD(c)->k < n->k);
+ } else if ( k > n->k ) {
+ c = n->r;
+ assert(UNTHREAD(c)->k > n->k);
+ } else /* k == n->k */
+ goto found;
+
+ p = n; n = c;
+ }
+
+ /* Follow final thread, just in case. */
+ c = UNTHREAD(n);
+ if ( k == c->k ) goto followed_thread;
+
+ found:
+ if ( ppn ) *ppn = p;
+ return n;
+
+ followed_thread:
+ if ( ppn ) { RMB(); goto retry; }
+ return c;
+}
+
+
+set_t *set_alloc(void)
+{
+ set_t *s;
+
+ s = malloc(sizeof(*s));
+ mcs_init(&s->root.lock);
+ s->root.k = SENTINEL_KEYMIN;
+ s->root.v = (setval_t)(~0UL);
+ s->root.l = THREAD(&s->root);
+ s->root.r = THREAD(&s->sentinel);
+
+ mcs_init(&s->sentinel.lock);
+ s->sentinel.k = SENTINEL_KEYMAX;
+
+ return s;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ setval_t ov;
+ node_t *p, *n, *new = NULL;
+ qnode_t qp, qn;
+ ptst_t *ptst;
+ int lock_flags, r = 0;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ ov = NULL;
+ lock_flags = 0;
+
+ n = search(s, k, &p);
+
+ if ( !IS_THREAD(n) )
+ {
+ LOCK(n, qn, N_LOCKED);
+ ov = n->v;
+ if ( overwrite ) n->v = v;
+ }
+ else
+ {
+ if ( new == NULL )
+ {
+ new = gc_alloc(ptst, gc_id);
+ mcs_init(&new->lock);
+ new->k = k;
+ new->v = v;
+ }
+
+ LOCK(p, qp, P_LOCKED);
+
+ if ( p->k < k )
+ {
+ if ( (p->r != n) || (UNTHREAD(n)->k < k) ) goto retry;
+ new->l = THREAD(p);
+ new->r = n;
+ WMB();
+ p->r = new;
+ }
+ else
+ {
+ if ( (p->l != n) || (UNTHREAD(n)->k > k) ) goto retry;
+ new->l = n;
+ new->r = THREAD(p);
+ WMB();
+ p->l = new;
+ }
+
+ new = NULL; /* node is now in tree */
+ }
+
+ r = 1; /* success */
+
+ retry:
+ UNLOCK(p, qp, P_LOCKED);
+ UNLOCK(n, qn, N_LOCKED);
+ }
+ while ( !r );
+
+ if ( new ) gc_free(ptst, new, gc_id);
+ critical_exit(ptst);
+ return ov;
+}
+
+
+#define FIND_HELPER(_d1, _d2, _n, _ap, _a) \
+{ \
+ node_t *ac; \
+ (_ap) = NULL; \
+ (_a) = (_n); \
+ ac = (_a)->_d1; \
+ while ( !IS_THREAD(ac) ) \
+ { \
+ (_ap) = (_a); \
+ (_a) = ac; \
+ ac = (_a)->_d2; \
+ } \
+}
+
+
+/*
+ * Order of first two cases does matter! If @n is the left-link of @p, then
+ * we use DELETE_HELPER(l, r). What matters is what we do when @n is a leaf.
+ * In this case we end up choosing n->l to propagate to p->l -- this
+ * happens to be the correct choice :-)
+ *
+ * NB. Note symmetric deletion cases dependent on parameter @dir. We
+ * could simplify the algorithm by always following one direction. In fact,
+ * that is slightly worse, or much worse, depending on the chosen case
+ * (hint: works best with dir hardwired to zero :-)....
+ */
+#define dir 0
+#define DELETE_HELPER(_d1, _d2) \
+ FIND_HELPER(_d1, _d2, n, pal, al); \
+ FIND_HELPER(_d2, _d1, n, par, ar); \
+ if ( IS_THREAD(n ## _d2) ) \
+ { \
+ if ( IS_THREAD(n ## _d1) ) \
+ { \
+ *p_pc = n ## _d1; \
+ } \
+ else \
+ { \
+ LOCK(al, qal, AL_LOCKED); \
+ if ( al->_d2 != THREAD(n) ) goto retry; \
+ *p_pc = n ## _d1; \
+ al->_d2 = n ## _d2; \
+ } \
+ } \
+ else if ( IS_THREAD(n ## _d1) ) \
+ { \
+ LOCK(ar, qar, AR_LOCKED); \
+ if ( ar->_d1 != THREAD(n) ) goto retry; \
+ *p_pc = n ## _d2; \
+ ar->_d1 = n ## _d1; \
+ } \
+ else if ( dir ) \
+ { \
+ if ( par != n ) \
+ { \
+ LOCK(par, qpar, PAR_LOCKED); \
+ if ( par->_d1 != ar ) goto retry; \
+ } \
+ LOCK(al, qal, AL_LOCKED); \
+ LOCK(ar, qar, AR_LOCKED); \
+ if ( (al->_d2 != THREAD(n)) || (ar->_d1 != THREAD(n)) ) goto retry; \
+ al->_d2 = THREAD(ar); \
+ ar->_d1 = n ## _d1; \
+ if ( par != n ) \
+ { \
+ ac = ar->_d2; \
+ ar->_d2 = n ## _d2; \
+ par->_d1 = IS_THREAD(ac) ? THREAD(ar) : ac; \
+ } \
+ WMB(); /* New links in AR must appear before it is raised. */ \
+ *p_pc = ar; \
+ } \
+ else \
+ { \
+ if ( pal != n ) \
+ { \
+ LOCK(pal, qpal, PAL_LOCKED); \
+ if ( pal->_d2 != al ) goto retry; \
+ } \
+ LOCK(al, qal, AL_LOCKED); \
+ LOCK(ar, qar, AR_LOCKED); \
+ if ( (al->_d2 != THREAD(n)) || (ar->_d1 != THREAD(n)) ) goto retry; \
+ al->_d2 = n ## _d2; \
+ ar->_d1 = THREAD(al); \
+ if ( pal != n ) \
+ { \
+ ac = al->_d1; \
+ al->_d1 = n ## _d1; \
+ pal->_d2 = IS_THREAD(ac) ? THREAD(al) : ac; \
+ } \
+ WMB(); /* New links in AL must appear before it is raised. */ \
+ *p_pc = al; \
+ }
+
+
+/* @k: key of node to be deleted */
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ node_t *p, *n, *nl, *nr, *al, *ar, *pal, *par, *ac, **p_pc;
+ qnode_t qp, qn, qal, qar, qpal, qpar;
+ int r = 0, lock_flags;
+ setval_t v;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ v = NULL;
+ lock_flags = 0;
+
+ n = search(s, k, &p);
+ if ( IS_THREAD(n) ) goto out;
+
+ LOCK(p, qp, P_LOCKED);
+ p_pc = (p->k > n->k) ? &p->l : &p->r;
+ if ( *p_pc != n ) goto retry;
+
+ LOCK(n, qn, N_LOCKED);
+
+ nl = n->l;
+ nr = n->r;
+
+ if ( p->k > n->k )
+ {
+ /* @n is leftwards link from @p. */
+ DELETE_HELPER(l, r);
+ }
+ else
+ {
+ /* @n is rightwards link from @p. */
+ DELETE_HELPER(r, l);
+ }
+
+ r = 1;
+ v = n->v;
+ n->v = NULL;
+
+ retry:
+ UNLOCK(p, qp, P_LOCKED);
+ UNLOCK(n, qn, N_LOCKED);
+ UNLOCK(pal, qpal, PAL_LOCKED);
+ UNLOCK(par, qpar, PAR_LOCKED);
+ UNLOCK(al, qal, AL_LOCKED);
+ UNLOCK(ar, qar, AR_LOCKED);
+ }
+ while ( !r );
+
+ gc_free(ptst, n, gc_id);
+
+ out:
+ critical_exit(ptst);
+ return v;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ node_t *n;
+ setval_t v;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = search(s, k, NULL);
+ v = (!IS_THREAD(n)) ? n->v : NULL;
+
+ critical_exit(ptst);
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+}
--- /dev/null
+/******************************************************************************
+ * bst_lock_kung.c
+ *
+ * Lock-based binary search trees (BSTs), based on:
+ * H. T. Kung and Philip L. Lehman.
+ * "Concurrent manipulation of binary search trees".
+ * ACM Transactions on Database Systems, Vol. 5, No. 3, September 1980.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <ucontext.h>
+#include <signal.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define IS_BLUE(_n) ((int)(_n)->v & 1)
+#define MK_BLUE(_n) ((_n)->v = (setval_t)((unsigned long)(_n)->v | 1))
+
+#define GET_VALUE(_n) ((setval_t)((unsigned long)(_n)->v & ~1UL))
+
+#define LEFT 0
+#define RIGHT 1
+#define FOLLOW(_n, _d) ((_d) ? (_n)->r : (_n)->l)
+#define UPDATE(_n, _d, _x) ((_d) ? ((_n)->r = (_x)) : ((_n)->l = (_x)))
+#define FLIP(_d) ((_d)^1)
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r, *p;
+ mcs_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+};
+
+static int gc_id;
+
+#define LOCK(_n, _pqn) mcs_lock(&(_n)->lock, (_pqn))
+#define UNLOCK(_n, _pqn) mcs_unlock(&(_n)->lock, (_pqn))
+
+
+static node_t *weak_find(node_t *n, setkey_t k)
+{
+ while ( n != NULL )
+ {
+ if ( n->k < k )
+ n = n->r;
+ else if ( n->k > k )
+ n = n->l;
+ else
+ break;
+ }
+ return n;
+}
+
+
+static node_t *find(node_t *n, setkey_t k, qnode_t *qn, int *pdir)
+{
+ int dir;
+ node_t *f, *s;
+
+ s = n;
+
+ do {
+ f = s;
+ retry:
+ if ( k < f->k )
+ {
+ dir = LEFT;
+ s = f->l;
+ }
+ else
+ {
+ dir = RIGHT;
+ s = f->r;
+ }
+ }
+ while ( (s != NULL) && (s->k != k) );
+
+ LOCK(f, qn);
+ if ( IS_BLUE(f) )
+ {
+ UNLOCK(f, qn);
+ f = f->p;
+ goto retry;
+ }
+ if ( s != FOLLOW(f, dir) )
+ {
+ UNLOCK(f, qn);
+ goto retry;
+ }
+
+ *pdir = dir;
+ return f;
+}
+
+
+static node_t *rotate(ptst_t *ptst, node_t *a, int dir1,
+ int dir2, node_t **pc, qnode_t *pqn[])
+{
+ node_t *b = FOLLOW(a, dir1), *c = FOLLOW(b, dir2);
+ node_t *bp = gc_alloc(ptst, gc_id), *cp = gc_alloc(ptst, gc_id);
+ qnode_t c_qn;
+
+ LOCK(c, &c_qn);
+
+ memcpy(bp, b, sizeof(*b));
+ memcpy(cp, c, sizeof(*c));
+
+ mcs_init(&bp->lock);
+ mcs_init(&cp->lock);
+
+ LOCK(bp, pqn[3]);
+ LOCK(cp, pqn[2]);
+
+ assert(!IS_BLUE(a));
+ assert(!IS_BLUE(b));
+ assert(!IS_BLUE(c));
+
+ UPDATE(cp, FLIP(dir2), bp);
+ UPDATE(bp, dir2, FOLLOW(c, FLIP(dir2)));
+
+ UPDATE(a, dir1, cp);
+ b->p = a;
+ MK_BLUE(b);
+ c->p = cp;
+ MK_BLUE(c);
+
+ gc_free(ptst, b, gc_id);
+ gc_free(ptst, c, gc_id);
+
+ UNLOCK(a, pqn[0]);
+ UNLOCK(b, pqn[1]);
+ UNLOCK(c, &c_qn);
+
+ *pc = bp;
+ return cp;
+}
+
+
+static void _remove(ptst_t *ptst, node_t *a, int dir1, int dir2, qnode_t **pqn)
+{
+ node_t *b = FOLLOW(a, dir1), *c = FOLLOW(b, dir2);
+ assert(FOLLOW(b, FLIP(dir2)) == NULL);
+ assert(!IS_BLUE(a));
+ assert(!IS_BLUE(b));
+ UPDATE(a, dir1, c);
+ UPDATE(b, FLIP(dir2), c);
+ b->p = a;
+ MK_BLUE(b);
+ gc_free(ptst, b, gc_id);
+ UNLOCK(a, pqn[0]);
+ UNLOCK(b, pqn[1]);
+}
+
+
+static void delete_by_rotation(ptst_t *ptst, node_t *f, int dir,
+ qnode_t *pqn[], int lock_idx)
+{
+ node_t *g, *h, *s = FOLLOW(f, dir);
+
+ if ( s->v != NULL )
+ {
+ UNLOCK(f, pqn[lock_idx+0]);
+ UNLOCK(s, pqn[lock_idx+1]);
+ return;
+ }
+
+ if ( s->l == NULL )
+ _remove(ptst, f, dir, RIGHT, pqn+lock_idx);
+ else if ( s->r == NULL )
+ _remove(ptst, f, dir, LEFT, pqn+lock_idx);
+ else
+ {
+ g = rotate(ptst, f, dir, LEFT, &h, pqn+lock_idx);
+ lock_idx ^= 2;
+ if ( h->l == NULL )
+ {
+ assert(h->v == NULL);
+ _remove(ptst, g, RIGHT, RIGHT, pqn+lock_idx);
+ }
+ else
+ {
+ delete_by_rotation(ptst, g, RIGHT, pqn, lock_idx);
+ LOCK(f, pqn[0]);
+ if ( (g != FOLLOW(f, dir)) || IS_BLUE(f) )
+ {
+ UNLOCK(f, pqn[0]);
+ }
+ else
+ {
+ LOCK(g, pqn[1]);
+ /*
+ * XXX Check that there is a node H to be rotated up.
+ * This is missing from the original paper, and must surely
+ * be a bug (we lost all locks at previous delete_by_rotation,
+ * so we can't know the existence of G's children).
+ */
+ if ( g->r != NULL )
+ {
+ g = rotate(ptst, f, dir, RIGHT, &h, pqn);
+ UNLOCK(g, pqn[2]);
+ UNLOCK(h, pqn[3]);
+ }
+ else
+ {
+ UNLOCK(f, pqn[0]);
+ UNLOCK(g, pqn[1]);
+ }
+ }
+ }
+ }
+}
+
+
+set_t *set_alloc(void)
+{
+ set_t *s;
+
+ s = malloc(sizeof(*s));
+ mcs_init(&s->root.lock);
+ s->root.k = SENTINEL_KEYMIN;
+ s->root.v = (setval_t)(~1UL); /* dummy root node is white. */
+ s->root.l = NULL;
+ s->root.r = NULL;
+
+ return s;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ node_t *f, *w;
+ qnode_t f_qn, w_qn;
+ int dir;
+ setval_t ov = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ retry:
+ f = find(&s->root, k, &f_qn, &dir);
+
+ if ( (w = FOLLOW(f, dir)) != NULL )
+ {
+ /* Protected by parent lock. */
+ assert(!IS_BLUE(w));
+ ov = w->v;
+ if ( overwrite || (ov == NULL) ) w->v = v;
+ }
+ else
+ {
+ w = gc_alloc(ptst, gc_id);
+ w->l = NULL;
+ w->r = NULL;
+ w->v = v;
+ w->k = k;
+ mcs_init(&w->lock);
+ UPDATE(f, dir, w);
+ }
+
+ UNLOCK(f, &f_qn);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ node_t *f, *w;
+ qnode_t qn[4], *pqn[] = { qn+0, qn+1, qn+2, qn+3, qn+0, qn+1 };
+ int dir;
+ setval_t v = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ f = find(&s->root, k, pqn[0], &dir);
+ if ( (w = FOLLOW(f, dir)) != NULL )
+ {
+ LOCK(w, pqn[1]);
+ v = w->v;
+ w->v = NULL;
+ assert(!IS_BLUE(w));
+ delete_by_rotation(ptst, f, dir, pqn, 0);
+ }
+ else
+ {
+ UNLOCK(f, pqn[0]);
+ }
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ node_t *n;
+ setval_t v = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = weak_find(&s->root, k);
+ if ( n != NULL ) v = GET_VALUE(n);
+
+ critical_exit(ptst);
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+}
--- /dev/null
+/******************************************************************************
+ * bst_lock_manber.c
+ *
+ * Lock-based binary search trees (BSTs), based on:
+ * Udi Manber and Richard E. Ladner.
+ * "Concurrency control in a dynamic search structure".
+ * ACM Transactions on Database Systems, Vol. 9, No. 3, September 1984.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <ucontext.h>
+#include <signal.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define GARBAGE_FLAG 1
+#define REDUNDANT_FLAG 2
+
+#define IS_GARBAGE(_n) ((int)(_n)->v & GARBAGE_FLAG)
+#define MK_GARBAGE(_n) \
+ ((_n)->v = (setval_t)((unsigned long)(_n)->v | GARBAGE_FLAG))
+
+#define IS_REDUNDANT(_n) ((int)(_n)->v & REDUNDANT_FLAG)
+#define MK_REDUNDANT(_n) \
+ ((_n)->v = (setval_t)((unsigned long)(_n)->v | REDUNDANT_FLAG))
+
+#define GET_VALUE(_n) ((setval_t)((unsigned long)(_n)->v & ~3UL))
+
+#define FOLLOW(_n, _k) (((_n)->k < (_k)) ? (_n)->r : (_n)->l)
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r, *p;
+ int copy;
+ mcs_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+};
+
+static int gc_id, hook_id;
+
+#define LOCK(_n, _pqn) mcs_lock(&(_n)->lock, (_pqn))
+#define UNLOCK(_n, _pqn) mcs_unlock(&(_n)->lock, (_pqn))
+
+
+static node_t *weak_search(node_t *n, setkey_t k)
+{
+ while ( (n != NULL) && (n->k != k) ) n = FOLLOW(n, k);
+ return n;
+}
+
+
+static node_t *strong_search(node_t *n, setkey_t k, qnode_t *qn)
+{
+ node_t *b = n;
+ node_t *a = FOLLOW(b, k);
+
+ retry:
+ while ( (a != NULL) && (a->k != k) )
+ {
+ b = a;
+ a = FOLLOW(a, k);
+ }
+
+ if ( a == NULL )
+ {
+ LOCK(b, qn);
+ if ( IS_GARBAGE(b) )
+ {
+ UNLOCK(b, qn);
+ a = b->p;
+ goto retry;
+ }
+ else if ( (a = FOLLOW(b, k)) != NULL )
+ {
+ UNLOCK(b, qn);
+ goto retry;
+ }
+
+ a = b;
+ }
+ else
+ {
+ LOCK(a, qn);
+ if ( IS_GARBAGE(a) )
+ {
+ UNLOCK(a, qn);
+ a = a->p;
+ goto retry;
+ }
+ else if ( IS_REDUNDANT(a) )
+ {
+ UNLOCK(a, qn);
+ a = a->r;
+ goto retry;
+ }
+ }
+
+ return a;
+}
+
+
+static void redundancy_removal(ptst_t *ptst, void *x)
+{
+ node_t *d, *e, *r;
+ qnode_t d_qn, e_qn;
+ setkey_t k;
+
+ if ( x == NULL ) return;
+
+ e = x;
+ k = e->k;
+
+ if ( e->copy )
+ {
+ r = weak_search(e->l, k);
+ assert((r == NULL) || !IS_REDUNDANT(r) || (r->r == e));
+ assert(r != e);
+ redundancy_removal(ptst, r);
+ }
+
+ do {
+ if ( IS_GARBAGE(e) ) return;
+ d = e->p;
+ LOCK(d, &d_qn);
+ if ( IS_GARBAGE(d) ) UNLOCK(d, &d_qn);
+ }
+ while ( IS_GARBAGE(d) );
+
+ LOCK(e, &e_qn);
+
+ if ( IS_GARBAGE(e) || !IS_REDUNDANT(e) ) goto out_de;
+
+ if ( d->l == e )
+ {
+ d->l = e->l;
+ }
+ else
+ {
+ assert(d->r == e);
+ d->r = e->l;
+ }
+
+ assert(e->r != NULL);
+ assert(e->r->k == k);
+ assert(e->r->copy);
+ assert(!IS_GARBAGE(e->r));
+ assert(!e->copy);
+
+ MK_GARBAGE(e);
+
+ if ( e->l != NULL ) e->l->p = d;
+
+ e->r->copy = 0;
+
+ gc_free(ptst, e, gc_id);
+
+ out_de:
+ UNLOCK(d, &d_qn);
+ UNLOCK(e, &e_qn);
+}
+
+
+/* NB. Node X is not locked on entry. */
+static void predecessor_substitution(ptst_t *ptst, set_t *s, node_t *x)
+{
+ node_t *a, *b, *e, *f, **pac;
+ qnode_t a_qn, b_qn, e_qn, f_qn;
+ setkey_t k;
+
+ b = x;
+ k = x->k;
+
+ do {
+ if ( (b == NULL) || (b->v != NULL) ) return;
+ a = b->p;
+ LOCK(a, &a_qn);
+ if ( IS_GARBAGE(a) ) UNLOCK(a, &a_qn);
+ }
+ while ( IS_GARBAGE(a) );
+
+ regain_lock:
+ LOCK(b, &b_qn);
+
+ /*
+ * We do nothing if:
+ * 1. The node is already deleted (and is thus garbage); or
+ * 2. The node is redundant (redundancy removal will do it); or
+ * 3. The node has been reused.
+ * These can all be checked by looking at the value field.
+ */
+ if ( b->v != NULL ) goto out_ab;
+
+ /*
+ * If this node is a copy, then we can do redundancy removal right now.
+ * This is an improvement over Manber and Ladner's work.
+ */
+ if ( b->copy )
+ {
+ e = weak_search(b->l, k);
+ UNLOCK(b, &b_qn);
+ assert((e == NULL) || !IS_REDUNDANT(e) || (e->r == b));
+ assert(e != b);
+ redundancy_removal(ptst, e);
+ goto regain_lock;
+ }
+
+ pac = (a->k < k) ? &a->r : &a->l;
+ assert(*pac == b);
+ assert(b->p == a);
+
+ if ( (b->l == NULL) || (b->r == NULL) )
+ {
+ if ( b->r == NULL ) *pac = b->l; else *pac = b->r;
+ MK_GARBAGE(b);
+ if ( *pac != NULL ) (*pac)->p = a;
+ gc_free(ptst, b, gc_id);
+ goto out_ab;
+ }
+ else
+ {
+ e = strong_search(b->l, b->k, &e_qn);
+ assert(!IS_REDUNDANT(e) && !IS_GARBAGE(e) && (b != e));
+ assert(e->k < b->k);
+ f = gc_alloc(ptst, gc_id);
+ f->k = e->k;
+ f->v = GET_VALUE(e);
+ f->copy = 1;
+ f->r = b->r;
+ f->l = b->l;
+ mcs_init(&f->lock);
+ LOCK(f, &f_qn);
+
+ e->r = f;
+ MK_REDUNDANT(e);
+ *pac = f;
+ f->p = a;
+ f->r->p = f;
+ f->l->p = f;
+
+ MK_GARBAGE(b);
+ gc_free(ptst, b, gc_id);
+ gc_add_ptr_to_hook_list(ptst, e, hook_id);
+ UNLOCK(e, &e_qn);
+ UNLOCK(f, &f_qn);
+ }
+
+ out_ab:
+ UNLOCK(a, &a_qn);
+ UNLOCK(b, &b_qn);
+}
+
+
+set_t *set_alloc(void)
+{
+ set_t *s;
+
+ s = malloc(sizeof(*s));
+ mcs_init(&s->root.lock);
+ s->root.k = SENTINEL_KEYMIN;
+ /* Dummy root isn't redundant, nor is it garbage. */
+ s->root.v = (setval_t)(~3UL);
+ s->root.l = NULL;
+ s->root.r = NULL;
+ s->root.p = NULL;
+ s->root.copy = 0;
+
+ return s;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ node_t *a, *new;
+ qnode_t qn;
+ setval_t ov = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ a = strong_search(&s->root, k, &qn);
+ if ( a->k != k )
+ {
+ new = gc_alloc(ptst, gc_id);
+ mcs_init(&new->lock);
+ new->k = k;
+ new->v = v;
+ new->l = NULL;
+ new->r = NULL;
+ new->p = a;
+ new->copy = 0;
+ if ( a->k < k ) a->r = new; else a->l = new;
+ }
+ else
+ {
+ /* Direct A->V access is okay, as A isn't garbage or redundant. */
+ ov = a->v;
+ if ( overwrite || (ov == NULL) ) a->v = v;
+ }
+
+ UNLOCK(a, &qn);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ node_t *a;
+ qnode_t qn;
+ setval_t v = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ a = strong_search(&s->root, k, &qn);
+ /* Direct check of A->V is okay, as A isn't garbage or redundant. */
+ if ( (a->k == k) && (a->v != NULL) )
+ {
+ v = a->v;
+ a->v = NULL;
+ UNLOCK(a, &qn);
+ predecessor_substitution(ptst, s, a);
+ }
+ else
+ {
+ UNLOCK(a, &qn);
+ }
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ node_t *n;
+ setval_t v = NULL;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = weak_search(&s->root, k);
+ if ( n != NULL ) v = GET_VALUE(n);
+
+ critical_exit(ptst);
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+ hook_id = gc_add_hook(redundancy_removal);
+}
--- /dev/null
+/******************************************************************************
+ * bst_mcas.c
+ *
+ * Lock-free binary search trees (BSTs), based on MCAS.
+ * Uses a threaded representation to synchronise searches with deletions.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+/* Allow MCAS marks to be detected using a single bitop (see IS_MCAS_OWNED). */
+#define MARK_IN_PROGRESS 2
+#define MARK_PTR_TO_CD 3
+
+#define MARK_THREAD 1
+#define MARK_GARBAGE 4
+
+#define THREAD(_p) ((node_t *)((int_addr_t)(_p)|(MARK_THREAD)))
+#define GARBAGE(_p) ((node_t *)((int_addr_t)(_p)|(MARK_GARBAGE)))
+#define UNTHREAD(_p) ((node_t *)((int_addr_t)(_p)&~MARK_THREAD))
+#define UNGARBAGE(_p) ((node_t *)((int_addr_t)(_p)&~MARK_GARBAGE))
+/* Following only matches 2 and 3 (mod 4). Those happen to be MCAS marks :) */
+#define IS_MCAS_OWNED(_p) ((int)((int_addr_t)(_p)&2))
+/* Matches 1 and 3 (mod 4). So only use if the ref is *not* owned by MCAS!! */
+#define IS_THREAD(_p) ((int)((int_addr_t)(_p)&MARK_THREAD))
+/* Only use if the ref is *not* owned by MCAS (which may use bit 2)!! */
+#define IS_GARBAGE(_p) ((int)((int_addr_t)(_p)&MARK_GARBAGE))
+
+#include "mcas.c"
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r;
+};
+
+struct set_st
+{
+ node_t root;
+ node_t sentinel;
+};
+
+static int gc_id;
+
+#define READ_LINK(_var, _link) \
+ do { \
+ (_var) = (_link); \
+ if ( !IS_MCAS_OWNED(_var) ) break; \
+ mcas_fixup((void **)&(_link), (_var)); \
+ } while ( 1 )
+
+#define WEAK_READ_LINK(_var, _link) \
+ do { \
+ READ_LINK(_var, _link); \
+ (_var) = UNGARBAGE(_var); \
+ } while ( 0 )
+
+#define STRONG_READ_LINK(_var, _link) \
+ do { \
+ READ_LINK(_var, _link); \
+ if ( IS_GARBAGE(_var) ) goto retry; \
+ } while ( 0 )
+
+#define PROCESS_VAL(_v,_pv) \
+ do { \
+ while ( IS_MCAS_OWNED(_v) ) \
+ { \
+ mcas_fixup((void **)(_pv), (_v)); \
+ (_v) = *(_pv); \
+ } \
+ } while ( 0 )
+
+
+/*
+ * Search for node with key == k. Returns a thread-marked pointer if there
+ * is no matching node, else an (unmarked) pointer to the matching node.
+ * @ppn is filled in with parent node, or closest leaf if no match.
+ * p and n will both be unmarked and adjacent on return.
+ */
+static node_t *search(set_t *s, setkey_t k, node_t **ppn)
+{
+ node_t *p, *n, *c;
+
+ retry:
+ p = &s->root;
+ WEAK_READ_LINK(n, p->r);
+
+ while ( !IS_THREAD(n) )
+ {
+ if ( k < n->k ) {
+ WEAK_READ_LINK(c, n->l);
+ assert(UNTHREAD(c)->k < n->k);
+ } else if ( k > n->k ) {
+ WEAK_READ_LINK(c, n->r);
+ assert(UNTHREAD(c)->k > n->k);
+ } else /* k == n->k */
+ goto found;
+
+ p = n; n = c;
+ }
+
+ /* Follow final thread, just in case. */
+ c = UNTHREAD(n);
+ if ( k == c->k ) goto followed_thread;
+
+ found:
+ if ( ppn ) *ppn = p;
+ return n;
+
+ followed_thread:
+ if ( ppn ) { RMB(); goto retry; }
+ return c;
+}
+
+
+set_t *set_alloc(void)
+{
+ set_t *s;
+
+ static int mcas_inited = 0;
+ if ( !CASIO(&mcas_inited, 0, 1) )
+ {
+ if ( (sizeof(node_t) % 8) != 0 )
+ {
+ fprintf(stderr, "FATAL: node_t must be multiple of 8 bytes\n");
+ *((int*)0)=0;
+ }
+ mcas_init();
+ }
+
+ s = malloc(sizeof(*s));
+ s->root.k = SENTINEL_KEYMIN;
+ s->root.v = NULL;
+ s->root.l = THREAD(&s->root);
+ s->root.r = THREAD(&s->sentinel);
+
+ s->sentinel.k = SENTINEL_KEYMAX;
+
+ return s;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ setval_t ov, nov;
+ node_t *p, *n, *new = NULL, **ppc;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ retry:
+ ov = NULL;
+
+ n = search(s, k, &p);
+ if ( !IS_THREAD(n) )
+ {
+ /* Already a @k node in the set: update its mapping. */
+ nov = n->v;
+ do {
+ ov = nov;
+ PROCESS_VAL(ov, &n->v);
+ if ( ov == NULL ) goto retry;
+ }
+ while ( overwrite && ((nov = CASPO(&n->v, ov, v)) != ov) );
+
+ goto out;
+ }
+
+ if ( new == NULL )
+ {
+ new = gc_alloc(ptst, gc_id);
+ new->k = k;
+ new->v = v;
+ }
+
+ if ( p->k < k )
+ {
+ /* Ensure we insert in the correct interval. */
+ if ( UNTHREAD(n)->k < k ) goto retry;
+ new->l = THREAD(p);
+ new->r = n;
+ ppc = &p->r;
+ }
+ else
+ {
+ if ( UNTHREAD(n)->k > k ) goto retry;
+ new->l = n;
+ new->r = THREAD(p);
+ ppc = &p->l;
+ }
+
+ WMB_NEAR_CAS();
+ }
+ while ( CASPO(ppc, n, new) != n );
+
+ new = NULL;
+
+ out:
+ if ( new ) gc_free(ptst, new, gc_id);
+ critical_exit(ptst);
+ return ov;
+}
+
+
+#define FIND_HELPER(_d1, _d2, _n, _ap, _a) \
+{ \
+ node_t *ac; \
+ (_ap) = NULL; \
+ (_a) = (_n); \
+ WEAK_READ_LINK(ac, (_a)->_d1); \
+ while ( !IS_THREAD(ac) ) \
+ { \
+ (_ap) = (_a); \
+ (_a) = ac; \
+ WEAK_READ_LINK(ac, (_a)->_d2); \
+ } \
+}
+
+
+/*
+ * Order of first two cases does matter! If @n is the left-link of @p, then
+ * we use DELETE_HELPER(l, r). What matters is what we do when @n is a leaf.
+ * In this case we end up choosing n->l to propagate to p->l -- this
+ * happens to be the correct choice :-)
+ *
+ * NB. Note symmetric deletion cases dependent on parameter @dir. We
+ * could simplify the algorithm by always following one direction. In fact,
+ * that is slightly worse, or much worse, depending on the chosen case
+ * (hint: works best with dir hardwired to zero :-)....
+ */
+#define dir 0
+#define DELETE_HELPER(_d1, _d2) \
+ FIND_HELPER(_d1, _d2, n, pal, al); \
+ FIND_HELPER(_d2, _d1, n, par, ar); \
+ if ( IS_THREAD(n ## _d2) ) \
+ { \
+ if ( IS_THREAD(n ## _d1) ) \
+ { \
+ r = mcas(4, \
+ (void **)&n->v, v, NULL, \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, n ## _d1); \
+ } \
+ else \
+ { \
+ if ( al == n ) goto retry; \
+ r = mcas(5, \
+ (void **)&n->v, v, NULL, \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, n ## _d1, \
+ (void **)&al->_d2, THREAD(n), n ## _d2); \
+ } \
+ } \
+ else if ( IS_THREAD(n ## _d1) ) \
+ { \
+ if ( ar == n ) goto retry; \
+ r = mcas(5, \
+ (void **)&n->v, v, NULL, \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, n ## _d2, \
+ (void **)&ar->_d1, THREAD(n), n ## _d1); \
+ } \
+ else if ( dir ) \
+ { \
+ if ( (al == n) || (ar == n) ) goto retry; \
+ if ( par == n ) \
+ { \
+ r = mcas(6, \
+ (void **)&n->v, v, NULL, \
+ (void **)&ar->_d1, THREAD(n), n ## _d1, \
+ (void **)&al->_d2, THREAD(n), THREAD(ar), \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, ar); \
+ } \
+ else \
+ { \
+ STRONG_READ_LINK(ac, ar->_d2); \
+ r = mcas(8, \
+ (void **)&n->v, v, NULL, \
+ (void **)&par->_d1, ar, \
+ (IS_THREAD(ac) ? THREAD(ar) : ac), \
+ (void **)&ar->_d2, ac, n ## _d2, \
+ (void **)&ar->_d1, THREAD(n), n ## _d1, \
+ (void **)&al->_d2, THREAD(n), THREAD(ar), \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, ar); \
+ } \
+ } \
+ else \
+ { \
+ if ( (al == n) || (ar == n) ) goto retry; \
+ if ( pal == n ) \
+ { \
+ r = mcas(6, \
+ (void **)&n->v, v, NULL, \
+ (void **)&al->_d2, THREAD(n), n ## _d2, \
+ (void **)&ar->_d1, THREAD(n), THREAD(al), \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, al); \
+ } \
+ else \
+ { \
+ STRONG_READ_LINK(ac, al->_d1); \
+ r = mcas(8, \
+ (void **)&n->v, v, NULL, \
+ (void **)&pal->_d2, al, \
+ (IS_THREAD(ac) ? THREAD(al) : ac), \
+ (void **)&al->_d1, ac, n ## _d1, \
+ (void **)&al->_d2, THREAD(n), n ## _d2, \
+ (void **)&ar->_d1, THREAD(n), THREAD(al), \
+ (void **)&n->l, nl, GARBAGE(nl), \
+ (void **)&n->r, nr, GARBAGE(nr), \
+ (void **)p_pc, n, al); \
+ } \
+ }
+
+
+/* @k: key of node to be deleted */
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ node_t *p, *n, *nl, *nr, *al, *ar, *pal, *par, *ac, **p_pc;
+ int r = 0;
+ setval_t v;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do
+ {
+ retry:
+ v = NULL;
+
+ /* Node present? */
+ n = search(s, k, &p);
+ if ( IS_THREAD(n) ) goto out;
+
+ /* Already deleted? */
+ v = n->v;
+ PROCESS_VAL(v, &n->v);
+ if ( v == NULL ) goto out;
+
+ STRONG_READ_LINK(nl, n->l);
+ STRONG_READ_LINK(nr, n->r);
+ p_pc = (p->k > n->k) ? &p->l : &p->r;
+
+ if ( p->k > n->k )
+ {
+ /* @n is leftwards link from @p. */
+ DELETE_HELPER(l, r);
+ }
+ else
+ {
+ /* @n is rightwards link from @p. */
+ DELETE_HELPER(r, l);
+ }
+ } while ( !r );
+
+ gc_free(ptst, n, gc_id);
+
+ out:
+ critical_exit(ptst);
+ return v;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ node_t *n;
+ setval_t v;
+ ptst_t *ptst;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = search(s, k, NULL);
+ v = (!IS_THREAD(n)) ? n->v : NULL;
+ PROCESS_VAL(v, &n->v);
+
+ critical_exit(ptst);
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+}
--- /dev/null
+/******************************************************************************
+ * gc.c
+ *
+ * A fully recycling epoch-based garbage collector. Works by counting
+ * threads in and out of critical regions, to work out when
+ * garbage queues can be fully deleted.
+ *
+ * Copyright (c) 2001-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+
+/*#define MINIMAL_GC*/
+/*#define YIELD_TO_HELP_PROGRESS*/
+#define PROFILE_GC
+
+/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
+#define INVALID_BYTE 0
+#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));
+
+/* Number of unique block sizes we can deal with. */
+#define MAX_SIZES 20
+
+#define MAX_HOOKS 4
+
+/*
+ * The initial number of allocation chunks for each per-blocksize list.
+ * Popular allocation lists will steadily increase the allocation unit
+ * in line with demand.
+ */
+#define ALLOC_CHUNKS_PER_LIST 10
+
+/*
+ * How many times should a thread call gc_enter(), seeing the same epoch
+ * each time, before it makes a reclaim attempt?
+ */
+#define ENTRIES_PER_RECLAIM_ATTEMPT 100
+
+/*
+ * 0: current epoch -- threads are moving to this;
+ * -1: some threads may still throw garbage into this epoch;
+ * -2: no threads can see this epoch => we can zero garbage lists;
+ * -3: all threads see zeros in these garbage lists => move to alloc lists.
+ */
+#ifdef WEAK_MEM_ORDER
+#define NR_EPOCHS 4
+#else
+#define NR_EPOCHS 3
+#endif
+
+/*
+ * A chunk amortises the cost of allocation from shared lists. It also
+ * helps when zeroing nodes, as it increases per-cacheline pointer density
+ * and means that node locations don't need to be brought into the cache
+ * (most architectures have a non-temporal store instruction).
+ */
+#define BLKS_PER_CHUNK 100
+typedef struct chunk_st chunk_t;
+struct chunk_st
+{
+ chunk_t *next; /* chunk chaining */
+ unsigned int i; /* the next entry in blk[] to use */
+ void *blk[BLKS_PER_CHUNK];
+};
+
+static struct gc_global_st
+{
+ CACHE_PAD(0);
+
+ /* The current epoch. */
+ VOLATILE unsigned int current;
+ CACHE_PAD(1);
+
+ /* Exclusive access to gc_reclaim(). */
+ VOLATILE unsigned int inreclaim;
+ CACHE_PAD(2);
+
+ /*
+ * RUN-TIME CONSTANTS (to first approximation)
+ */
+
+ /* Memory page size, in bytes. */
+ unsigned int page_size;
+
+ /* Node sizes (run-time constants). */
+ int nr_sizes;
+ int blk_sizes[MAX_SIZES];
+
+ /* Registered epoch hooks. */
+ int nr_hooks;
+ hook_fn_t hook_fns[MAX_HOOKS];
+ CACHE_PAD(3);
+
+ /*
+ * DATA WE MAY HIT HARD
+ */
+
+ /* Chain of free, empty chunks. */
+ chunk_t * VOLATILE free_chunks;
+
+ /* Main allocation lists. */
+ chunk_t * VOLATILE alloc[MAX_SIZES];
+ VOLATILE unsigned int alloc_size[MAX_SIZES];
+#ifdef PROFILE_GC
+ VOLATILE unsigned int total_size;
+ VOLATILE unsigned int allocations;
+#endif
+} gc_global;
+
+
+/* Per-thread state. */
+struct gc_st
+{
+ /* Epoch that this thread sees. */
+ unsigned int epoch;
+
+ /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
+ unsigned int entries_since_reclaim;
+
+#ifdef YIELD_TO_HELP_PROGRESS
+ /* Number of calls to gc_reclaim() since we last yielded. */
+ unsigned int reclaim_attempts_since_yield;
+#endif
+
+ /* Used by gc_async_barrier(). */
+ void *async_page;
+ int async_page_state;
+
+ /* Garbage lists. */
+ chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
+ chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
+ chunk_t *chunk_cache;
+
+ /* Local allocation lists. */
+ chunk_t *alloc[MAX_SIZES];
+ unsigned int alloc_chunks[MAX_SIZES];
+
+ /* Hook pointer lists. */
+ chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
+};
+
+
+#define MEM_FAIL(_s) \
+do { \
+ fprintf(stderr, "OUT OF MEMORY: %d bytes at line %d\n", (_s), __LINE__); \
+ exit(1); \
+} while ( 0 )
+
+
+/* Allocate more empty chunks from the heap. */
+#define CHUNKS_PER_ALLOC 1000
+static chunk_t *alloc_more_chunks(void)
+{
+ int i;
+ chunk_t *h, *p;
+
+ h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
+ if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));
+
+ for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
+ {
+ p->next = p + 1;
+ p++;
+ }
+
+ p->next = h;
+
+ return(h);
+}
+
+
+/* Put a chain of chunks onto a list. */
+static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
+{
+ chunk_t *h_next, *new_h_next, *ch_next;
+ ch_next = ch->next;
+ new_h_next = head->next;
+ do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
+ while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
+}
+
+
+/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
+static chunk_t *get_empty_chunks(int n)
+{
+ int i;
+ chunk_t *new_rh, *rh, *rt, *head;
+
+ retry:
+ head = gc_global.free_chunks;
+ new_rh = head->next;
+ do {
+ rh = new_rh;
+ rt = head;
+ WEAK_DEP_ORDER_RMB();
+ for ( i = 0; i < n; i++ )
+ {
+ if ( (rt = rt->next) == head )
+ {
+ /* Allocate some more chunks. */
+ add_chunks_to_list(alloc_more_chunks(), head);
+ goto retry;
+ }
+ }
+ }
+ while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );
+
+ rt->next = rh;
+ return(rh);
+}
+
+
+/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
+static chunk_t *get_filled_chunks(int n, int sz)
+{
+ chunk_t *h, *p;
+ char *node;
+ int i;
+
+#ifdef PROFILE_GC
+ ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
+ ADD_TO(gc_global.allocations, 1);
+#endif
+
+ node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
+ if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
+#ifdef WEAK_MEM_ORDER
+ INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
+#endif
+
+ h = p = get_empty_chunks(n);
+ do {
+ p->i = BLKS_PER_CHUNK;
+ for ( i = 0; i < BLKS_PER_CHUNK; i++ )
+ {
+ p->blk[i] = node;
+ node += sz;
+ }
+ }
+ while ( (p = p->next) != h );
+
+ return(h);
+}
+
+
+/*
+ * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
+ * this by causing a TLB shootdown to be propagated to all other processors.
+ * Each time such an action is required, this function calls:
+ * mprotect(async_page, <page size>, <new flags>)
+ * Each thread's state contains a memory page dedicated for this purpose.
+ */
+#ifdef WEAK_MEM_ORDER
+static void gc_async_barrier(gc_t *gc)
+{
+ mprotect(gc->async_page, gc_global.page_size,
+ gc->async_page_state ? PROT_READ : PROT_NONE);
+ gc->async_page_state = !gc->async_page_state;
+}
+#else
+#define gc_async_barrier(_g) ((void)0)
+#endif
+
+
+/* Grab a level @i allocation chunk from main chain. */
+static chunk_t *get_alloc_chunk(gc_t *gc, int i)
+{
+ chunk_t *alloc, *p, *new_p, *nh;
+ unsigned int sz;
+
+ alloc = gc_global.alloc[i];
+ new_p = alloc->next;
+
+ do {
+ p = new_p;
+ while ( p == alloc )
+ {
+ sz = gc_global.alloc_size[i];
+ nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
+ ADD_TO(gc_global.alloc_size[i], sz >> 3);
+ gc_async_barrier(gc);
+ add_chunks_to_list(nh, alloc);
+ p = alloc->next;
+ }
+ WEAK_DEP_ORDER_RMB();
+ }
+ while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );
+
+ p->next = p;
+ assert(p->i == BLKS_PER_CHUNK);
+ return(p);
+}
+
+
+#ifndef MINIMAL_GC
+/*
+ * gc_reclaim: Scans the list of per-thread state structures, checking the
+ * epoch seen by each thread that is currently inside a critical region. If
+ * they have all seen the current epoch, the "nearly-free" lists from the
+ * oldest epoch are reclaimed and the epoch is advanced.
+ */
+static void gc_reclaim(void)
+{
+ ptst_t *ptst, *first_ptst, *our_ptst = NULL;
+ gc_t *gc = NULL;
+ unsigned long curr_epoch;
+ chunk_t *ch, *t;
+ int two_ago, three_ago, i, j;
+
+ /* Barrier to entering the reclaim critical section. */
+ if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;
+
+ /*
+ * Grab first ptst structure *before* barrier -- prevent bugs
+ * on weak-ordered architectures.
+ */
+ first_ptst = ptst_first();
+ MB();
+ curr_epoch = gc_global.current;
+
+ /* Have all threads either seen the current epoch, or left mutator code? */
+ for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
+ {
+ if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
+ }
+
+ /*
+ * Three-epoch-old garbage lists move to allocation lists.
+ * Two-epoch-old garbage lists are cleaned out.
+ */
+ two_ago = (curr_epoch+2) % NR_EPOCHS;
+ three_ago = (curr_epoch+1) % NR_EPOCHS;
+ if ( gc_global.nr_hooks != 0 )
+ our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
+ for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
+ {
+ gc = ptst->gc;
+
+ for ( i = 0; i < gc_global.nr_sizes; i++ )
+ {
+#ifdef WEAK_MEM_ORDER
+ int sz = gc_global.blk_sizes[i];
+ if ( gc->garbage[two_ago][i] != NULL )
+ {
+ chunk_t *head = gc->garbage[two_ago][i];
+ ch = head;
+ do {
+ int j;
+ for ( j = 0; j < ch->i; j++ )
+ INITIALISE_NODES(ch->blk[j], sz);
+ }
+ while ( (ch = ch->next) != head );
+ }
+#endif
+
+ /* NB. Leave one chunk behind, as it is probably not yet full. */
+ t = gc->garbage[three_ago][i];
+ if ( (t == NULL) || ((ch = t->next) == t) ) continue;
+ gc->garbage_tail[three_ago][i]->next = ch;
+ gc->garbage_tail[three_ago][i] = t;
+ t->next = t;
+ add_chunks_to_list(ch, gc_global.alloc[i]);
+ }
+
+ for ( i = 0; i < gc_global.nr_hooks; i++ )
+ {
+ hook_fn_t fn = gc_global.hook_fns[i];
+ ch = gc->hook[three_ago][i];
+ if ( ch == NULL ) continue;
+ gc->hook[three_ago][i] = NULL;
+
+ t = ch;
+ do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); }
+ while ( (t = t->next) != ch );
+
+ add_chunks_to_list(ch, gc_global.free_chunks);
+ }
+ }
+
+ /* Update current epoch. */
+ WMB();
+ gc_global.current = (curr_epoch+1) % NR_EPOCHS;
+
+ out:
+ gc_global.inreclaim = 0;
+}
+#endif /* MINIMAL_GC */
+
+
+void *gc_alloc(ptst_t *ptst, int alloc_id)
+{
+ gc_t *gc = ptst->gc;
+ chunk_t *ch;
+
+ ch = gc->alloc[alloc_id];
+ if ( ch->i == 0 )
+ {
+ if ( gc->alloc_chunks[alloc_id]++ == 100 )
+ {
+ gc->alloc_chunks[alloc_id] = 0;
+ add_chunks_to_list(ch, gc_global.free_chunks);
+ gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);
+ }
+ else
+ {
+ chunk_t *och = ch;
+ ch = get_alloc_chunk(gc, alloc_id);
+ ch->next = och->next;
+ och->next = ch;
+ gc->alloc[alloc_id] = ch;
+ }
+ }
+
+ return ch->blk[--ch->i];
+}
+
+
+static chunk_t *chunk_from_cache(gc_t *gc)
+{
+ chunk_t *ch = gc->chunk_cache, *p = ch->next;
+
+ if ( ch == p )
+ {
+ gc->chunk_cache = get_empty_chunks(100);
+ }
+ else
+ {
+ ch->next = p->next;
+ p->next = p;
+ }
+
+ p->i = 0;
+ return(p);
+}
+
+
+void gc_free(ptst_t *ptst, void *p, int alloc_id)
+{
+#ifndef MINIMAL_GC
+ gc_t *gc = ptst->gc;
+ chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];
+
+ if ( ch == NULL )
+ {
+ gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
+ gc->garbage_tail[gc->epoch][alloc_id] = ch;
+ }
+ else if ( ch->i == BLKS_PER_CHUNK )
+ {
+ prev = gc->garbage_tail[gc->epoch][alloc_id];
+ new = chunk_from_cache(gc);
+ gc->garbage[gc->epoch][alloc_id] = new;
+ new->next = ch;
+ prev->next = new;
+ ch = new;
+ }
+
+ ch->blk[ch->i++] = p;
+#endif
+}
+
+
+void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
+{
+ gc_t *gc = ptst->gc;
+ chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];
+
+ if ( ch == NULL )
+ {
+ gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);
+ }
+ else
+ {
+ ch = ch->next;
+ if ( ch->i == BLKS_PER_CHUNK )
+ {
+ och = gc->hook[gc->epoch][hook_id];
+ ch = chunk_from_cache(gc);
+ ch->next = och->next;
+ och->next = ch;
+ }
+ }
+
+ ch->blk[ch->i++] = ptr;
+}
+
+
+void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
+{
+ gc_t *gc = ptst->gc;
+ chunk_t *ch;
+
+ ch = gc->alloc[alloc_id];
+ if ( ch->i < BLKS_PER_CHUNK )
+ {
+ ch->blk[ch->i++] = p;
+ }
+ else
+ {
+ gc_free(ptst, p, alloc_id);
+ }
+}
+
+
+void gc_enter(ptst_t *ptst)
+{
+#ifdef MINIMAL_GC
+ ptst->count++;
+ MB();
+#else
+ gc_t *gc = ptst->gc;
+ int new_epoch, cnt;
+
+ retry:
+ cnt = ptst->count++;
+ MB();
+ if ( cnt == 1 )
+ {
+ new_epoch = gc_global.current;
+ if ( gc->epoch != new_epoch )
+ {
+ gc->epoch = new_epoch;
+ gc->entries_since_reclaim = 0;
+#ifdef YIELD_TO_HELP_PROGRESS
+ gc->reclaim_attempts_since_yield = 0;
+#endif
+ }
+ else if ( gc->entries_since_reclaim++ == ENTRIES_PER_RECLAIM_ATTEMPT )
+ {
+ ptst->count--;
+#ifdef YIELD_TO_HELP_PROGRESS
+ if ( gc->reclaim_attempts_since_yield++ == 10000 )
+ {
+ gc->reclaim_attempts_since_yield = 0;
+ sched_yield();
+ }
+#endif
+ gc->entries_since_reclaim = 0;
+ gc_reclaim();
+ goto retry;
+ }
+ }
+#endif
+}
+
+
+void gc_exit(ptst_t *ptst)
+{
+ MB();
+ ptst->count--;
+}
+
+
+gc_t *gc_init(void)
+{
+ gc_t *gc;
+ int i;
+
+ gc = ALIGNED_ALLOC(sizeof(*gc));
+ if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
+ memset(gc, 0, sizeof(*gc));
+
+#ifdef WEAK_MEM_ORDER
+ /* Initialise shootdown state. */
+ gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
+ gc->async_page_state = 1;
+#endif
+
+ gc->chunk_cache = get_empty_chunks(100);
+
+ /* Get ourselves a set of allocation chunks. */
+ for ( i = 0; i < gc_global.nr_sizes; i++ )
+ {
+ gc->alloc[i] = get_alloc_chunk(gc, i);
+ }
+ for ( ; i < MAX_SIZES; i++ )
+ {
+ gc->alloc[i] = chunk_from_cache(gc);
+ }
+
+ return(gc);
+}
+
+
+int gc_add_allocator(int alloc_size)
+{
+ int ni, i = gc_global.nr_sizes;
+ while ( (ni = CASIO(&gc_global.nr_sizes, i, i+1)) != i ) i = ni;
+ gc_global.blk_sizes[i] = alloc_size;
+ gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
+ gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size);
+ return i;
+}
+
+
+void gc_remove_allocator(int alloc_id)
+{
+ /* This is a no-op for now. */
+}
+
+
+int gc_add_hook(hook_fn_t fn)
+{
+ int ni, i = gc_global.nr_hooks;
+ while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
+ gc_global.hook_fns[i] = fn;
+ return i;
+}
+
+
+void gc_remove_hook(int hook_id)
+{
+ /* This is a no-op for now. */
+}
+
+
+void _destroy_gc_subsystem(void)
+{
+#ifdef PROFILE_GC
+ printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
+ gc_global.total_size, (double)gc_global.total_size / 1000000,
+ gc_global.allocations);
+#endif
+}
+
+
+void _init_gc_subsystem(void)
+{
+ memset(&gc_global, 0, sizeof(gc_global));
+
+ gc_global.page_size = (unsigned int)sysconf(_SC_PAGESIZE);
+ gc_global.free_chunks = alloc_more_chunks();
+
+ gc_global.nr_hooks = 0;
+ gc_global.nr_sizes = 0;
+}
--- /dev/null
+#ifndef __GC_H__
+#define __GC_H__
+
+typedef struct gc_st gc_t;
+
+/* Most of these functions peek into a per-thread state struct. */
+#include "ptst.h"
+
+/* Initialise GC section of given per-thread state structure. */
+gc_t *gc_init(void);
+
+int gc_add_allocator(int alloc_size);
+void gc_remove_allocator(int alloc_id);
+
+/*
+ * Memory allocate/free. An unsafe free can be used when an object was
+ * never made visible to other threads.
+ */
+void *gc_alloc(ptst_t *ptst, int alloc_id);
+void gc_free(ptst_t *ptst, void *p, int alloc_id);
+void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id);
+
+/*
+ * Hook registry. Allows users to hook in their own per-epoch delay
+ * lists.
+ */
+typedef void (*hook_fn_t)(ptst_t *, void *);
+int gc_add_hook(hook_fn_t fn);
+void gc_remove_hook(int hook_id);
+void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id);
+
+/* Per-thread entry/exit from critical regions */
+void gc_enter(ptst_t *ptst);
+void gc_exit(ptst_t *ptst);
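+
+/*
+ * Illustrative usage (a sketch only, not part of the API): a search
+ * structure typically registers its node size once at start of day, then
+ * allocates and frees nodes inside critical regions, as the set
+ * implementations in this library do. 'my_node_t' and the surrounding
+ * routines here are hypothetical.
+ *
+ *   static int my_gc_id;
+ *
+ *   void my_init(void)
+ *   {
+ *       my_gc_id = gc_add_allocator(sizeof(my_node_t));
+ *   }
+ *
+ *   void my_delete(my_node_t *n)
+ *   {
+ *       ptst_t *ptst = critical_enter();
+ *       ... unlink @n so no new references can be taken ...
+ *       gc_free(ptst, n, my_gc_id);    (freed once no thread can hold it)
+ *       critical_exit(ptst);
+ *   }
+ */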
+
+/* Start-of-day initialisation of garbage collector. */
+void _init_gc_subsystem(void);
+void _destroy_gc_subsystem(void);
+
+#endif /* __GC_H__ */
--- /dev/null
+#ifndef __IA64_DEFNS_H__
+#define __IA64_DEFNS_H__
+
+#include <pthread.h>
+#include <sched.h>
+
+#ifndef IA64
+#define IA64
+#endif
+
+#define CACHE_LINE_SIZE 64
+
+/*
+ * I. Compare-and-swap.
+ */
+
+#define CAS32(_a, _o, _n) \
+({ __typeof__(_o) __o = _o; \
+ __asm__ __volatile__("mov ar.ccv=%0 ;;" :: "rO" (_o)); \
+ __asm__ __volatile__("cmpxchg4.acq %0=%1,%2,ar.ccv ;; " \
+ : "=r" (__o), "=m" (*(_a)) \
+ : "r"(_n)); \
+ __o; \
+})
+
+#define CAS64(_a, _o, _n) \
+({ __typeof__(_o) __o = _o; \
+ __asm__ __volatile__("mov ar.ccv=%0 ;;" :: "rO" (_o)); \
+ __asm__ __volatile__("cmpxchg8.acq %0=%1,%2,ar.ccv ;; " \
+ : "=r" (__o), "=m" (*(_a)) \
+ : "r"(_n)); \
+ __o; \
+})
+
+#define FAS32(_a, _n) \
+({ __typeof__(_n) __o; \
+ __asm__ __volatile__("xchg4 %0=%1,%2 ;; " \
+ : "=r" (__o), "=m" (*(_a)) \
+ : "r"(_n)); \
+ __o; \
+})
+
+#define FAS64(_a, _n) \
+({ __typeof__(_n) __o; \
+ __asm__ __volatile__("xchg8 %0=%1,%2 ;; " \
+ : "=r" (__o), "=m" (*(_a)) \
+ : "r"(_n)); \
+ __o; \
+})
+
+#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n))
+#define FAS(_x,_n) ((sizeof (*_x) == 4)?FAS32(_x,_n) :FAS64(_x,_n))
+
+/* Update Integer location, return Old value. */
+#define CASIO CAS
+#define FASIO FAS
+/* Update Pointer location, return Old value. */
+#define CASPO CAS64
+#define FASPO FAS64
+/* Update 32/64-bit location, return Old value. */
+#define CAS32O CAS32
+#define CAS64O CAS64
+
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+#define MB() __asm__ __volatile__ (";; mf ;; " : : : "memory")
+#define WMB() MB()
+#define RMB() MB()
+#define VOLATILE /*volatile*/
+
+/*
+ * III. Cycle counter access.
+ */
+
+typedef unsigned long long tick_t;
+#define RDTICK() \
+ ({ tick_t __t; __asm__ __volatile__ ("mov %0=ar.itc ;;" : "=rO" (__t)); __t; })
+
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long long _u64;
+
+#endif /* __IA64_DEFNS_H__ */
--- /dev/null
+#ifndef __INTEL_DEFNS_H__
+#define __INTEL_DEFNS_H__
+
+#include <pthread.h>
+#include <sched.h>
+
+#ifndef INTEL
+#define INTEL
+#endif
+
+#define CACHE_LINE_SIZE 64
+
+#if 0
+#define pthread_mutex_init(_m,_i) \
+({ pthread_mutex_init(_m,_i); (_m)->__m_kind = PTHREAD_MUTEX_ADAPTIVE_NP; })
+#endif
+
+
+/*
+ * I. Compare-and-swap.
+ */
+
+/*
+ * This is a strong barrier! Reads cannot be delayed beyond a later store.
+ * Reads cannot be hoisted beyond a LOCK prefix. Stores always in-order.
+ */
+#define CAS(_a, _o, _n) \
+({ __typeof__(_o) __o = _o; \
+ __asm__ __volatile__( \
+ "lock cmpxchg %3,%1" \
+ : "=a" (__o), "=m" (*(volatile unsigned int *)(_a)) \
+ : "0" (__o), "r" (_n) ); \
+ __o; \
+})
+
+#define FAS(_a, _n) \
+({ __typeof__(_n) __o; \
+ __asm__ __volatile__( \
+ "lock xchg %0,%1" \
+ : "=r" (__o), "=m" (*(volatile unsigned int *)(_a)) \
+ : "0" (_n) ); \
+ __o; \
+})
+
+#define CAS64(_a, _o, _n) \
+({ __typeof__(_o) __o = _o; \
+ __asm__ __volatile__( \
+ "movl %3, %%ecx;" \
+ "movl %4, %%ebx;" \
+ "lock cmpxchg8b %1" \
+ : "=A" (__o), "=m" (*(volatile unsigned long long *)(_a)) \
+ : "0" (__o), "m" (_n >> 32), "m" (_n) \
+ : "ebx", "ecx" ); \
+ __o; \
+})
+
+/* Update Integer location, return Old value. */
+#define CASIO CAS
+#define FASIO FAS
+/* Update Pointer location, return Old value. */
+#define CASPO CAS
+#define FASPO FAS
+/* Update 32/64-bit location, return Old value. */
+#define CAS32O CAS
+#define CAS64O CAS64
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+#define MB() __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
+#define WMB() __asm__ __volatile__ ("" : : : "memory")
+#define RMB() MB()
+#define VOLATILE /*volatile*/
+
+/* On Intel, CAS is a strong barrier, but not a compile barrier. */
+#define RMB_NEAR_CAS() WMB()
+#define WMB_NEAR_CAS() WMB()
+#define MB_NEAR_CAS() WMB()
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+typedef unsigned long long tick_t;
+#define RDTICK() \
+ ({ tick_t __t; __asm__ __volatile__ ("rdtsc" : "=A" (__t)); __t; })
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long long _u64;
+
+#endif /* __INTEL_DEFNS_H__ */
--- /dev/null
+/******************************************************************************
+ * mcas.c
+ *
+ * MCAS implemented as described in:
+ * A Practical Multi-Word Compare-and-Swap Operation
+ * Timothy Harris, Keir Fraser and Ian Pratt
+ * Proceedings of the IEEE Symposium on Distributed Computing, Oct 2002
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/resource.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef struct CasDescriptor CasDescriptor_t;
+typedef struct CasEntry CasEntry_t;
+typedef struct per_thread_state_t per_thread_state_t;
+
+extern int num_threads;
+
+#define ARENA_SIZE 40960
+
+struct per_thread_state_t
+{
+ int id;
+ CasDescriptor_t *next_descriptor;
+ void *arena;
+ void *arena_lim;
+};
+
+
+static pthread_key_t mcas_ptst_key;
+
+typedef struct pad128 { char pad[128]; } pad128_t;
+
+
+/* CAS descriptors. */
+
+#define STATUS_IN_PROGRESS 0
+#define STATUS_SUCCEEDED 1
+#define STATUS_FAILED 2
+#define STATUS_ABORTED 3
+
+struct CasEntry {
+ void **ptr;
+ void *old;
+ void *new;
+};
+
+struct CasDescriptor {
+ int status;
+ int length;
+ CasDescriptor_t *pt[MAX_THREADS];
+ int rc;
+ CasDescriptor_t *fc; /* free chain */
+ CasEntry_t entries[1];
+};
+
+/* Marked pointers. */
+typedef unsigned long ptr_int;
+#ifndef MARK_IN_PROGRESS
+#define MARK_IN_PROGRESS 1
+#endif
+#ifndef MARK_PTR_TO_CD
+#define MARK_PTR_TO_CD 2
+#endif
+
+#define get_markedness(p) (((ptr_int) (p)) & 3)
+#define get_unmarked_reference(p) ((void *) (((ptr_int) (p)) & (~3)))
+#define get_marked_reference(p,m) ((void *) (((ptr_int) (p)) | m))
+
+static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd);
+static per_thread_state_t *get_ptst (void);
+
+pad128_t p0; /* I'm worried these important RO vars might suffer false sharing */
+static int cas_sz;
+static int num_ptrs = 1024;
+static int ptr_mult = 1;
+pad128_t p1;
+
+static void *ALLOC(int size)
+{
+ void *a = calloc(1, size);
+ if ( a == NULL ) abort();
+ return a;
+}
+
+static void *ALLOC_ALONE (int size)
+{
+ int ps = sysconf(_SC_PAGESIZE);
+ int req = ps + size + ps;
+ char *res = ALLOC(req);
+ return (void *)(res + ps);
+}
+
+static int next_thread_id = 0;
+static per_thread_state_t *ptsts = NULL;
+
+static void new_arena (per_thread_state_t *ptst, int size)
+{
+ ptst->arena = ALLOC(size);
+ if ( !ptst->arena ) abort();
+ ptst->arena_lim = (((char *) ptst->arena) + size);
+}
+
+static per_thread_state_t *get_ptst (void)
+{
+ per_thread_state_t *result;
+ int r;
+
+ result = pthread_getspecific(mcas_ptst_key);
+
+ if ( result == NULL )
+ {
+ int my_id;
+ int largest = sysconf(_SC_PAGESIZE);
+
+ if ( largest < sizeof (per_thread_state_t) )
+ largest = sizeof (per_thread_state_t);
+
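+ /*
+ * The two discarded allocations either side of 'result' space it away
+ * from other heap data (same intent as ALLOC_ALONE above).
+ */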
+ ALLOC (largest);
+ result = ALLOC (largest);
+ ALLOC (largest);
+
+ do { my_id = next_thread_id; }
+ while ( CASIO (&next_thread_id, my_id, my_id + 1) != my_id );
+
+ result->id = my_id;
+ ptsts = result;
+
+ new_arena(result, ARENA_SIZE);
+
+ r = pthread_setspecific(mcas_ptst_key, result);
+ assert(r == 0);
+ }
+
+ return result;
+}
+
+static void release_descriptor (CasDescriptor_t *cd)
+{
+ per_thread_state_t *ptst = get_ptst ();
+ cd->fc = ptst->next_descriptor;
+ ptst->next_descriptor = cd;
+}
+
+static int rc_delta_descriptor (CasDescriptor_t *cd,
+ int delta)
+{
+ int rc, new_rc = cd->rc;
+
+ do { rc = new_rc; }
+ while ( (new_rc = CASIO (&(cd->rc), rc, rc + delta)) != rc );
+
+ return rc;
+}
+
+static void rc_up_descriptor (CasDescriptor_t *cd)
+{
+ rc_delta_descriptor(cd, 2);
+ MB();
+}
+
+static void rc_down_descriptor (CasDescriptor_t *cd)
+{
+ int old_rc, new_rc, cur_rc = cd->rc;
+
+ do {
+ old_rc = cur_rc;
+ new_rc = old_rc - 2;
+ if ( new_rc == 0 ) new_rc = 1; else MB();
+ }
+ while ( (cur_rc = CASIO(&(cd->rc), old_rc, new_rc)) != old_rc );
+
+ if ( old_rc == 2 )
+ release_descriptor(cd);
+}
+
+static CasDescriptor_t *new_descriptor (per_thread_state_t *ptst, int length)
+{
+ CasDescriptor_t *result;
+ int i;
+
+ CasDescriptor_t **ptr = &(ptst->next_descriptor);
+ result = *ptr;
+ while ( (result != NULL) && (result->length != length) )
+ {
+ ptr = &(result->fc);
+ result = *ptr;
+ }
+
+ if ( result == NULL )
+ {
+ int alloc_size;
+
+ alloc_size = sizeof (CasDescriptor_t) +
+ ((length - 1) * sizeof (CasEntry_t));
+
+ result = (CasDescriptor_t *) ptst->arena;
+ ptst->arena = ((char *) (ptst->arena)) + alloc_size;
+
+ if ( ptst->arena >= ptst->arena_lim )
+ {
+ new_arena(ptst, ARENA_SIZE);
+ result = (CasDescriptor_t *) ptst->arena;
+ ptst->arena = ((char *) (ptst->arena)) + alloc_size;
+ }
+
+ for ( i = 0; i < num_threads; i++ )
+ result->pt[i] = result;
+
+ result->length = length;
+ result->rc = 2;
+ }
+ else
+ {
+ *ptr = result->fc;
+ assert((result->rc & 1) == 1);
+ rc_delta_descriptor(result, 1); /* clears lowest bit */
+ }
+
+ assert(result->length == length);
+
+ return result;
+}
+
+static void *read_from_cd (void **ptr, CasDescriptor_t *cd, bool_t get_old)
+{
+ CasEntry_t *ce;
+ int i;
+ int n;
+
+ n = cd->length;
+ for ( i = 0; i < n; i++ )
+ {
+ ce = &(cd->entries[i]);
+ if ( ce->ptr == ptr )
+ return get_old ? ce->old : ce->new;
+ }
+
+ assert(0);
+ return NULL;
+}
+
+static void *read_barrier_lite (void **ptr)
+{
+ CasDescriptor_t *cd;
+ void *v;
+ int m;
+
+ retry_read_barrier:
+ v = *ptr;
+ m = get_markedness(v);
+
+ if ( m == MARK_PTR_TO_CD )
+ {
+ WEAK_DEP_ORDER_RMB();
+ cd = get_unmarked_reference(v);
+
+ rc_up_descriptor(cd);
+ if ( *ptr != v )
+ {
+ rc_down_descriptor(cd);
+ goto retry_read_barrier;
+ }
+
+ v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED));
+
+ rc_down_descriptor(cd);
+ }
+ else if ( m == MARK_IN_PROGRESS )
+ {
+ WEAK_DEP_ORDER_RMB();
+ cd = *(CasDescriptor_t **)get_unmarked_reference(v);
+
+ rc_up_descriptor(cd);
+ if ( *ptr != v )
+ {
+ rc_down_descriptor(cd);
+ goto retry_read_barrier;
+ }
+
+ v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED));
+
+ rc_down_descriptor(cd);
+ }
+
+ return v;
+}
+
+static void clean_descriptor (CasDescriptor_t *cd)
+{
+ int i;
+ void *mcd;
+ int status;
+
+ status = cd->status;
+ assert(status == STATUS_SUCCEEDED || status == STATUS_FAILED);
+
+ mcd = get_marked_reference(cd, MARK_PTR_TO_CD);
+
+ if (status == STATUS_SUCCEEDED)
+ for ( i = 0; i < cd->length; i++ )
+ CASPO (cd->entries[i].ptr, mcd, cd->entries[i].new);
+ else
+ for ( i = 0; i < cd->length; i++ )
+ CASPO(cd->entries[i].ptr, mcd, cd->entries[i].old);
+}
+
+static bool_t mcas_fixup (void **ptr,
+ void *value_read)
+{
+ int m;
+
+ retry_mcas_fixup:
+ m = get_markedness(value_read);
+ if ( m == MARK_PTR_TO_CD )
+ {
+ CasDescriptor_t *helpee;
+ helpee = get_unmarked_reference(value_read);
+
+ rc_up_descriptor(helpee);
+ if ( *ptr != value_read )
+ {
+ rc_down_descriptor(helpee);
+ value_read = *ptr;
+ goto retry_mcas_fixup;
+ }
+
+ mcas0(NULL, helpee);
+
+ rc_down_descriptor(helpee);
+
+ return TRUE;
+ }
+ else if ( m == MARK_IN_PROGRESS )
+ {
+ CasDescriptor_t *other_cd;
+
+ WEAK_DEP_ORDER_RMB();
+ other_cd = *(CasDescriptor_t **)get_unmarked_reference(value_read);
+
+ rc_up_descriptor(other_cd);
+ if ( *ptr != value_read )
+ {
+ rc_down_descriptor(other_cd);
+ value_read = *ptr;
+ goto retry_mcas_fixup;
+ }
+
+ if ( other_cd->status == STATUS_IN_PROGRESS )
+ CASPO(ptr,
+ value_read,
+ get_marked_reference(other_cd, MARK_PTR_TO_CD));
+ else
+ CASPO(ptr,
+ value_read,
+ read_from_cd(ptr, other_cd, TRUE));
+
+ rc_down_descriptor (other_cd);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void *read_barrier (void **ptr)
+{
+ void *v;
+
+ do { v = *ptr; }
+ while ( mcas_fixup(ptr, v) );
+
+ return v;
+}
+
+static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd)
+{
+ int i;
+ int n;
+ int desired_status;
+ bool_t final_success;
+ void *mcd;
+ void *dmcd;
+ int old_status;
+
+ if ( ptst == NULL )
+ ptst = get_ptst();
+
+ MB(); /* required for sequential consistency */
+
+ if ( cd->status == STATUS_SUCCEEDED )
+ {
+ clean_descriptor(cd);
+ final_success = TRUE;
+ goto out;
+ }
+ else if ( cd->status == STATUS_FAILED )
+ {
+ clean_descriptor(cd);
+ final_success = FALSE;
+ goto out;
+ }
+
+ /* Attempt to link in all entries in the descriptor. */
+ mcd = get_marked_reference(cd, MARK_PTR_TO_CD);
+ dmcd = get_marked_reference(&(cd->pt[ptst->id]), MARK_IN_PROGRESS);
+
+ desired_status = STATUS_SUCCEEDED;
+
+ retry:
+ n = cd->length;
+ for (i = 0; i < n; i ++)
+ {
+ CasEntry_t *ce = &(cd->entries[i]);
+ void *value_read = CASPO(ce->ptr, ce->old, dmcd);
+
+ if ( (value_read != ce->old) &&
+ (value_read != dmcd) &&
+ (value_read != mcd) )
+ {
+ if ( mcas_fixup(ce->ptr, value_read) )
+ goto retry;
+ desired_status = STATUS_FAILED;
+ break;
+ }
+
+ RMB_NEAR_CAS(); /* ensure check of status occurs after CASPO. */
+ if ( cd->status != STATUS_IN_PROGRESS )
+ {
+ CASPO(ce->ptr, dmcd, ce->old);
+ break;
+ }
+
+ if ( value_read != mcd )
+ {
+ value_read = CASPO(ce->ptr, dmcd, mcd);
+ assert((value_read == dmcd) ||
+ (value_read == mcd) ||
+ (cd->status != STATUS_IN_PROGRESS));
+ }
+ }
+
+ /*
+ * All your ptrs are belong to us (or we've been helped and
+ * already known to have succeeded or failed). Try to
+ * propagate our desired result into the status field.
+ */
+
+ /*
+ * When changing to success, we must have all pointer ownerships
+ * globally visible. But we get this without a memory barrier, as
+ * 'desired_status' is dependent on the outcome of each CASPO
+ * to MARK_IN_PROGRESS.
+ *
+ * Architectures providing CAS natively all specify that the operation
+ * is _indivisible_. That is, the write will be done when the CAS
+ * completes.
+ *
+ * Architectures providing LL/SC are even better: any following
+ * instruction in program order is control-dependent on the CAS, because
+ * CAS may be retried if SC fails. All we need is that SC gets to point
+ * of coherency before producing its result: even Alpha provides this!
+ */
+ WEAK_DEP_ORDER_WMB();
+ old_status = CASIO((int *)&cd->status,
+ STATUS_IN_PROGRESS,
+ desired_status);
+ /*
+ * This ensures final sequential consistency.
+ * Also ensures that the status update is visible before cleanup.
+ */
+ WMB_NEAR_CAS();
+
+ clean_descriptor(cd);
+ final_success = (cd->status == STATUS_SUCCEEDED);
+
+ out:
+ return final_success;
+}
+
+
+void mcas_init (void)
+{
+ int r = pthread_key_create(&mcas_ptst_key, NULL);
+ if ( r != 0 ) abort();
+}
+
+/***********************************************************************/
+
+bool_t mcas (int n,
+ void **ptr, void *old, void *new,
+ ...)
+{
+ va_list ap;
+ int i;
+ CasDescriptor_t *cd;
+ CasEntry_t *ce;
+ int result = 0;
+ per_thread_state_t *ptst = get_ptst();
+
+ cd = new_descriptor(ptst, n);
+
+ cd->status = STATUS_IN_PROGRESS;
+ cd->length = n;
+
+ ce = cd->entries;
+ ce->ptr = ptr;
+ ce->old = old;
+ ce->new = new;
+
+ va_start(ap, new);
+ for ( i = 1; i < n; i++ )
+ {
+ ce ++;
+ ce->ptr = va_arg(ap, void **);
+ ce->old = va_arg(ap, void *);
+ ce->new = va_arg(ap, void *);
+ }
+ va_end (ap);
+
+ /* Insertion sort. Fail on non-unique pointers. */
+ for ( i = 1, ce = &cd->entries[1]; i < n; i++, ce++ )
+ {
+ int j;
+ CasEntry_t *cei, tmp;
+ for ( j = i-1, cei = ce-1; j >= 0; j--, cei-- )
+ if ( cei->ptr <= ce->ptr ) break;
+ if ( cei->ptr == ce->ptr ) goto out;
+ if ( ++cei != ce )
+ {
+ tmp = *ce;
+ memmove(cei+1, cei, (ce-cei)*sizeof(CasEntry_t));
+ *cei = tmp;
+ }
+ }
+
+ result = mcas0(ptst, cd);
+ assert(cd->status != STATUS_IN_PROGRESS);
+
+ out:
+ rc_down_descriptor (cd);
+ return result;
+}
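+
+/*
+ * Illustrative call (a sketch; the locations and values are hypothetical):
+ * atomically update two pointers, provided both still hold their expected
+ * values.
+ *
+ *   if ( mcas(2,
+ *             (void **)&a->next, old_a, new_a,
+ *             (void **)&b->next, old_b, new_b) )
+ *   {
+ *       ... both updates took effect atomically ...
+ *   }
+ */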
+
--- /dev/null
+#ifndef __MIPS_DEFNS_H__
+#define __MIPS_DEFNS_H__
+
+#include <pthread.h>
+#include <sched.h>
+
+#ifndef MIPS
+#define MIPS
+#endif
+
+#define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
+
+#define CACHE_LINE_SIZE 64
+
+
+/*
+ * I. Compare-and-swap.
+ */
+
+#define FAS32(_a, _n) \
+({ __typeof__(_n) __r; \
+ __asm__ __volatile__( \
+ "1: ll %0,%1 ;" \
+ " move $3,%2 ;" \
+ " sc $3,%1 ;" \
+ " beqz $3,1b ;" \
+ : "=&r" (__r), "=m" (*(_a)) \
+ : "r" (_n) : "$3" ); \
+ __r; \
+})
+
+#define FAS64(_a, _n) \
+({ __typeof__(_n) __r; \
+ __asm__ __volatile__( \
+ "1: lld %0,%1 ;" \
+ " move $3,%2 ;" \
+ " scd $3,%1 ;" \
+ " beqz $3,1b ;" \
+ : "=&r" (__r), "=m" (*(_a)) \
+ : "r" (_n) : "$3" ); \
+ __r; \
+})
+
+#define CAS32(_a, _o, _n) \
+({ __typeof__(_o) __r; \
+ __asm__ __volatile__( \
+ "1: ll %0,%1 ;" \
+ " bne %0,%2,2f ;" \
+ " move $3,%3 ;" \
+ " sc $3,%1 ;" \
+ " beqz $3,1b ;" \
+ "2: " \
+ : "=&r" (__r), "=m" (*(_a)) \
+ : "r" (_o), "r" (_n) : "$3" ); \
+ __r; \
+})
+
+#define CAS64(_a, _o, _n) \
+({ __typeof__(_o) __r; \
+ __asm__ __volatile__( \
+ "1: lld %0,%1 ;" \
+ " bne %0,%2,2f ;" \
+ " move $3,%3 ;" \
+ " scd $3,%1 ;" \
+ " beqz $3,1b ;" \
+ "2: " \
+ : "=&r" (__r), "=m" (*(_a)) \
+ : "r" (_o), "r" (_n) : "$3" ); \
+ __r; \
+})
+
+#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n))
+#define FAS(_x,_n) ((sizeof (*_x) == 4)?FAS32(_x,_n) :FAS64(_x,_n))
+/* Update Integer location, return Old value. */
+#define CASIO(_x,_o,_n) CAS(_x,_o,_n)
+#define FASIO(_x,_n) FAS(_x,_n)
+/* Update Pointer location, return Old value. */
+#define CASPO(_x,_o,_n) (void*)CAS((_x),(void*)(_o),(void*)(_n))
+#define FASPO(_x,_n) (void*)FAS((_x),(void*)(_n))
+/* Update 32/64-bit location, return Old value. */
+#define CAS32O CAS32
+#define CAS64O CAS64
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+#define MB() __asm__ __volatile__ ("sync" : : : "memory")
+#define WMB() MB()
+#define RMB() MB()
+#define VOLATILE /*volatile*/
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+typedef unsigned long long tick_t;
+#define RDTICK() \
+ ({ tick_t __t; __asm__ __volatile__ ("dmfc0 %0,$9" : "=r" (__t)); __t; })
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long long _u64;
+
+#endif /* __MIPS_DEFNS_H__ */
--- /dev/null
+#ifndef __PORTABLE_DEFNS_H__
+#define __PORTABLE_DEFNS_H__
+
+#define MAX_THREADS 128 /* Nobody will ever have more! */
+
+#if defined(SPARC)
+#include "sparc_defns.h"
+#elif defined(INTEL)
+#include "intel_defns.h"
+#elif defined(PPC)
+#include "ppc_defns.h"
+#elif defined(IA64)
+#include "ia64_defns.h"
+#elif defined(MIPS)
+#include "mips_defns.h"
+#elif defined(ALPHA)
+#include "alpha_defns.h"
+#else
+#error "A valid architecture has not been defined"
+#endif
+
+#include <string.h>
+
+#ifndef MB_NEAR_CAS
+#define RMB_NEAR_CAS() RMB()
+#define WMB_NEAR_CAS() WMB()
+#define MB_NEAR_CAS() MB()
+#endif
+
+typedef unsigned long int_addr_t;
+
+typedef int bool_t;
+#define FALSE 0
+#define TRUE 1
+
+#define ADD_TO(_v,_x) \
+do { \
+ int __val = (_v), __newval; \
+ while ( (__newval = CASIO(&(_v),__val,__val+(_x))) != __val ) \
+ __val = __newval; \
+} while ( 0 )
+
+/*
+ * Allow us to efficiently align and pad structures so that shared fields
+ * don't cause contention on thread-local or read-only fields.
+ */
+#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
+#define ALIGNED_ALLOC(_s) \
+ ((void *)(((unsigned long)malloc((_s)+CACHE_LINE_SIZE*2) + \
+ CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE-1)))
+
+/*
+ * Interval counting
+ */
+
+typedef unsigned int interval_t;
+#define get_interval(_i) \
+do { \
+ interval_t _ni = interval; \
+ do { _i = _ni; } while ( (_ni = CASIO(&interval, _i, _i+1)) != _i ); \
+} while ( 0 )
+
+/*
+ * POINTER MARKING
+ */
+
+#define get_marked_ref(_p) ((void *)(((unsigned long)(_p)) | 1))
+#define get_unmarked_ref(_p) ((void *)(((unsigned long)(_p)) & ~1))
+#define is_marked_ref(_p) (((unsigned long)(_p)) & 1)
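+
+/*
+ * Sketch of the usual convention (assumed here, not mandated): the low bit
+ * of a node's successor pointer marks the node as logically deleted, e.g.
+ *
+ *   next = n->next;
+ *   if ( is_marked_ref(next) )
+ *       next = get_unmarked_ref(next);    (@n is being removed)
+ */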
+
+
+/*
+ * SUPPORT FOR WEAK ORDERING OF MEMORY ACCESSES
+ */
+
+#ifdef WEAK_MEM_ORDER
+
+#define MAYBE_GARBAGE (0)
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) \
+do { \
+ (_x) = (_f); \
+ if ( (_x) == MAYBE_GARBAGE ) { RMB(); (_x) = (_f); } \
+} while ( 0 )
+
+#define WEAK_DEP_ORDER_RMB() RMB()
+#define WEAK_DEP_ORDER_WMB() WMB()
+#define WEAK_DEP_ORDER_MB() MB()
+
+#else
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) ((_x) = (_f))
+
+#define WEAK_DEP_ORDER_RMB() ((void)0)
+#define WEAK_DEP_ORDER_WMB() ((void)0)
+#define WEAK_DEP_ORDER_MB() ((void)0)
+
+#endif
+
+/*
+ * Strong LL/SC operations
+ */
+
+static _u32 strong_ll(_u64 *ptr, int p)
+{
+ _u64 val_read;
+ _u64 new_val;
+ _u64 flag;
+
+ flag = (1LL << p);
+
+ new_val = *ptr;
+ do {
+ val_read = new_val;
+ new_val = val_read | flag;
+ } while ( ((val_read & flag) == 0) &&
+ ((new_val = CAS64O(ptr, val_read, new_val)) != val_read) );
+
+ return (_u32) (val_read >> 32);
+}
+
+static int strong_vl(_u64 *ptr, int p)
+{
+ _u64 val_read;
+ _u64 flag;
+
+ flag = (1LL << p);
+ val_read = *ptr;
+
+ return (val_read & flag);
+}
+
+static int strong_sc(_u64 *ptr, int p, _u32 n)
+{
+ _u64 val_read;
+ _u64 new_val;
+ _u64 flag;
+
+ flag = (1LL << p);
+ val_read = *ptr;
+
+ while ( (val_read & flag) != 0 )
+ {
+ new_val = (((_u64)n) << 32);
+
+ if ( (new_val = CAS64O(ptr, val_read, new_val)) == val_read )
+ {
+ return 1;
+ }
+
+ val_read = new_val;
+ }
+
+ return 0;
+}
+
+static void s_store(_u64 *ptr, _u32 n)
+{
+ _u64 new_val;
+
+ new_val = (((_u64)n) << 32);
+ *ptr = new_val;
+}
+
+static _u32 s_load(_u64 *ptr)
+{
+ _u64 val_read;
+
+ val_read = *ptr;
+ return (val_read >> 32);
+}
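+
+/*
+ * Illustrative use of the strong LL/SC pair (a sketch): a thread with
+ * reservation bit @p (0 <= p < 32) updates the 32-bit value held in the
+ * top half of @word with a standard LL/SC retry loop:
+ *
+ *   _u32 v;
+ *   do { v = strong_ll(&word, p); }
+ *   while ( !strong_sc(&word, p, v + 1) );
+ */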
+
+
+/*
+ * MCS lock
+ */
+
+typedef struct qnode_t qnode_t;
+
+struct qnode_t {
+ qnode_t *next;
+ int locked;
+};
+
+typedef struct {
+ qnode_t *tail;
+} mcs_lock_t;
+
+static void mcs_init(mcs_lock_t *lock)
+{
+ lock->tail = NULL;
+}
+
+static void mcs_lock(mcs_lock_t *lock, qnode_t *qn)
+{
+ qnode_t *pred;
+
+ qn->next = NULL;
+ qn->locked = 1;
+ WMB_NEAR_CAS();
+
+ pred = FASPO(&lock->tail, qn);
+ if ( pred != NULL )
+ {
+ pred->next = qn;
+ while ( qn->locked ) RMB();
+ }
+
+ MB();
+}
+
+static void mcs_unlock(mcs_lock_t *lock, qnode_t *qn)
+{
+ qnode_t *t = qn->next;
+
+ MB();
+
+ if ( t == NULL )
+ {
+ if ( CASPO(&lock->tail, qn, NULL) == qn ) return;
+ while ( (t = qn->next) == NULL ) RMB();
+ WEAK_DEP_ORDER_MB();
+ }
+
+ t->locked = 0;
+}
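+
+/*
+ * Usage sketch: the queue node lives on the caller's stack and must remain
+ * valid until mcs_unlock() returns.
+ *
+ *   static mcs_lock_t lk;      (initialised once with mcs_init(&lk))
+ *   qnode_t qn;
+ *
+ *   mcs_lock(&lk, &qn);
+ *   ... critical section ...
+ *   mcs_unlock(&lk, &qn);
+ */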
+
+
+/*
+ * MCS fair MRSW lock.
+ */
+
+typedef struct mrsw_qnode_st mrsw_qnode_t;
+
+struct mrsw_qnode_st {
+#define CLS_RD 0
+#define CLS_WR 1
+ int class;
+#define ST_NOSUCC 0
+#define ST_RDSUCC 1
+#define ST_WRSUCC 2
+#define ST_SUCCMASK 3
+#define ST_BLOCKED 4
+ int state;
+ mrsw_qnode_t *next;
+};
+
+typedef struct {
+ mrsw_qnode_t *tail;
+ mrsw_qnode_t *next_writer;
+ int reader_count;
+} mrsw_lock_t;
+
+
+#define CLEAR_BLOCKED(_qn) ADD_TO((_qn)->state, -ST_BLOCKED)
+
+static void mrsw_init(mrsw_lock_t *lock)
+{
+ memset(lock, 0, sizeof(*lock));
+}
+
+static void rd_lock(mrsw_lock_t *lock, mrsw_qnode_t *qn)
+{
+ mrsw_qnode_t *pred, *next;
+
+ qn->class = CLS_RD;
+ qn->next = NULL;
+ qn->state = ST_NOSUCC | ST_BLOCKED;
+
+ WMB_NEAR_CAS();
+
+ pred = FASPO(&lock->tail, qn);
+
+ if ( pred == NULL )
+ {
+ ADD_TO(lock->reader_count, 1);
+ CLEAR_BLOCKED(qn);
+ }
+ else
+ {
+ if ( (pred->class == CLS_WR) ||
+ (CASIO(&pred->state, ST_BLOCKED|ST_NOSUCC, ST_BLOCKED|ST_RDSUCC)
+ == (ST_BLOCKED|ST_NOSUCC)) )
+ {
+ WEAK_DEP_ORDER_WMB();
+ pred->next = qn;
+ while ( (qn->state & ST_BLOCKED) ) RMB();
+ }
+ else
+ {
+ ADD_TO(lock->reader_count, 1);
+ pred->next = qn;
+ WEAK_DEP_ORDER_WMB();
+ CLEAR_BLOCKED(qn);
+ }
+ }
+
+ if ( qn->state == ST_RDSUCC )
+ {
+ while ( (next = qn->next) == NULL ) RMB();
+ ADD_TO(lock->reader_count, 1);
+ WEAK_DEP_ORDER_WMB();
+ CLEAR_BLOCKED(next);
+ }
+
+ RMB();
+}
+
+static void rd_unlock(mrsw_lock_t *lock, mrsw_qnode_t *qn)
+{
+ mrsw_qnode_t *next = qn->next;
+ int c, oc;
+
+ RMB();
+
+ if ( (next != NULL) || (CASPO(&lock->tail, qn, NULL) != qn) )
+ {
+ while ( (next = qn->next) == NULL ) RMB();
+ if ( (qn->state & ST_SUCCMASK) == ST_WRSUCC )
+ {
+ lock->next_writer = next;
+ WMB_NEAR_CAS(); /* set next_writer before dec'ing refcnt */
+ }
+ }
+
+ /* Bounded to maximum # readers if no native atomic_decrement */
+ c = lock->reader_count;
+ while ( (oc = CASIO(&lock->reader_count, c, c-1)) != c ) c = oc;
+
+ if ( c == 1 )
+ {
+ WEAK_DEP_ORDER_MB();
+ if ( (next = lock->next_writer) != NULL )
+ {
+ RMB();
+ if ( (lock->reader_count == 0) &&
+ (CASPO(&lock->next_writer, next, NULL) == next) )
+ {
+ WEAK_DEP_ORDER_WMB();
+ CLEAR_BLOCKED(next);
+ }
+ }
+ }
+}
+
+static void wr_lock(mrsw_lock_t *lock, mrsw_qnode_t *qn)
+{
+ mrsw_qnode_t *pred;
+ int os, s;
+
+ qn->class = CLS_WR;
+ qn->next = NULL;
+ qn->state = ST_NOSUCC | ST_BLOCKED;
+
+ WMB_NEAR_CAS();
+
+ pred = FASPO(&lock->tail, qn);
+
+ if ( pred == NULL )
+ {
+ WEAK_DEP_ORDER_WMB();
+ lock->next_writer = qn;
+ MB(); /* check reader_count after setting next_writer. */
+ if ( (lock->reader_count == 0) &&
+ (CASPO(&lock->next_writer, qn, NULL) == qn) )
+ {
+ CLEAR_BLOCKED(qn);
+ }
+ }
+ else
+ {
+ s = pred->state;
+ /* Bounded while loop: only one other remote update may occur. */
+ while ( (os = CASIO(&pred->state, s, s | ST_WRSUCC)) != s ) s = os;
+ WMB();
+ pred->next = qn;
+ }
+
+ while ( (qn->state & ST_BLOCKED) ) RMB();
+
+ MB();
+}
+
+static void wr_unlock(mrsw_lock_t *lock, mrsw_qnode_t *qn)
+{
+ mrsw_qnode_t *next = qn->next;
+
+ MB();
+
+ if ( (next != NULL) || (CASPO(&lock->tail, qn, NULL) != qn) )
+ {
+ while ( (next = qn->next) == NULL ) RMB();
+ WEAK_DEP_ORDER_MB();
+ if ( next->class == CLS_RD )
+ {
+ ADD_TO(lock->reader_count, 1);
+ WMB();
+ }
+ CLEAR_BLOCKED(next);
+ }
+}
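+
+/*
+ * Usage sketch (mirroring the red-black tree code that uses this lock):
+ * readers and writers each supply a stack-allocated queue node.
+ *
+ *   mrsw_lock_t lk;            (initialised once with mrsw_init(&lk))
+ *   mrsw_qnode_t qn;
+ *
+ *   rd_lock(&lk, &qn);
+ *   ... read-side critical section ...
+ *   rd_unlock(&lk, &qn);
+ *
+ *   wr_lock(&lk, &qn);
+ *   ... write-side critical section ...
+ *   wr_unlock(&lk, &qn);
+ */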
+
+
+#endif /* __PORTABLE_DEFNS_H__ */
--- /dev/null
+#ifndef __PPC_DEFNS_H__
+#define __PPC_DEFNS_H__
+
+#ifndef PPC
+#define PPC
+#endif
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sched.h>
+
+#define CACHE_LINE_SIZE 64
+
+#include <pthread.h>
+
+
+/*
+ * I. Compare-and-swap.
+ */
+
+static int FAS32(void *, int);
+static long FAS64(void *, long);
+static int CAS32(void *, int, int);
+static long CAS64(void *, long, long);
+
+#pragma mc_func FAS32 { \
+ "7c001828" /* 1: lwarx r0,0,r3 */ \
+ "7c80192d" /* stwcx r4,0,r3 */ \
+ "4082fff8" /* bne 1 */ \
+ "60030000" /* ori r3,0,r0 */ \
+}
+
+#pragma mc_func FAS64 { \
+ "7c0018a8" /* 1: ldarx r0,0,r3 */ \
+ "7c8019ad" /* stdcx r4,0,r3 */ \
+ "4082fff8" /* bne 1 */ \
+ "60030000" /* ori r3,0,r0 */ \
+}
+
+#pragma mc_func CAS32 { \
+ "7c001828" /* 1: lwarx r0,0,r3 */ \
+ "7c002000" /* cmpw r0,r4 */ \
+ "4082000c" /* bne 2 */ \
+ "7ca0192d" /* stwcx r5,0,r3 */ \
+ "4082fff0" /* bne 1 */ \
+ "60030000" /* 2: ori r3,0,r0 */ \
+}
+
+#pragma mc_func CAS64 { \
+ "7c0018a8" /* 1: ldarx r0,0,r3 */ \
+ "7c202000" /* cmpd r0,r4 */ \
+ "4082000c" /* bne 2 */ \
+ "7ca019ad" /* stdcx r5,0,r3 */ \
+ "4082fff0" /* bne 1 */ \
+ "60030000" /* 2: ori r3,0,r0 */ \
+}
+
+#define CASIO(_a,_o,_n) ((int)CAS32((int*)(_a),(int)(_o),(int)(_n)))
+#define FASIO(_a,_n) ((int)FAS32((int*)(_a),(int)(_n)))
+#define CASPO(_a,_o,_n) ((void *)(CAS64((long*)(_a),(long)(_o),(long)(_n))))
+#define FASPO(_a,_n) ((void *)(FAS64((long*)(_a),(long)(_n))))
+#define CAS32O(_a,_o,_n) ((_u32)(CAS32((_u32*)(_a),(_u32)(_o),(_u32)(_n))))
+#define CAS64O(_a,_o,_n) ((_u64)(CAS64((long*)(_a),(long)(_o),(long)(_n))))
+
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+static void WMB(void);
+static void RMB(void);
+static void MB(void);
+
+#pragma mc_func WMB { "7c0004ac" } /* msync (orders memory transactions) */
+#pragma mc_func RMB { "4c00012c" } /* isync (orders instruction issue) */
+#pragma mc_func MB { "7c0004ac" } /* msync (orders memory transactions) */
+
+#define VOLATILE /*volatile*/
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+typedef unsigned long tick_t;
+static tick_t RDTICK(void);
+#pragma mc_func RDTICK { "7c6c42e6" } /* mftb r3 */
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long _u64;
+
+#endif /* __PPC_DEFNS_H__ */
--- /dev/null
+/******************************************************************************
+ * ptst.c
+ *
+ * Per-thread state management. Essentially the state management parts
+ * of MB's garbage-collection code have been pulled out and placed here,
+ * for the use of other utility routines.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "portable_defns.h"
+#include "ptst.h"
+
+
+pthread_key_t ptst_key;
+ptst_t *ptst_list;
+
+static unsigned int next_id;
+
+ptst_t *critical_enter(void)
+{
+ ptst_t *ptst, *next, *new_next;
+ unsigned int id, oid;
+
+ ptst = (ptst_t *)pthread_getspecific(ptst_key);
+ if ( ptst == NULL )
+ {
+ for ( ptst = ptst_first(); ptst != NULL; ptst = ptst_next(ptst) )
+ {
+ if ( (ptst->count == 0) && (CASIO(&ptst->count, 0, 1) == 0) )
+ {
+ break;
+ }
+ }
+
+ if ( ptst == NULL )
+ {
+ ptst = ALIGNED_ALLOC(sizeof(*ptst));
+ if ( ptst == NULL ) exit(1);
+ memset(ptst, 0, sizeof(*ptst));
+ ptst->gc = gc_init();
+ rand_init(ptst);
+ ptst->count = 1;
+ id = next_id;
+ while ( (oid = CASIO(&next_id, id, id+1)) != id ) id = oid;
+ ptst->id = id;
+ new_next = ptst_list;
+ do {
+ ptst->next = next = new_next;
+ WMB_NEAR_CAS();
+ }
+ while ( (new_next = CASPO(&ptst_list, next, ptst)) != next );
+ }
+
+ pthread_setspecific(ptst_key, ptst);
+ }
+
+ gc_enter(ptst);
+ return(ptst);
+}
+
+
+static void ptst_destructor(ptst_t *ptst)
+{
+ ptst->count = 0;
+}
+
+
+void _init_ptst_subsystem(void)
+{
+ ptst_list = NULL;
+ next_id = 0;
+ WMB();
+ if ( pthread_key_create(&ptst_key, (void (*)(void *))ptst_destructor) )
+ {
+ exit(1);
+ }
+}
--- /dev/null
+/******************************************************************************
+ * ptst.h
+ *
+ * Per-thread state management.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ */
+
+#ifndef __PTST_H__
+#define __PTST_H__
+
+typedef struct ptst_st ptst_t;
+
+#include "gc.h"
+#include "random.h"
+
+struct ptst_st
+{
+ /* Thread id */
+ unsigned int id;
+
+ /* State management */
+ ptst_t *next;
+ unsigned int count;
+ /* Utility structures */
+ gc_t *gc;
+ rand_t rand;
+};
+
+extern pthread_key_t ptst_key;
+
+/*
+ * Enter/leave a critical region. A thread gets a state handle for
+ * use during critical regions.
+ */
+ptst_t *critical_enter(void);
+#define critical_exit(_p) gc_exit(_p)
+
+/* Iterators */
+extern ptst_t *ptst_list;
+#define ptst_first() (ptst_list)
+#define ptst_next(_p) ((_p)->next)
+
+/* Called once at start-of-day for entire application. */
+void _init_ptst_subsystem(void);
+
+#endif /* __PTST_H__ */
--- /dev/null
+/******************************************************************************
+ * random.h
+ *
+ * A really simple random-number generator. Crappy linear congruential
+ * taken from glibc, but has at least a 2^32 period.
+ */
+
+#ifndef __RANDOM_H__
+#define __RANDOM_H__
+
+typedef unsigned long rand_t;
+
+#define rand_init(_ptst) \
+ ((_ptst)->rand = RDTICK())
+
+#define rand_next(_ptst) \
+ ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345)
+
+#endif /* __RANDOM_H__ */
--- /dev/null
+/******************************************************************************
+ * rb_lock_concurrentwriters.c
+ *
+ * Lock-based red-black trees, based on Hanke's relaxed balancing operations.
+ *
+ * For more details on the local tree restructuring operations used here:
+ * S. Hanke, T. Ottmann, and E. Soisalon-Soininen.
+ * "Relaxed balanced red-black trees".
+ * 3rd Italian Conference on Algorithms and Complexity, pages 193-204.
+ *
+ * Rather than issuing up-in and up-out requests to a balancing process,
+ * each operation is directly responsible for local rebalancing. However,
+ * this process can be split into a number of individual restructuring
+ * operations, and locks can be released between each operation. Between
+ * operations, we mark the node concerned as UNBALANCED -- contending
+ * updates will then wait for this mark to be removed before continuing.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define BLACK_MARK 0
+#define RED_MARK 1
+#define UNBALANCED_MARK 2
+
+#define SET_VALUE(_v,_n) \
+ ((_v) = ((setval_t)(((unsigned long)(_v)&3)|((unsigned long)(_n)))))
+#define GET_VALUE(_v) ((setval_t)((int_addr_t)(_v) & ~3UL))
+#define GET_COLOUR(_v) ((int_addr_t)(_v) & 1)
+#define SET_COLOUR(_v,_c) \
+ ((setval_t)(((unsigned long)(_v)&~1UL)|(unsigned long)(_c)))
+
+#define IS_BLACK(_v) (GET_COLOUR(_v) == 0)
+#define IS_RED(_v) (GET_COLOUR(_v) == 1)
+#define IS_UNBALANCED(_v) (((int_addr_t)(_v) & 2) == 2)
+
+#define MK_BLACK(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 0))
+#define MK_RED(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 1))
+#define MK_BALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 0))
+#define MK_UNBALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 2))
+
+#define GARBAGE_VALUE ((setval_t)4)
+#define IS_GARBAGE(_n) (GET_VALUE((_n)->v) == GARBAGE_VALUE)
+#define MK_GARBAGE(_n) (SET_VALUE((_n)->v, GARBAGE_VALUE))
+
+#define INTERNAL_VALUE ((void *)0xdeadbee0)
+
+#define IS_ROOT(_n) ((_n)->p->k == 0)
+#define IS_LEAF(_n) ((_n)->l == NULL)
+
+/* TRUE if node X is a child of P. */
+#define ADJACENT(_p,_x) (((_p)->l==(_x))||((_p)->r==(_x)))
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r, *p;
+ mrsw_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+ node_t null;
+ node_t dummy_g, dummy_gg;
+};
+
+static int gc_id;
+
+/* Nodes p, x, y must be locked for writing. */
+static void left_rotate(node_t *x)
+{
+ node_t *y = x->r, *p = x->p;
+ x->r = y->l;
+ x->r->p = x;
+ x->p = y;
+ y->l = x;
+ y->p = p;
+ if ( x == p->l ) p->l = y; else p->r = y;
+}
+
+
+/* Nodes p, x, y must be locked for writing. */
+static void right_rotate(node_t *x)
+{
+ node_t *y = x->l, *p = x->p;
+ x->l = y->r;
+ x->l->p = x;
+ x->p = y;
+ y->r = x;
+ y->p = p;
+ if ( x == p->l ) p->l = y; else p->r = y;
+}
+
+
+static void fix_unbalance_up(node_t *x)
+{
+ mrsw_qnode_t x_qn, g_qn, p_qn, w_qn, gg_qn;
+ node_t *g, *p, *w, *gg;
+ int done = 0;
+
+ do {
+ assert(IS_UNBALANCED(x->v));
+ if ( IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+ gg = g->p;
+
+ wr_lock(&gg->lock, &gg_qn);
+ if ( !ADJACENT(gg, g) || IS_UNBALANCED(gg->v) || IS_GARBAGE(gg) )
+ goto unlock_gg;
+
+ wr_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) ) goto unlock_ggg;
+
+ wr_lock(&p->lock, &p_qn);
+ if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pggg;
+
+ wr_lock(&x->lock, &x_qn);
+
+ assert(IS_RED(x->v));
+ assert(IS_UNBALANCED(x->v));
+
+ if ( IS_BLACK(p->v) )
+ {
+ /* Case 1. Nothing to do. */
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( IS_ROOT(x) )
+ {
+ /* Case 2. */
+ x->v = MK_BLACK(MK_BALANCED(x->v));
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( IS_ROOT(p) )
+ {
+ /* Case 2. */
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( g->l == p ) w = g->r; else w = g->l;
+ wr_lock(&w->lock, &w_qn);
+
+ if ( IS_RED(w->v) )
+ {
+ /* Case 5. */
+ /* In all other cases, doesn't change colour or subtrees. */
+ if ( IS_UNBALANCED(w->v) ) goto unlock_wxpggg;
+ g->v = MK_UNBALANCED(MK_RED(g->v));
+ p->v = MK_BLACK(p->v);
+ w->v = MK_BLACK(w->v);
+ x->v = MK_BALANCED(x->v);
+ done = 2;
+ goto unlock_wxpggg;
+ }
+
+ /* Cases 3 & 4. Both of these need the great-grandfather locked. */
+ if ( p == g->l )
+ {
+ if ( x == p->l )
+ {
+ /* Case 3. Single rotation. */
+ x->v = MK_BALANCED(x->v);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ right_rotate(g);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ x->v = MK_BALANCED(MK_BLACK(x->v));
+ g->v = MK_RED(g->v);
+ left_rotate(p);
+ right_rotate(g);
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ if ( x == p->r )
+ {
+ /* Case 3. Single rotation. */
+ x->v = MK_BALANCED(x->v);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ left_rotate(g);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ x->v = MK_BALANCED(MK_BLACK(x->v));
+ g->v = MK_RED(g->v);
+ right_rotate(p);
+ left_rotate(g);
+ }
+ }
+
+ done = 1;
+
+ unlock_wxpggg:
+ wr_unlock(&w->lock, &w_qn);
+ unlock_xpggg:
+ wr_unlock(&x->lock, &x_qn);
+ unlock_pggg:
+ wr_unlock(&p->lock, &p_qn);
+ unlock_ggg:
+ wr_unlock(&g->lock, &g_qn);
+ unlock_gg:
+ wr_unlock(&gg->lock, &gg_qn);
+
+ if ( done == 2 )
+ {
+ x = g;
+ done = 0;
+ }
+ }
+ while ( !done );
+}
+
+
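+/*
+ * Resolve a "missing black" at node X, which a deletion has marked
+ * UNBALANCED. The parent, grandparent, sibling W and W's children are
+ * locked and revalidated before the usual deletion fix-up cases are
+ * applied; some cases push the unbalance up to the parent (done == 2).
+ */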
+static void fix_unbalance_down(node_t *x)
+{
+ /* WN == W_NEAR, WF == W_FAR (W_FAR is further, in key space, from X). */
+ mrsw_qnode_t x_qn, w_qn, p_qn, g_qn, wn_qn, wf_qn;
+ node_t *w, *p, *g, *wn, *wf;
+ int done = 0;
+
+ do {
+ if ( !IS_UNBALANCED(x->v) || IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+
+ wr_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) )
+ goto unlock_g;
+
+ wr_lock(&p->lock, &p_qn);
+ if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pg;
+
+ wr_lock(&x->lock, &x_qn);
+
+ if ( !IS_BLACK(x->v) || !IS_UNBALANCED(x->v) )
+ {
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ if ( IS_ROOT(x) )
+ {
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ w = (x == p->l) ? p->r : p->l;
+ wr_lock(&w->lock, &w_qn);
+ if ( IS_UNBALANCED(w->v) )
+ {
+ if ( IS_BLACK(w->v) )
+ {
+ /* Funky relaxed rules to the rescue. */
+ x->v = MK_BALANCED(x->v);
+ w->v = MK_BALANCED(w->v);
+ if ( IS_BLACK(p->v) )
+ {
+ p->v = MK_UNBALANCED(p->v);
+ done = 2;
+ }
+ else
+ {
+ p->v = MK_BLACK(p->v);
+ done = 1;
+ }
+ }
+ goto unlock_wxpg;
+ }
+
+ assert(!IS_LEAF(w));
+
+ if ( x == p->l )
+ {
+ wn = w->l;
+ wf = w->r;
+ }
+ else
+ {
+ wn = w->r;
+ wf = w->l;
+ }
+
+ wr_lock(&wn->lock, &wn_qn);
+ /* Hanke has an extra relaxed transform here. It's not needed. */
+ if ( IS_UNBALANCED(wn->v) ) goto unlock_wnwxpg;
+
+ wr_lock(&wf->lock, &wf_qn);
+ if ( IS_UNBALANCED(wf->v) ) goto unlock_wfwnwxpg;
+
+ if ( IS_RED(w->v) )
+ {
+ /* Case 1. Rotate at parent. */
+ assert(IS_BLACK(p->v) && IS_BLACK(wn->v) && IS_BLACK(wf->v));
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ if ( x == p->l ) left_rotate(p); else right_rotate(p);
+ goto unlock_wfwnwxpg;
+ }
+
+ if ( IS_BLACK(wn->v) && IS_BLACK(wf->v) )
+ {
+ if ( IS_RED(p->v) )
+ {
+ /* Case 2. Simple recolouring. */
+ p->v = MK_BLACK(p->v);
+ done = 1;
+ }
+ else
+ {
+ /* Case 5. Simple recolouring. */
+ p->v = MK_UNBALANCED(p->v);
+ done = 2;
+ }
+ w->v = MK_RED(w->v);
+ x->v = MK_BALANCED(x->v);
+ goto unlock_wfwnwxpg;
+ }
+
+ if ( x == p->l )
+ {
+ if ( IS_RED(wf->v) )
+ {
+ /* Case 3. Single rotation. */
+ wf->v = MK_BLACK(wf->v);
+ w->v = SET_COLOUR(w->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ left_rotate(p);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ assert(IS_RED(wn->v));
+ wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ right_rotate(w);
+ left_rotate(p);
+ }
+ }
+ else /* SYMMETRIC CASE: X == P->R */
+ {
+ if ( IS_RED(wf->v) )
+ {
+ /* Case 3. Single rotation. */
+ wf->v = MK_BLACK(wf->v);
+ w->v = SET_COLOUR(w->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ right_rotate(p);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ assert(IS_RED(wn->v));
+ wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ left_rotate(w);
+ right_rotate(p);
+ }
+ }
+
+ done = 1;
+
+ unlock_wfwnwxpg:
+ wr_unlock(&wf->lock, &wf_qn);
+ unlock_wnwxpg:
+ wr_unlock(&wn->lock, &wn_qn);
+ unlock_wxpg:
+ wr_unlock(&w->lock, &w_qn);
+ unlock_xpg:
+ wr_unlock(&x->lock, &x_qn);
+ unlock_pg:
+ wr_unlock(&p->lock, &p_qn);
+ unlock_g:
+ wr_unlock(&g->lock, &g_qn);
+
+ if ( done == 2 )
+ {
+ x = p;
+ done = 0;
+ }
+ }
+ while ( !done );
+}
+
+
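+/*
+ * Physically unlink a logically-deleted leaf X (its value is already NULL):
+ * the parent P is spliced out, X's sibling W is linked up to grandparent G,
+ * and X and P are handed to the garbage collector. If P and W were both
+ * black, W is marked UNBALANCED and fix_unbalance_down() repairs the tree.
+ */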
+static void delete_finish(ptst_t *ptst, node_t *x)
+{
+ mrsw_qnode_t g_qn, p_qn, w_qn, x_qn;
+ node_t *g, *p, *w;
+ int done = 0;
+
+ do {
+ if ( IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+
+ wr_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) )
+ goto unlock_g;
+
+ wr_lock(&p->lock, &p_qn);
+ /* Removing unbalanced red nodes is okay. */
+ if ( !ADJACENT(p, x) || (IS_UNBALANCED(p->v) && IS_BLACK(p->v)) )
+ goto unlock_pg;
+
+ wr_lock(&x->lock, &x_qn);
+ if ( IS_UNBALANCED(x->v) ) goto unlock_xpg;
+ if ( GET_VALUE(x->v) != NULL )
+ {
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ if ( p->l == x ) w = p->r; else w = p->l;
+ assert(w != x);
+ wr_lock(&w->lock, &w_qn);
+ if ( IS_UNBALANCED(w->v) ) goto unlock_wxpg;
+
+ if ( g->l == p ) g->l = w; else g->r = w;
+ MK_GARBAGE(p); gc_free(ptst, p, gc_id);
+ MK_GARBAGE(x); gc_free(ptst, x, gc_id);
+ w->p = g;
+ if ( IS_BLACK(p->v) && IS_BLACK(w->v) )
+ {
+ w->v = MK_UNBALANCED(w->v);
+ done = 2;
+ }
+ else
+ {
+ w->v = MK_BLACK(w->v);
+ done = 1;
+ }
+
+ unlock_wxpg:
+ wr_unlock(&w->lock, &w_qn);
+ unlock_xpg:
+ wr_unlock(&x->lock, &x_qn);
+ unlock_pg:
+ wr_unlock(&p->lock, &p_qn);
+ unlock_g:
+ wr_unlock(&g->lock, &g_qn);
+ }
+ while ( !done );
+
+ if ( done == 2 ) fix_unbalance_down(w);
+}
+
+
+set_t *set_alloc(void)
+{
+ ptst_t *ptst;
+ set_t *set;
+ node_t *root, *null;
+
+ ptst = critical_enter();
+
+ set = (set_t *)malloc(sizeof(*set));
+ memset(set, 0, sizeof(*set));
+
+ root = &set->root;
+ null = &set->null;
+
+ root->k = 0;
+ root->v = MK_RED(INTERNAL_VALUE);
+ root->l = NULL;
+ root->r = null;
+ root->p = NULL;
+ mrsw_init(&root->lock);
+
+ null->k = SENTINEL_KEYMIN;
+ null->v = MK_BLACK(INTERNAL_VALUE);
+ null->l = NULL;
+ null->r = NULL;
+ null->p = root;
+ mrsw_init(&null->lock);
+
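+    /*
+     * Dummy grandparent and great-grandparent nodes above the root let the
+     * rebalancing routines dereference p->p->p near the top of the tree
+     * without special-casing.
+     */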
+ set->dummy_gg.l = &set->dummy_g;
+ set->dummy_g.p = &set->dummy_gg;
+ set->dummy_g.l = &set->root;
+ set->root.p = &set->dummy_g;
+
+ critical_exit(ptst);
+
+ return set;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ ptst_t *ptst;
+ node_t *x, *y, *z, *new_internal, *new_leaf;
+ mrsw_qnode_t qn[2], *y_pqn=qn+0, *z_pqn=qn+1, *t_pqn, x_qn;
+ int fix_up = 0;
+ setval_t ov = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ /*
+ * We start our search by read-lock-coupling from the root.
+ * There is a special case, when there is only one node in the tree.
+ * In this case, we take a write lock on the root.
+ */
+ retry_from_root:
+ z = &s->root;
+ rd_lock(&z->lock, z_pqn);
+
+ /*
+ * We read-couple down the tree until we get within two nodes of the
+ * required leaf. We then speculatively take write locks.
+ */
+ carry_on:
+ while ( (y = (k <= z->k) ? z->l : z->r) != NULL )
+ {
+ if ( IS_LEAF(y) )
+ {
+ y = z;
+ rd_unlock(&z->lock, z_pqn);
+ wr_lock(&y->lock, y_pqn);
+ x = (k <= z->k) ? z->l : z->r;
+ if ( IS_GARBAGE(y) || !IS_LEAF(x) )
+ {
+ wr_unlock(&y->lock, y_pqn);
+ goto retry_from_root;
+ }
+ wr_lock(&x->lock, &x_qn);
+ assert(!IS_GARBAGE(x));
+ goto found_and_locked;
+ }
+
+ x = (k <= y->k) ? y->l : y->r;
+ if ( IS_LEAF(x) ) goto found;
+ rd_lock(&y->lock, y_pqn);
+ rd_unlock(&z->lock, z_pqn);
+ z = y;
+ t_pqn = y_pqn;
+ y_pqn = z_pqn;
+ z_pqn = t_pqn;
+ }
+
+ /*
+     * At this point Z is read locked, and the next two nodes on the search
+     * path are probably the last two. Certainly there is more than one node
+     * left on the path.
+ */
+ found:
+ wr_lock(&y->lock, y_pqn);
+ x = (k <= y->k) ? y->l : y->r;
+ if ( !IS_LEAF(x) )
+ {
+ wr_unlock(&y->lock, y_pqn);
+ goto carry_on;
+ }
+ wr_lock(&x->lock, &x_qn);
+ rd_unlock(&z->lock, z_pqn);
+
+ found_and_locked:
+ /*
+     * At this point, node X is write locked and may be the correct node.
+ * Y is X's parent, and is also write locked. No other node is locked.
+ */
+ assert(!IS_GARBAGE(x));
+ if ( x->k == k )
+ {
+ ov = GET_VALUE(x->v);
+ if ( overwrite || (ov == NULL) )
+ {
+ SET_VALUE(x->v, v);
+ }
+ }
+ else
+ {
+ new_leaf = gc_alloc(ptst, gc_id);
+ new_internal = gc_alloc(ptst, gc_id);
+ new_leaf->k = k;
+ new_leaf->v = MK_BLACK(v);
+ new_leaf->l = NULL;
+ new_leaf->r = NULL;
+ new_leaf->p = new_internal;
+ mrsw_init(&new_leaf->lock);
+ if ( x->k < k )
+ {
+ new_internal->k = x->k;
+ new_internal->l = x;
+ new_internal->r = new_leaf;
+ }
+ else
+ {
+ new_internal->k = k;
+ new_internal->l = new_leaf;
+ new_internal->r = x;
+ }
+ new_internal->p = y;
+ mrsw_init(&new_internal->lock);
+ x->p = new_internal;
+ if ( y->l == x ) y->l = new_internal; else y->r = new_internal;
+ if ( IS_UNBALANCED(x->v) )
+ {
+ x->v = MK_BALANCED(x->v);
+ new_internal->v = MK_BLACK(INTERNAL_VALUE);
+ }
+ else if ( IS_RED(y->v) )
+ {
+ new_internal->v = MK_UNBALANCED(MK_RED(INTERNAL_VALUE));
+ fix_up = 1;
+ }
+ else
+ {
+ new_internal->v = MK_RED(INTERNAL_VALUE);
+ }
+ }
+
+ wr_unlock(&y->lock, y_pqn);
+ wr_unlock(&x->lock, &x_qn);
+
+ if ( fix_up ) fix_unbalance_up(new_internal);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *y, *z;
+ mrsw_qnode_t qn[2], *y_pqn=qn+0, *z_pqn=qn+1, *t_pqn;
+ setval_t ov = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ z = &s->root;
+ rd_lock(&z->lock, z_pqn);
+
+ while ( (y = (k <= z->k) ? z->l : z->r) != NULL )
+ {
+ if ( IS_LEAF(y) )
+ wr_lock(&y->lock, y_pqn);
+ else
+ rd_lock(&y->lock, y_pqn);
+ rd_unlock(&z->lock, z_pqn);
+ z = y;
+ t_pqn = y_pqn;
+ y_pqn = z_pqn;
+ z_pqn = t_pqn;
+ }
+
+ if ( z->k == k )
+ {
+ ov = GET_VALUE(z->v);
+ SET_VALUE(z->v, NULL);
+ }
+
+ wr_unlock(&z->lock, z_pqn);
+
+ if ( ov != NULL ) delete_finish(ptst, z);
+
+ critical_exit(ptst);
+ return ov;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *m, *n;
+ mrsw_qnode_t qn[2], *m_pqn=&qn[0], *n_pqn=&qn[1], *t_pqn;
+ setval_t v = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = &s->root;
+ rd_lock(&n->lock, n_pqn);
+
+ while ( (m = (k <= n->k) ? n->l : n->r) != NULL )
+ {
+ rd_lock(&m->lock, m_pqn);
+ rd_unlock(&n->lock, n_pqn);
+ n = m;
+ t_pqn = m_pqn;
+ m_pqn = n_pqn;
+ n_pqn = t_pqn;
+ }
+
+ if ( k == n->k ) v = GET_VALUE(n->v);
+
+ rd_unlock(&n->lock, n_pqn);
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+}
--- /dev/null
+/******************************************************************************
+ * rb_lock_mutex.c
+ *
+ * Lock-based red-black trees, based on Hanke's relaxed balancing operations.
+ *
+ * For more details on the local tree restructuring operations used here:
+ * S. Hanke, T. Ottmann, and E. Soisalon-Soininen.
+ * "Relaxed balanced red-black trees".
+ * 3rd Italian Conference on Algorithms and Complexity, pages 193-204.
+ *
+ * Rather than issuing up-in and up-out requests to a balancing process,
+ * each operation is directly responsible for local rebalancing. However,
+ * this process can be split into a number of individual restructuring
+ * operations, and locks can be released between each operation. Between
+ * operations, we mark the node concerned as UNBALANCED -- contending
+ * updates will then wait for this mark to be removed before continuing.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define BLACK_MARK 0
+#define RED_MARK 1
+#define UNBALANCED_MARK 2
+
+#define SET_VALUE(_v,_n) \
+ ((_v) = ((setval_t)(((unsigned long)(_v)&3)|((unsigned long)(_n)))))
+#define GET_VALUE(_v) ((setval_t)((int_addr_t)(_v) & ~3UL))
+#define GET_COLOUR(_v) ((int_addr_t)(_v) & 1)
+#define SET_COLOUR(_v,_c) \
+ ((setval_t)(((unsigned long)(_v)&~1UL)|(unsigned long)(_c)))
+
+#define IS_BLACK(_v) (GET_COLOUR(_v) == 0)
+#define IS_RED(_v) (GET_COLOUR(_v) == 1)
+#define IS_UNBALANCED(_v) (((int_addr_t)(_v) & 2) == 2)
+
+#define MK_BLACK(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 0))
+#define MK_RED(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 1))
+#define MK_BALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 0))
+#define MK_UNBALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 2))
+
+#define GARBAGE_VALUE ((setval_t)4)
+#define IS_GARBAGE(_n) (GET_VALUE((_n)->v) == GARBAGE_VALUE)
+#define MK_GARBAGE(_n) (SET_VALUE((_n)->v, GARBAGE_VALUE))
+
+#define INTERNAL_VALUE ((void *)0xdeadbee0)
+
+#define IS_ROOT(_n) ((_n)->p->k == 0)
+#define IS_LEAF(_n) ((_n)->l == NULL)
+
+/* TRUE if node X is a child of P. */
+#define ADJACENT(_p,_x) (((_p)->l==(_x))||((_p)->r==(_x)))
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r, *p;
+ mcs_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+ node_t null;
+ node_t dummy_g, dummy_gg;
+};
+
+static int gc_id;
+
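+/*
+ * Searches traverse this tree without taking any locks, so a rotation never
+ * modifies X's fields in place. A fully-initialised copy NX is linked in
+ * (the WMB() ensures it is visible before any pointer to it), and the old
+ * X is marked garbage and retired to the collector.
+ */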
+/* Nodes p, x, y must be locked. */
+static void left_rotate(ptst_t *ptst, node_t *x)
+{
+ node_t *y = x->r, *p = x->p, *nx;
+
+ nx = gc_alloc(ptst, gc_id);
+ nx->p = y;
+ nx->l = x->l;
+ nx->r = y->l;
+ nx->k = x->k;
+ nx->v = x->v;
+ mcs_init(&nx->lock);
+
+ WMB();
+
+ y->p = p;
+ x->l->p = nx;
+ y->l->p = nx;
+ y->l = nx;
+ if ( x == p->l ) p->l = y; else p->r = y;
+
+ MK_GARBAGE(x);
+ gc_free(ptst, x, gc_id);
+}
+
+
+/* Nodes p, x, y must be locked. */
+static void right_rotate(ptst_t *ptst, node_t *x)
+{
+ node_t *y = x->l, *p = x->p, *nx;
+
+ nx = gc_alloc(ptst, gc_id);
+ nx->p = y;
+ nx->l = y->r;
+ nx->r = x->r;
+ nx->k = x->k;
+ nx->v = x->v;
+ mcs_init(&nx->lock);
+
+ WMB();
+
+ y->p = p;
+ x->r->p = nx;
+ y->r->p = nx;
+ y->r = nx;
+ if ( x == p->l ) p->l = y; else p->r = y;
+
+ MK_GARBAGE(x);
+ gc_free(ptst, x, gc_id);
+}
+
+
+static void fix_unbalance_up(ptst_t *ptst, node_t *x)
+{
+ qnode_t x_qn, g_qn, p_qn, w_qn, gg_qn;
+ node_t *g, *p, *w, *gg;
+ int done = 0;
+
+ do {
+ assert(IS_UNBALANCED(x->v));
+ if ( IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+ gg = g->p;
+
+ mcs_lock(&gg->lock, &gg_qn);
+ if ( !ADJACENT(gg, g) || IS_UNBALANCED(gg->v) || IS_GARBAGE(gg) )
+ goto unlock_gg;
+
+ mcs_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) ) goto unlock_ggg;
+
+ mcs_lock(&p->lock, &p_qn);
+ if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pggg;
+
+ mcs_lock(&x->lock, &x_qn);
+
+ assert(IS_RED(x->v));
+ assert(IS_UNBALANCED(x->v));
+
+ if ( IS_BLACK(p->v) )
+ {
+ /* Case 1. Nothing to do. */
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( IS_ROOT(x) )
+ {
+ /* Case 2. */
+ x->v = MK_BLACK(MK_BALANCED(x->v));
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( IS_ROOT(p) )
+ {
+ /* Case 2. */
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpggg;
+ }
+
+ if ( g->l == p ) w = g->r; else w = g->l;
+ mcs_lock(&w->lock, &w_qn);
+
+ if ( IS_RED(w->v) )
+ {
+ /* Case 5. */
+ /* In all other cases, doesn't change colour or subtrees. */
+ if ( IS_UNBALANCED(w->v) ) goto unlock_wxpggg;
+ g->v = MK_UNBALANCED(MK_RED(g->v));
+ p->v = MK_BLACK(p->v);
+ w->v = MK_BLACK(w->v);
+ x->v = MK_BALANCED(x->v);
+ done = 2;
+ goto unlock_wxpggg;
+ }
+
+ /* Cases 3 & 4. Both of these need the great-grandfather locked. */
+ if ( p == g->l )
+ {
+ if ( x == p->l )
+ {
+ /* Case 3. Single rotation. */
+ x->v = MK_BALANCED(x->v);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ right_rotate(ptst, g);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ x->v = MK_BALANCED(MK_BLACK(x->v));
+ g->v = MK_RED(g->v);
+ left_rotate(ptst, p);
+ right_rotate(ptst, g);
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ if ( x == p->r )
+ {
+ /* Case 3. Single rotation. */
+ x->v = MK_BALANCED(x->v);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ left_rotate(ptst, g);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ x->v = MK_BALANCED(MK_BLACK(x->v));
+ g->v = MK_RED(g->v);
+ right_rotate(ptst, p);
+ left_rotate(ptst, g);
+ }
+ }
+
+ done = 1;
+
+ unlock_wxpggg:
+ mcs_unlock(&w->lock, &w_qn);
+ unlock_xpggg:
+ mcs_unlock(&x->lock, &x_qn);
+ unlock_pggg:
+ mcs_unlock(&p->lock, &p_qn);
+ unlock_ggg:
+ mcs_unlock(&g->lock, &g_qn);
+ unlock_gg:
+ mcs_unlock(&gg->lock, &gg_qn);
+
+ if ( done == 2 )
+ {
+ x = g;
+ done = 0;
+ }
+ }
+ while ( !done );
+}
+
+
+static void fix_unbalance_down(ptst_t *ptst, node_t *x)
+{
+ /* WN == W_NEAR, WF == W_FAR (W_FAR is further, in key space, from X). */
+ qnode_t x_qn, w_qn, p_qn, g_qn, wn_qn, wf_qn;
+ node_t *w, *p, *g, *wn, *wf;
+ int done = 0;
+
+ do {
+ if ( !IS_UNBALANCED(x->v) || IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+
+ mcs_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) )
+ goto unlock_g;
+
+ mcs_lock(&p->lock, &p_qn);
+ if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pg;
+
+ mcs_lock(&x->lock, &x_qn);
+
+ if ( !IS_BLACK(x->v) || !IS_UNBALANCED(x->v) )
+ {
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ if ( IS_ROOT(x) )
+ {
+ x->v = MK_BALANCED(x->v);
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ w = (x == p->l) ? p->r : p->l;
+ mcs_lock(&w->lock, &w_qn);
+ if ( IS_UNBALANCED(w->v) )
+ {
+ if ( IS_BLACK(w->v) )
+ {
+ /* Funky relaxed rules to the rescue. */
+ x->v = MK_BALANCED(x->v);
+ w->v = MK_BALANCED(w->v);
+ if ( IS_BLACK(p->v) )
+ {
+ p->v = MK_UNBALANCED(p->v);
+ done = 2;
+ }
+ else
+ {
+ p->v = MK_BLACK(p->v);
+ done = 1;
+ }
+ }
+ goto unlock_wxpg;
+ }
+
+ assert(!IS_LEAF(w));
+
+ if ( x == p->l )
+ {
+ wn = w->l;
+ wf = w->r;
+ }
+ else
+ {
+ wn = w->r;
+ wf = w->l;
+ }
+
+ mcs_lock(&wn->lock, &wn_qn);
+ /* Hanke has an extra relaxed transform here. It's not needed. */
+ if ( IS_UNBALANCED(wn->v) ) goto unlock_wnwxpg;
+
+ mcs_lock(&wf->lock, &wf_qn);
+ if ( IS_UNBALANCED(wf->v) ) goto unlock_wfwnwxpg;
+
+ if ( IS_RED(w->v) )
+ {
+ /* Case 1. Rotate at parent. */
+ assert(IS_BLACK(p->v) && IS_BLACK(wn->v) && IS_BLACK(wf->v));
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ if ( x == p->l ) left_rotate(ptst, p); else right_rotate(ptst, p);
+ goto unlock_wfwnwxpg;
+ }
+
+ if ( IS_BLACK(wn->v) && IS_BLACK(wf->v) )
+ {
+ if ( IS_RED(p->v) )
+ {
+ /* Case 2. Simple recolouring. */
+ p->v = MK_BLACK(p->v);
+ done = 1;
+ }
+ else
+ {
+ /* Case 5. Simple recolouring. */
+ p->v = MK_UNBALANCED(p->v);
+ done = 2;
+ }
+ w->v = MK_RED(w->v);
+ x->v = MK_BALANCED(x->v);
+ goto unlock_wfwnwxpg;
+ }
+
+ if ( x == p->l )
+ {
+ if ( IS_RED(wf->v) )
+ {
+ /* Case 3. Single rotation. */
+ wf->v = MK_BLACK(wf->v);
+ w->v = SET_COLOUR(w->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ left_rotate(ptst, p);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ assert(IS_RED(wn->v));
+ wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ right_rotate(ptst, w);
+ left_rotate(ptst, p);
+ }
+ }
+ else /* SYMMETRIC CASE: X == P->R */
+ {
+ if ( IS_RED(wf->v) )
+ {
+ /* Case 3. Single rotation. */
+ wf->v = MK_BLACK(wf->v);
+ w->v = SET_COLOUR(w->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ right_rotate(ptst, p);
+ }
+ else
+ {
+ /* Case 4. Double rotation. */
+ assert(IS_RED(wn->v));
+ wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ x->v = MK_BALANCED(x->v);
+ left_rotate(ptst, w);
+ right_rotate(ptst, p);
+ }
+ }
+
+ done = 1;
+
+ unlock_wfwnwxpg:
+ mcs_unlock(&wf->lock, &wf_qn);
+ unlock_wnwxpg:
+ mcs_unlock(&wn->lock, &wn_qn);
+ unlock_wxpg:
+ mcs_unlock(&w->lock, &w_qn);
+ unlock_xpg:
+ mcs_unlock(&x->lock, &x_qn);
+ unlock_pg:
+ mcs_unlock(&p->lock, &p_qn);
+ unlock_g:
+ mcs_unlock(&g->lock, &g_qn);
+
+ if ( done == 2 )
+ {
+ x = p;
+ done = 0;
+ }
+ }
+ while ( !done );
+}
+
+
+static void delete_finish(ptst_t *ptst, node_t *x)
+{
+ qnode_t g_qn, p_qn, w_qn, x_qn;
+ node_t *g, *p, *w;
+ int done = 0;
+
+ do {
+ if ( IS_GARBAGE(x) ) return;
+
+ p = x->p;
+ g = p->p;
+
+ mcs_lock(&g->lock, &g_qn);
+ if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) )
+ goto unlock_g;
+
+ mcs_lock(&p->lock, &p_qn);
+ /* Removing unbalanced red nodes is okay. */
+ if ( !ADJACENT(p, x) || (IS_UNBALANCED(p->v) && IS_BLACK(p->v)) )
+ goto unlock_pg;
+
+ mcs_lock(&x->lock, &x_qn);
+ if ( IS_UNBALANCED(x->v) ) goto unlock_xpg;
+ if ( GET_VALUE(x->v) != NULL )
+ {
+ done = 1;
+ goto unlock_xpg;
+ }
+
+ if ( p->l == x ) w = p->r; else w = p->l;
+ assert(w != x);
+ mcs_lock(&w->lock, &w_qn);
+ if ( IS_UNBALANCED(w->v) ) goto unlock_wxpg;
+
+ if ( g->l == p ) g->l = w; else g->r = w;
+ MK_GARBAGE(p); gc_free(ptst, p, gc_id);
+ MK_GARBAGE(x); gc_free(ptst, x, gc_id);
+ w->p = g;
+ if ( IS_BLACK(p->v) && IS_BLACK(w->v) )
+ {
+ w->v = MK_UNBALANCED(w->v);
+ done = 2;
+ }
+ else
+ {
+ w->v = MK_BLACK(w->v);
+ done = 1;
+ }
+
+ unlock_wxpg:
+ mcs_unlock(&w->lock, &w_qn);
+ unlock_xpg:
+ mcs_unlock(&x->lock, &x_qn);
+ unlock_pg:
+ mcs_unlock(&p->lock, &p_qn);
+ unlock_g:
+ mcs_unlock(&g->lock, &g_qn);
+ }
+ while ( !done );
+
+ if ( done == 2 ) fix_unbalance_down(ptst, w);
+}
+
+
+set_t *set_alloc(void)
+{
+ ptst_t *ptst;
+ set_t *set;
+ node_t *root, *null;
+
+ ptst = critical_enter();
+
+ set = (set_t *)malloc(sizeof(*set));
+ memset(set, 0, sizeof(*set));
+
+ root = &set->root;
+ null = &set->null;
+
+ root->k = 0;
+ root->v = MK_RED(INTERNAL_VALUE);
+ root->l = NULL;
+ root->r = null;
+ root->p = NULL;
+ mcs_init(&root->lock);
+
+ null->k = SENTINEL_KEYMIN;
+ null->v = MK_BLACK(INTERNAL_VALUE);
+ null->l = NULL;
+ null->r = NULL;
+ null->p = root;
+ mcs_init(&null->lock);
+
+ set->dummy_gg.l = &set->dummy_g;
+ set->dummy_g.p = &set->dummy_gg;
+ set->dummy_g.l = &set->root;
+ set->root.p = &set->dummy_g;
+
+ critical_exit(ptst);
+
+ return set;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ ptst_t *ptst;
+ qnode_t y_qn, z_qn;
+ node_t *y, *z, *new_internal, *new_leaf;
+ int fix_up = 0;
+ setval_t ov = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ retry:
+ z = &s->root;
+ while ( (y = (k <= z->k) ? z->l : z->r) != NULL )
+ z = y;
+
+ y = z->p;
+ mcs_lock(&y->lock, &y_qn);
+ if ( (((k <= y->k) ? y->l : y->r) != z) || IS_GARBAGE(y) )
+ {
+ mcs_unlock(&y->lock, &y_qn);
+ goto retry;
+ }
+
+ mcs_lock(&z->lock, &z_qn);
+ assert(!IS_GARBAGE(z) && IS_LEAF(z));
+
+ if ( z->k == k )
+ {
+ ov = GET_VALUE(z->v);
+ if ( overwrite || (ov == NULL) )
+ SET_VALUE(z->v, v);
+ }
+ else
+ {
+ new_leaf = gc_alloc(ptst, gc_id);
+ new_internal = gc_alloc(ptst, gc_id);
+ new_leaf->k = k;
+ new_leaf->v = MK_BLACK(v);
+ new_leaf->l = NULL;
+ new_leaf->r = NULL;
+
+ new_leaf->p = new_internal;
+ mcs_init(&new_leaf->lock);
+ if ( z->k < k )
+ {
+ new_internal->k = z->k;
+ new_internal->l = z;
+ new_internal->r = new_leaf;
+ }
+ else
+ {
+ new_internal->k = k;
+ new_internal->l = new_leaf;
+ new_internal->r = z;
+ }
+ new_internal->p = y;
+ mcs_init(&new_internal->lock);
+
+ if ( IS_UNBALANCED(z->v) )
+ {
+ z->v = MK_BALANCED(z->v);
+ new_internal->v = MK_BLACK(INTERNAL_VALUE);
+ }
+ else if ( IS_RED(y->v) )
+ {
+ new_internal->v = MK_UNBALANCED(MK_RED(INTERNAL_VALUE));
+ fix_up = 1;
+ }
+ else
+ {
+ new_internal->v = MK_RED(INTERNAL_VALUE);
+ }
+
+ WMB();
+
+ z->p = new_internal;
+ if ( y->l == z ) y->l = new_internal; else y->r = new_internal;
+ }
+
+ mcs_unlock(&y->lock, &y_qn);
+ mcs_unlock(&z->lock, &z_qn);
+
+ if ( fix_up )
+ fix_unbalance_up(ptst, new_internal);
+
+ out:
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *y, *z;
+ qnode_t z_qn;
+ setval_t ov = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ z = &s->root;
+ while ( (y = (k <= z->k) ? z->l : z->r) != NULL )
+ z = y;
+
+ if ( z->k == k )
+ {
+ mcs_lock(&z->lock, &z_qn);
+ if ( !IS_GARBAGE(z) )
+ {
+ ov = GET_VALUE(z->v);
+
+ SET_VALUE(z->v, NULL);
+ }
+ mcs_unlock(&z->lock, &z_qn);
+ }
+
+ if ( ov != NULL )
+ delete_finish(ptst, z);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *m, *n;
+ setval_t v;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = &s->root;
+ while ( (m = (k <= n->k) ? n->l : n->r) != NULL )
+ n = m;
+
+ v = (k == n->k) ? GET_VALUE(n->v) : NULL;
+ if ( v == GARBAGE_VALUE ) v = NULL;
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+}
+
+#if 0
+static int valll=0, bug=0, nrb=-1;
+static void __traverse(node_t *n, int d, int _nrb)
+{
+ int i;
+ if ( n == NULL )
+ {
+ if ( nrb == -1 ) nrb = _nrb;
+ if ( nrb != _nrb )
+ printf("Imbalance at depth %d (%d,%d)\n", d, nrb, _nrb);
+ return;
+ }
+ if ( IS_LEAF(n) && (n->k != 0) )
+ {
+ assert(n->l == NULL);
+ assert(n->r == NULL);
+ assert(IS_BLACK(n->v));
+ }
+ if ( !IS_LEAF(n) && IS_RED(n->v) )
+ {
+ assert(IS_BLACK(n->l->v));
+ assert(IS_BLACK(n->r->v));
+ }
+ if ( IS_BLACK(n->v) ) _nrb++;
+ __traverse(n->l, d+1, _nrb);
+ if ( valll > n->k ) bug=1;
+#if 0
+ for ( i = 0; i < d; i++ ) printf(" ");
+ printf("%c%p K: %5d V: %p P: %p L: %p R: %p depth: %d\n",
+ IS_BLACK(n->v) ? 'B' : 'R', n, n->k, n->v, n->p, n->l, n->r, d);
+#endif
+ valll = n->k;
+ __traverse(n->r, d+1, _nrb);
+}
+void check_tree(set_t *s)
+{
+ __traverse(s->root.r, 0, 0);
+ if ( bug )
+ printf("***********************************************************************************************\n");
+}
+#endif
--- /dev/null
+/******************************************************************************
+ * rb_lock_serialisedwriters.c
+ *
+ * Lock-based red-black trees, using multi-reader locks.
+ *
+ * Updates are serialised on a global mutual-exclusion spinlock.
+ *
+ * Updates never need to read-lock, as updates are serialised. Must write-lock
+ * for all node changes except colour changes and parent-link updates.
+ *
+ * Searches must read-lock down the tree, as they do not serialise.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define IS_BLACK(_v) ((int_addr_t)(_v)&1)
+#define IS_RED(_v) (!IS_BLACK(_v))
+#define MK_BLACK(_v) ((setval_t)((int_addr_t)(_v)|1))
+#define MK_RED(_v) ((setval_t)((int_addr_t)(_v)&~1))
+#define GET_VALUE(_v) (MK_RED(_v))
+#define GET_COLOUR(_v) (IS_BLACK(_v))
+#define SET_COLOUR(_v,_c) ((setval_t)((unsigned long)(_v)|(unsigned long)(_c)))
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ node_t *l, *r, *p;
+ mrsw_lock_t lock;
+};
+
+struct set_st
+{
+ node_t root;
+ CACHE_PAD(0);
+ mcs_lock_t writer_lock;
+};
+
+static node_t null;
+static int gc_id;
+
+static void left_rotate(node_t *x)
+{
+ mrsw_qnode_t p_qn, x_qn, y_qn;
+ node_t *y = x->r, *p = x->p;
+
+ wr_lock(&p->lock, &p_qn);
+ wr_lock(&x->lock, &x_qn);
+ wr_lock(&y->lock, &y_qn);
+
+ /* No need to write-lock to update parent link. */
+ if ( (x->r = y->l) != &null ) x->r->p = x;
+
+ x->p = y;
+ y->l = x;
+ y->p = p;
+ if ( x == p->l ) p->l = y; else p->r = y;
+
+ wr_unlock(&y->lock, &y_qn);
+ wr_unlock(&x->lock, &x_qn);
+ wr_unlock(&p->lock, &p_qn);
+}
+
+
+static void right_rotate(node_t *x)
+{
+ mrsw_qnode_t p_qn, x_qn, y_qn;
+ node_t *y = x->l, *p = x->p;
+
+ wr_lock(&p->lock, &p_qn);
+ wr_lock(&x->lock, &x_qn);
+ wr_lock(&y->lock, &y_qn);
+
+ /* No need to write-lock to update parent link. */
+ if ( (x->l = y->r) != &null ) x->l->p = x;
+
+ x->p = y;
+ y->r = x;
+ y->p = p;
+ if ( x == p->l ) p->l = y; else p->r = y;
+
+ wr_unlock(&y->lock, &y_qn);
+ wr_unlock(&x->lock, &x_qn);
+ wr_unlock(&p->lock, &p_qn);
+}
+
+
+/* No locks held on entry/exit. Colour changes safe. Rotations lock for us. */
+static void delete_fixup(ptst_t *ptst, set_t *s, node_t *x)
+{
+ node_t *p, *w;
+
+ while ( (x->p != &s->root) && IS_BLACK(x->v) )
+ {
+ p = x->p;
+
+ if ( x == p->l )
+ {
+ w = p->r;
+ if ( IS_RED(w->v) )
+ {
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ /* Node W will be new parent of P. */
+ left_rotate(p);
+ /* Get new sibling W. */
+ w = p->r;
+ }
+
+ if ( IS_BLACK(w->l->v) && IS_BLACK(w->r->v) )
+ {
+ w->v = MK_RED(w->v);
+ x = p;
+ }
+ else
+ {
+ if ( IS_BLACK(w->r->v) )
+ {
+                    /* w->l is red => it cannot be the null node. */
+ w->l->v = MK_BLACK(w->l->v);
+ w->v = MK_RED(w->v);
+ right_rotate(w);
+ /* Old w is new w->r. Old w->l is new w.*/
+ w = p->r;
+ }
+
+ w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ w->r->v = MK_BLACK(w->r->v);
+ left_rotate(p);
+ break;
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ w = p->l;
+ if ( IS_RED(w->v) )
+ {
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ /* Node W will be new parent of P. */
+ right_rotate(p);
+ /* Get new sibling W. */
+ w = p->l;
+ }
+
+ if ( IS_BLACK(w->l->v) && IS_BLACK(w->r->v) )
+ {
+ w->v = MK_RED(w->v);
+ x = p;
+ }
+ else
+ {
+ if ( IS_BLACK(w->l->v) )
+ {
+ /* w->r is red => it cannot be the null node. */
+ w->r->v = MK_BLACK(w->r->v);
+ w->v = MK_RED(w->v);
+ left_rotate(w);
+ /* Old w is new w->l. Old w->r is new w.*/
+ w = p->l;
+ }
+
+ w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ w->l->v = MK_BLACK(w->l->v);
+ right_rotate(p);
+ break;
+ }
+ }
+ }
+
+ x->v = MK_BLACK(x->v);
+}
+
+
+set_t *set_alloc(void)
+{
+ ptst_t *ptst;
+ set_t *set;
+ node_t *root;
+
+ ptst = critical_enter();
+
+ set = (set_t *)malloc(sizeof(*set));
+
+ root = &set->root;
+ root->k = SENTINEL_KEYMIN;
+ root->v = MK_RED(NULL);
+ root->l = &null;
+ root->r = &null;
+ root->p = NULL;
+ mrsw_init(&root->lock);
+
+ mcs_init(&set->writer_lock);
+
+ critical_exit(ptst);
+
+ return set;
+}
+
+
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ ptst_t *ptst;
+ node_t *x, *p, *g, *y, *new;
+ mrsw_qnode_t x_qn;
+ qnode_t writer_qn;
+ setval_t ov;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ mcs_lock(&s->writer_lock, &writer_qn);
+
+ x = &s->root;
+ while ( (y = (k < x->k) ? x->l : x->r) != &null )
+ {
+ x = y;
+ if ( k == x->k ) break;
+ }
+
+ if ( k == x->k )
+ {
+ ov = x->v;
+ /* Lock X to change mapping. */
+ wr_lock(&x->lock, &x_qn);
+ if ( overwrite ) x->v = SET_COLOUR(v, GET_COLOUR(ov));
+ wr_unlock(&x->lock, &x_qn);
+ ov = GET_VALUE(ov);
+ }
+ else
+ {
+ ov = NULL;
+
+ new = (node_t *)gc_alloc(ptst, gc_id);
+ new->k = k;
+ new->v = MK_RED(v);
+ new->l = &null;
+ new->r = &null;
+ new->p = x;
+ mrsw_init(&new->lock);
+
+ /* Lock X to change a child. */
+ wr_lock(&x->lock, &x_qn);
+ if ( k < x->k ) x->l = new; else x->r = new;
+ wr_unlock(&x->lock, &x_qn);
+
+ x = new;
+
+ /* No locks held here. Colour changes safe. Rotations lock for us. */
+ for ( ; ; )
+ {
+ if ( (p = x->p) == &s->root )
+ {
+ x->v = MK_BLACK(x->v);
+ break;
+ }
+
+ if ( IS_BLACK(p->v) ) break;
+
+ g = p->p;
+ if ( p == g->l )
+ {
+ y = g->r;
+ if ( IS_RED(y->v) )
+ {
+ p->v = MK_BLACK(p->v);
+ y->v = MK_BLACK(y->v);
+ g->v = MK_RED(g->v);
+ x = g;
+ }
+ else
+ {
+ if ( x == p->r )
+ {
+ x = p;
+ left_rotate(x);
+ /* X and P switched round. */
+ p = x->p;
+ }
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ right_rotate(g);
+ /* G no longer on the path. */
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ y = g->l;
+ if ( IS_RED(y->v) )
+ {
+ p->v = MK_BLACK(p->v);
+ y->v = MK_BLACK(y->v);
+ g->v = MK_RED(g->v);
+ x = g;
+ }
+ else
+ {
+ if ( x == p->l )
+ {
+ x = p;
+ right_rotate(x);
+ /* X and P switched round. */
+ p = x->p;
+ }
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ left_rotate(g);
+ /* G no longer on the path. */
+ }
+ }
+ }
+ }
+
+ mcs_unlock(&s->writer_lock, &writer_qn);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *x, *y, *z;
+ mrsw_qnode_t qn[2], *y_pqn=qn+0, *yp_pqn=qn+1, *t_pqn;
+ mrsw_qnode_t z_qn, zp_qn;
+ qnode_t writer_qn;
+ setval_t ov = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ mcs_lock(&s->writer_lock, &writer_qn);
+
+ z = &s->root;
+ while ( (z = (k < z->k) ? z->l : z->r) != &null )
+ {
+ if ( k == z->k ) break;
+ }
+
+ if ( k == z->k )
+ {
+ ov = GET_VALUE(z->v);
+
+ if ( (z->l != &null) && (z->r != &null) )
+ {
+ /* Lock Z. It will get new key copied in. */
+ wr_lock(&z->lock, &z_qn);
+ y = z->r;
+ /*
+ * Write-lock from Z to Y. We end up with (YP,Y) locked.
+ * Write-coupling is needed so we don't overtake searches for Y.
+ */
+ wr_lock(&y->lock, y_pqn);
+ while ( y->l != &null )
+ {
+ if ( y->p != z ) wr_unlock(&y->p->lock, yp_pqn);
+ y = y->l;
+ t_pqn = yp_pqn;
+ yp_pqn = y_pqn;
+ y_pqn = t_pqn;
+ wr_lock(&y->lock, y_pqn);
+ }
+ }
+ else
+ {
+ y = z;
+ /* Lock ZP. It will get new child. */
+ wr_lock(&z->p->lock, &zp_qn);
+ /* Lock Z. It will be deleted. */
+ wr_lock(&z->lock, &z_qn);
+ }
+
+ /* No need to lock X. Only parent link is modified. */
+ x = (y->l != &null) ? y->l : y->r;
+ x->p = y->p;
+
+ if ( y == y->p->l ) y->p->l = x; else y->p->r = x;
+
+ if ( y != z )
+ {
+ z->k = y->k;
+ z->v = SET_COLOUR(GET_VALUE(y->v), GET_COLOUR(z->v));
+ if ( y->p != z ) wr_unlock(&y->p->lock, yp_pqn);
+ wr_unlock(&y->lock, y_pqn);
+ }
+ else
+ {
+ wr_unlock(&z->p->lock, &zp_qn);
+ }
+
+ wr_unlock(&z->lock, &z_qn);
+
+ gc_free(ptst, y, gc_id);
+
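+        /*
+         * gc_free() only schedules Y for reclamation; it cannot be reused
+         * while any thread may still hold a reference, so reading y->v
+         * below remains safe.
+         */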
+ if ( IS_BLACK(y->v) ) delete_fixup(ptst, s, x);
+ }
+
+ mcs_unlock(&s->writer_lock, &writer_qn);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ node_t *m, *n;
+ mrsw_qnode_t qn[2], *m_pqn=&qn[0], *n_pqn=&qn[1], *t_pqn;
+ setval_t v = NULL;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ n = &s->root;
+ rd_lock(&n->lock, n_pqn);
+
+ while ( (m = (k < n->k) ? n->l : n->r) != &null )
+ {
+ rd_lock(&m->lock, m_pqn);
+ rd_unlock(&n->lock, n_pqn);
+ n = m;
+ t_pqn = m_pqn;
+ m_pqn = n_pqn;
+ n_pqn = t_pqn;
+ if ( k == n->k )
+ {
+ v = GET_VALUE(n->v);
+ break;
+ }
+ }
+
+ rd_unlock(&n->lock, n_pqn);
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ gc_id = gc_add_allocator(sizeof(node_t));
+
+ null.k = 0;
+ null.v = MK_BLACK(NULL);
+ null.l = NULL;
+ null.r = NULL;
+ null.p = NULL;
+ mrsw_init(&null.lock);
+}
--- /dev/null
+/******************************************************************************
+ * rb_stm.c
+ *
+ * Lock-free red-black trees, based on STM.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ *
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "stm.h"
+#include "set.h"
+
+#define IS_BLACK(_v) ((int_addr_t)(_v)&1)
+#define IS_RED(_v) (!IS_BLACK(_v))
+#define MK_BLACK(_v) ((setval_t)((int_addr_t)(_v)|1))
+#define MK_RED(_v) ((setval_t)((int_addr_t)(_v)&~1))
+#define GET_VALUE(_v) (MK_RED(_v))
+#define GET_COLOUR(_v) (IS_BLACK(_v))
+#define SET_COLOUR(_v,_c) ((setval_t)((unsigned long)(_v)|(unsigned long)(_c)))
+
+typedef struct node_st node_t;
+typedef stm_blk set_t;
+
+struct node_st
+{
+ setkey_t k;
+ setval_t v;
+ stm_blk *l, *r, *p;
+};
+
+static struct {
+ CACHE_PAD(0);
+ stm *memory; /* read-only */
+ stm_blk *nullb; /* read-only */
+ CACHE_PAD(2);
+} shared;
+
+#define MEMORY (shared.memory)
+#define NULLB (shared.nullb)
+
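+/*
+ * Naming convention: a trailing 'b' (xb, yb, pb, ...) denotes an stm_blk
+ * handle, while the matching plain pointer is the view of that block
+ * obtained from read_stm_blk() or write_stm_blk() within the current
+ * transaction.
+ */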
+static void left_rotate(ptst_t *ptst, stm_tx *tx, stm_blk *xb, node_t *x)
+{
+ stm_blk *yb, *pb;
+ node_t *y, *p;
+
+ yb = x->r;
+ pb = x->p;
+
+ y = write_stm_blk(ptst, tx, yb);
+ p = write_stm_blk(ptst, tx, pb);
+
+ if ( (x->r = y->l) != NULLB )
+ {
+ node_t *xr = write_stm_blk(ptst, tx, x->r);
+ xr->p = xb;
+ }
+
+ x->p = yb;
+ y->l = xb;
+ y->p = pb;
+ if ( xb == p->l ) p->l = yb; else p->r = yb;
+}
+
+
+static void right_rotate(ptst_t *ptst, stm_tx *tx, stm_blk *xb, node_t *x)
+{
+ stm_blk *yb, *pb;
+ node_t *y, *p;
+
+ yb = x->l;
+ pb = x->p;
+
+ y = write_stm_blk(ptst, tx, yb);
+ p = write_stm_blk(ptst, tx, pb);
+
+ if ( (x->l = y->r) != NULLB )
+ {
+ node_t *xl = write_stm_blk(ptst, tx, x->l);
+ xl->p = xb;
+ }
+
+ x->p = yb;
+ y->r = xb;
+ y->p = pb;
+ if ( xb == p->l ) p->l = yb; else p->r = yb;
+}
+
+
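+/* Standard red-black deletion fix-up, expressed over STM block handles. */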
+static void delete_fixup(ptst_t *ptst, stm_tx *tx, set_t *s,
+ stm_blk *xb, node_t *x)
+{
+ stm_blk *pb, *wb, *wlb, *wrb;
+ node_t *p, *w, *wl, *wr;
+
+ while ( (x->p != s) && IS_BLACK(x->v) )
+ {
+ pb = x->p;
+ p = write_stm_blk(ptst, tx, pb);
+
+ if ( xb == p->l )
+ {
+ wb = p->r;
+ w = write_stm_blk(ptst, tx, wb);
+ if ( IS_RED(w->v) )
+ {
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ left_rotate(ptst, tx, pb, p);
+ wb = p->r;
+ w = write_stm_blk(ptst, tx, wb);
+ }
+
+ wlb = w->l;
+ wl = read_stm_blk(ptst, tx, wlb);
+ wrb = w->r;
+ wr = read_stm_blk(ptst, tx, wrb);
+ if ( IS_BLACK(wl->v) && IS_BLACK(wr->v) )
+ {
+ w->v = MK_RED(w->v);
+ xb = pb;
+ x = p;
+ }
+ else
+ {
+ if ( IS_BLACK(wr->v) )
+ {
+ wl = write_stm_blk(ptst, tx, wlb);
+ wl->v = MK_BLACK(wl->v);
+ w->v = MK_RED(w->v);
+ right_rotate(ptst, tx, wb, w);
+ wb = p->r;
+ w = write_stm_blk(ptst, tx, wb);
+ }
+
+ wrb = w->r;
+ wr = write_stm_blk(ptst, tx, wrb);
+ w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ wr->v = MK_BLACK(wr->v);
+ left_rotate(ptst, tx, pb, p);
+ break;
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ wb = p->l;
+ w = write_stm_blk(ptst, tx, wb);
+ if ( IS_RED(w->v) )
+ {
+ w->v = MK_BLACK(w->v);
+ p->v = MK_RED(p->v);
+ right_rotate(ptst, tx, pb, p);
+ wb = p->l;
+ w = write_stm_blk(ptst, tx, wb);
+ }
+
+ wlb = w->l;
+ wl = read_stm_blk(ptst, tx, wlb);
+ wrb = w->r;
+ wr = read_stm_blk(ptst, tx, wrb);
+ if ( IS_BLACK(wl->v) && IS_BLACK(wr->v) )
+ {
+ w->v = MK_RED(w->v);
+ xb = pb;
+ x = p;
+ }
+ else
+ {
+ if ( IS_BLACK(wl->v) )
+ {
+ wr = write_stm_blk(ptst, tx, wrb);
+ wr->v = MK_BLACK(wr->v);
+ w->v = MK_RED(w->v);
+ left_rotate(ptst, tx, wb, w);
+ wb = p->l;
+ w = write_stm_blk(ptst, tx, wb);
+ }
+
+ wlb = w->l;
+ wl = write_stm_blk(ptst, tx, wlb);
+ w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+ p->v = MK_BLACK(p->v);
+ wl->v = MK_BLACK(wl->v);
+ right_rotate(ptst, tx, pb, p);
+ break;
+ }
+ }
+ }
+
+ x->v = MK_BLACK(x->v);
+}
+
+
+set_t *set_alloc(void)
+{
+ ptst_t *ptst;
+ set_t *set;
+ node_t *root;
+
+ ptst = critical_enter();
+
+ set = new_stm_blk(ptst, MEMORY);
+
+ root = init_stm_blk(ptst, MEMORY, set);
+ root->k = SENTINEL_KEYMIN;
+ root->v = MK_RED(NULL);
+ root->l = NULLB;
+ root->r = NULLB;
+ root->p = NULL;
+
+ critical_exit(ptst);
+
+ return set;
+}
+
+
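+/*
+ * Each update runs as a transaction: the body works on per-transaction
+ * views of the blocks it opens and is retried until commit_stm_tx()
+ * succeeds. A speculatively allocated node is reused across retries and
+ * freed afterwards if the key turned out to exist already.
+ */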
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite)
+{
+ ptst_t *ptst;
+ stm_tx *tx;
+ stm_blk *xb, *b, *pb, *gb, *yb, *newb;
+ node_t *x, *p, *g, *y, *new;
+ setval_t ov;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ newb = NULL;
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+
+ b = s;
+ while ( b != NULLB )
+ {
+ xb = b;
+ x = read_stm_blk(ptst, tx, xb);
+ if ( k == x->k ) break;
+ b = (k < x->k) ? x->l : x->r;
+ }
+
+ x = write_stm_blk(ptst, tx, xb);
+
+ if ( k == x->k )
+ {
+ ov = x->v;
+ if ( overwrite ) x->v = SET_COLOUR(v, GET_COLOUR(ov));
+ ov = GET_VALUE(ov);
+ }
+ else
+ {
+ ov = NULL;
+ if ( newb == NULL )
+ {
+ newb = new_stm_blk(ptst, MEMORY);
+ new = init_stm_blk(ptst, MEMORY, newb);
+ new->k = k;
+ }
+
+ new->v = MK_RED(v);
+ new->l = NULLB;
+ new->r = NULLB;
+ new->p = xb;
+
+ if ( k < x->k ) x->l = newb; else x->r = newb;
+
+ xb = newb;
+ x = new;
+
+ for ( ; ; )
+ {
+ if ( (pb = x->p) == s )
+ {
+ x->v = MK_BLACK(x->v);
+ break;
+ }
+
+ p = read_stm_blk(ptst, tx, pb);
+ if ( IS_BLACK(p->v) ) break;
+
+ gb = p->p;
+ g = read_stm_blk(ptst, tx, gb);
+ if ( pb == g->l )
+ {
+ yb = g->r;
+ y = read_stm_blk(ptst, tx, yb);
+ if ( IS_RED(y->v) )
+ {
+ p = write_stm_blk(ptst, tx, pb);
+ y = write_stm_blk(ptst, tx, yb);
+ g = write_stm_blk(ptst, tx, gb);
+ p->v = MK_BLACK(p->v);
+ y->v = MK_BLACK(y->v);
+ g->v = MK_RED(g->v);
+ xb = gb;
+ x = g;
+ }
+ else
+ {
+ if ( xb == p->r )
+ {
+ xb = pb;
+ x = write_stm_blk(ptst, tx, pb);
+ left_rotate(ptst, tx, xb, x);
+ }
+ pb = x->p;
+ p = write_stm_blk(ptst, tx, pb);
+ gb = p->p;
+ g = write_stm_blk(ptst, tx, gb);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ right_rotate(ptst, tx, gb, g);
+ }
+ }
+ else /* SYMMETRIC CASE */
+ {
+ yb = g->l;
+ y = read_stm_blk(ptst, tx, yb);
+ if ( IS_RED(y->v) )
+ {
+ p = write_stm_blk(ptst, tx, pb);
+ y = write_stm_blk(ptst, tx, yb);
+ g = write_stm_blk(ptst, tx, gb);
+ p->v = MK_BLACK(p->v);
+ y->v = MK_BLACK(y->v);
+ g->v = MK_RED(g->v);
+ xb = gb;
+ x = g;
+ }
+ else
+ {
+ if ( xb == p->l )
+ {
+ xb = pb;
+ x = write_stm_blk(ptst, tx, pb);
+ right_rotate(ptst, tx, xb, x);
+ }
+ pb = x->p;
+ p = write_stm_blk(ptst, tx, pb);
+ gb = p->p;
+ g = write_stm_blk(ptst, tx, gb);
+ p->v = MK_BLACK(p->v);
+ g->v = MK_RED(g->v);
+ left_rotate(ptst, tx, gb, g);
+ }
+ }
+ }
+ }
+
+ remove_from_tx(ptst, tx, NULLB);
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ /* Free unused new block. */
+ if ( (ov != NULL) && (newb != NULL) ) free_stm_blk(ptst, MEMORY, newb);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ stm_tx *tx;
+ stm_blk *zb, *b, *xb, *yb;
+ node_t *z, *x, *y, *yp;
+ setval_t ov;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+ ov = NULL;
+ b = s;
+
+ while ( b != NULLB )
+ {
+ zb = b;
+ z = read_stm_blk(ptst, tx, zb);
+ if ( k == z->k )
+ {
+ ov = GET_VALUE(z->v);
+ break;
+ }
+ b = (k < z->k) ? z->l : z->r;
+ }
+
+ if ( ov != NULL )
+ {
+ z = write_stm_blk(ptst, tx, zb);
+
+ if ( (z->l != NULLB) && (z->r != NULLB) )
+ {
+ /* Find successor of node z, and place in (yb,y). */
+ yb = z->r;
+ y = read_stm_blk(ptst, tx, yb);
+
+ while ( y->l != NULLB )
+ {
+ yb = y->l;
+ y = read_stm_blk(ptst, tx, yb);
+ }
+
+ y = write_stm_blk(ptst, tx, yb);
+ }
+ else
+ {
+ yb = zb;
+ y = z;
+ }
+
+ xb = (y->l != NULLB) ? y->l : y->r;
+ x = write_stm_blk(ptst, tx, xb);
+ x->p = y->p;
+
+ yp = write_stm_blk(ptst, tx, y->p);
+ if ( yb == yp->l ) yp->l = xb; else yp->r = xb;
+
+ if ( y != z )
+ {
+ z->k = y->k;
+ z->v = SET_COLOUR(GET_VALUE(y->v), GET_COLOUR(z->v));
+ }
+
+ if ( IS_BLACK(y->v) ) delete_fixup(ptst, tx, s, xb, x);
+ }
+
+ remove_from_tx(ptst, tx, NULLB);
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ /* Free a deleted block. */
+ if ( ov != NULL ) free_stm_blk(ptst, MEMORY, yb);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+ ptst_t *ptst;
+ stm_tx *tx;
+ stm_blk *nb;
+ node_t *n;
+ setval_t v;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+ v = NULL;
+ nb = s;
+
+ while ( nb != NULLB )
+ {
+ n = read_stm_blk(ptst, tx, nb);
+ if ( k == n->k )
+ {
+ v = GET_VALUE(n->v);
+ break;
+ }
+ nb = (k < n->k) ? n->l : n->r;
+ }
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ node_t *null;
+ ptst_t *ptst;
+
+ ptst = critical_enter();
+
+ _init_stm_subsystem(0);
+
+ MEMORY = new_stm(ptst, sizeof(node_t));
+
+ NULLB = new_stm_blk(ptst, MEMORY);
+ null = init_stm_blk(ptst, MEMORY, NULLB);
+ null->k = 0;
+ null->v = MK_BLACK(NULL);
+ null->l = NULL;
+ null->r = NULL;
+ null->p = NULL;
+
+ critical_exit(ptst);
+}
--- /dev/null
+/******************************************************************************
+ * replay.c
+ *
+ * Replay the log output of search-structure runs.
+ * Must build set_harness.c with DO_WRITE_LOG defined.
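+ * Usage: replay <log name>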
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ *
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "portable_defns.h"
+
+#define RMAX_THREADS 256
+#define VERIFY_ORDERINGS
+
+#define LOG_REPLAYED (1<<26)
+#define LOG_KEY_MASK 0xffffff
+
+typedef struct log_st
+{
+ interval_t start, end;
+ unsigned int data; /* key, and replay flag */
+ void *val, *old_val; /* op changed mapping from old_val to val */
+} log_t;
+
+#define REPLAYED(_l) ((_l)->data & LOG_REPLAYED)
+
+static log_t *global_log;
+static int nr_threads, nr_updates, nr_keys;
+static int *key_offsets;
+static int *success;
+static unsigned int next_key = 0;
+static pthread_mutex_t key_lock;
+
+
+/*
+ * GLOBAL LOGS SORTED ON:
+ * 1. Key value
+ * 2. Start time
+ *
+ * Replayer deals with each key value in turn.
+ */
+static int compare(const void *t1, const void *t2)
+{
+ const log_t *l1 = t1;
+ const log_t *l2 = t2;
+ const int k1 = l1->data & LOG_KEY_MASK;
+ const int k2 = l2->data & LOG_KEY_MASK;
+
+ if ( k1 < k2 ) return(-1);
+ if ( k1 > k2 ) return(+1);
+
+ if ( l1->start < l2->start ) return(-1);
+
+ return(+1);
+}
+
+
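+/*
+ * Try to replay @log against the current abstract state of its key: the
+ * operation is accepted only if it has not been replayed already and its
+ * precondition (old_val) matches *key_state, in which case the state is
+ * advanced to its result value.
+ */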
+static int do_op(log_t *log, void **key_state)
+{
+ if ( REPLAYED(log) || (log->old_val != *key_state) ) return(0);
+ *key_state = log->val;
+ log->data |= LOG_REPLAYED;
+ return(1);
+}
+
+
+static void undo_op(log_t *log, void **key_state)
+{
+ assert(REPLAYED(log));
+ log->data &= ~LOG_REPLAYED;
+ *key_state = log->old_val;
+}
+
+
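+/*
+ * The heaps below are keyed on operation end time; the current heap size
+ * is stashed in heap[0], cast to and from a pointer-sized integer.
+ */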
+/* Sink down element @pos of @heap. */
+static void down_heap(log_t **heap, int *heap_offsets, log_t *log, int pos)
+{
+ int sz = (int)heap[0], nxt;
+ log_t *tmp;
+ while ( (nxt = (pos << 1)) <= sz )
+ {
+ if ( ((nxt+1) <= sz) && (heap[nxt+1]->end < heap[nxt]->end) ) nxt++;
+ if ( heap[nxt]->end > heap[pos]->end ) break;
+ heap_offsets[heap[pos] - log] = nxt;
+ heap_offsets[heap[nxt] - log] = pos;
+ tmp = heap[pos];
+ heap[pos] = heap[nxt];
+ heap[nxt] = tmp;
+ pos = nxt;
+ }
+}
+
+/* Float element @pos up @heap. */
+static void up_heap(log_t **heap, int *heap_offsets, log_t *log, int pos)
+{
+ log_t *tmp;
+ while ( pos > 1 )
+ {
+ if ( heap[pos]->end > heap[pos>>1]->end ) break;
+ heap_offsets[heap[pos] - log] = pos >> 1;
+ heap_offsets[heap[pos>>1] - log] = pos;
+ tmp = heap[pos];
+ heap[pos] = heap[pos>>1];
+ heap[pos>>1] = tmp;
+ pos >>= 1;
+ }
+}
+
+
+/* Delete @entry from @heap. */
+static void remove_entry(log_t **heap, int *heap_offsets,
+ log_t *log, log_t *entry)
+{
+ int sz = (int)heap[0];
+ int pos = heap_offsets[entry - log];
+ heap_offsets[heap[sz] - log] = pos;
+ heap[pos] = heap[sz];
+ heap[0] = (void *)(--sz);
+ if ( (pos > 1) && (heap[pos]->end < heap[pos>>1]->end) )
+ {
+ up_heap(heap, heap_offsets, log, pos);
+ }
+ else
+ {
+ down_heap(heap, heap_offsets, log, pos);
+ }
+}
+
+
+/* Add new entry @new to @heap. */
+static void add_entry(log_t **heap, int *heap_offsets, log_t *log, log_t *new)
+{
+ int sz = (int)heap[0];
+ heap[0] = (void *)(++sz);
+ heap_offsets[new - log] = sz;
+ heap[sz] = new;
+ up_heap(heap, heap_offsets, log, sz);
+}
+
+
+/*
+ * This linearisation algorithm is a depth-first search of all feasible
+ * orderings. At each step, the next available operation is selected.
+ * The set of "available" operations is those which:
+ * (1) have not already been selected on this search path
+ * (2) are operations whose results are correct given current state
+ * (eg. a failed delete couldn't be selected if the key is in the set!)
+ * (3) have start times <= the earliest end time in the set.
+ * (1) ensures that each operation happens only once. (2) ensures that
+ * abstract state is consistent between operations. (3) ensures that time
+ * ordering is conserved.
+ */
+static int linearise_ops_for_key(
+ log_t *log, int nr_items, log_t **stack,
+ log_t **cutoff_heap, int *heap_offsets, void **key_state)
+{
+ int i;
+ log_t **sp = stack;
+ interval_t cutoff;
+
+ /* Construct cutoff heap. */
+ cutoff_heap[0] = (void *)nr_items;
+ for ( i = 0; i < nr_items; i++ )
+ {
+ cutoff_heap[i+1] = log + i;
+ heap_offsets[i] = i+1;
+ }
+ for ( i = nr_items>>1; i > 0; i-- )
+ {
+ down_heap(cutoff_heap, heap_offsets, log, i);
+ }
+
+ cutoff = cutoff_heap[1]->end;
+
+ for ( i = 0; ; )
+ {
+ while ( (i < nr_items) && (log[i].start <= cutoff) )
+ {
+ if ( !do_op(&log[i], key_state) ) { i++; continue; }
+
+ *sp++ = &log[i];
+
+ /* Done? */
+ if ( (sp - stack) == nr_items ) goto success;
+
+ remove_entry(cutoff_heap, heap_offsets, log, &log[i]);
+ cutoff = cutoff_heap[1]->end;
+ i = 0;
+ }
+
+ /* Failure? */
+ if ( (sp - stack) == 0 )
+ {
+ for ( i = -3; i < nr_items + 3; i++ )
+ {
+#if 1
+ printf("%08x -> %08x -- %d: %08x -> %08x\n",
+ (unsigned int)log[i].start,
+ (unsigned int)log[i].end,
+ log[i].data & LOG_KEY_MASK,
+ (unsigned int)log[i].old_val,
+ (unsigned int)log[i].val);
+#endif
+ }
+ return(0);
+ }
+
+ i = *--sp - log;
+ undo_op(&log[i], key_state);
+ add_entry(cutoff_heap, heap_offsets, log, &log[i]);
+ cutoff = cutoff_heap[1]->end;
+ i++;
+ }
+
+ success:
+ return(1);
+}
+
+
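+/*
+ * Worker thread: repeatedly claims the next unprocessed key, splits that
+ * key's operations into non-overlapping chunks, and searches for a valid
+ * linearisation of each chunk, optionally re-verifying the result.
+ */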
+static void *thread_start(void *arg)
+{
+ unsigned long tid = (unsigned long)arg;
+ unsigned int our_key;
+ int ch_start, ch_end, start, end, nr_items, *heap_offsets;
+ log_t **stack;
+ log_t **cutoff_heap;
+ interval_t cutoff;
+ void *key_state;
+#ifdef VERIFY_ORDERINGS
+ int i;
+#endif
+
+ stack = malloc((nr_threads*nr_updates+1)*sizeof(log_t*));
+ cutoff_heap = malloc((nr_threads*nr_updates+1)*sizeof(*cutoff_heap));
+ heap_offsets = malloc((nr_threads*nr_updates+1)*sizeof(*heap_offsets));
+ if ( !stack || !cutoff_heap || !heap_offsets )
+ {
+ fprintf(stderr, "Error allocating space for stacks\n");
+ return(NULL);
+ }
+
+ again:
+ pthread_mutex_lock(&key_lock);
+ our_key = next_key++;
+ pthread_mutex_unlock(&key_lock);
+ if ( our_key >= nr_keys ) goto out;
+
+ start = key_offsets[our_key];
+ end = key_offsets[our_key+1];
+ nr_items = end - start;
+
+ printf("[Thread %lu] ++ Linearising key %d (%d events)\n",
+ tid, our_key, nr_items);
+
+#if 0
+ {
+ int i;
+ for ( i = start; i < end; i++ )
+ {
+ printf("%04d/%04d -- %08x -> %08x -- %d: %08x -> %08x\n",
+ our_key, i - start,
+ (unsigned int)global_log[i].start,
+ (unsigned int)global_log[i].end,
+ global_log[i].data & LOG_KEY_MASK,
+ (unsigned int)global_log[i].old_val,
+ (unsigned int)global_log[i].val);
+ }
+ }
+#endif
+
+ /*
+ * We divide operations into independent chunks. A chunk is a maximal
+ * sequence of operations, ordered on start time, that does not
+ * overlap with any operation in any other chunk. Clearly, finding
+ * a linearisation for each chunk produces a total schedule.
+ */
+ success[our_key] = 1;
+ key_state = 0;
+ for ( ch_start = start; ch_start < end; ch_start = ch_end )
+ {
+ cutoff = global_log[ch_start].end;
+ for ( ch_end = ch_start; ch_end < end; ch_end++ )
+ {
+ if ( global_log[ch_end].start > cutoff ) break;
+ if ( global_log[ch_end].end > cutoff )
+ cutoff = global_log[ch_end].end;
+ }
+
+ /* Linearise chunk ch_start -> ch_end. */
+ success[our_key] = linearise_ops_for_key(
+ &global_log[ch_start],
+ ch_end - ch_start,
+ &stack[ch_start - start],
+ cutoff_heap,
+ heap_offsets,
+ &key_state);
+
+ if ( !success[our_key] )
+ {
+ printf("[Thread %lu] -- Linearisation FAILED for key %d\n",
+ tid, our_key);
+ goto again;
+ }
+ }
+
+ printf("[Thread %lu] -- Linearisation %s for key %d\n",
+ tid, (success[our_key] ? "found" : "FAILED"), our_key);
+
+#ifdef VERIFY_ORDERINGS
+ printf("[Thread %lu] ++ Verifying key %d\n", tid, our_key);
+ cutoff = 0;
+ key_state = 0;
+ for ( i = 0; i < nr_items; i++ )
+ {
+ stack[i]->data &= ~LOG_REPLAYED; /* stop valid_op() from choking */
+ if ( !do_op(stack[i], &key_state) || (stack[i]->end < cutoff) )
+ {
+ int j;
+ fprintf(stderr, "\t*** INTERNAL ERROR: "
+ "Assigned ordering is invalid!\n");
+ for ( j = (i < 2) ? 0 : (i-2); (j < i+6) && (j < nr_items); j++ )
+ {
+ printf("%08x -> %08x -- %d: %08x -> %08x\n",
+ (unsigned int)stack[j]->start,
+ (unsigned int)stack[j]->end,
+ stack[j]->data & LOG_KEY_MASK,
+ (unsigned int)stack[j]->old_val,
+ (unsigned int)stack[j]->val);
+ }
+ exit(-1);
+ }
+ if ( stack[i]->start > cutoff ) cutoff = stack[i]->start;
+ }
+ printf("[Thread %lu] -- Verified key %d\n", tid, our_key);
+#endif
+
+ goto again;
+
+ out:
+ return(NULL);
+}
+
+
+int main(int argc, char **argv)
+{
+ pthread_t thread[RMAX_THREADS];
+ int fd, i, j, failed = 0, nr_cpus;
+ unsigned long log_header[3];
+
+ if ( argc != 2 )
+ {
+ fprintf(stderr, "%s <log name>\n", argv[0]);
+ exit(1);
+ }
+
+ nr_cpus = (int)sysconf(_SC_NPROCESSORS_ONLN);
+ if ( nr_cpus > RMAX_THREADS ) nr_cpus = RMAX_THREADS;
+
+ if ( (fd = open(argv[1], O_RDONLY, 0)) == -1 )
+ {
+ fprintf(stderr, "Error opening log\n");
+ exit(-1);
+ }
+
+ /* Grok the log header. */
+ read(fd, log_header, sizeof(log_header));
+ nr_threads = log_header[0];
+ nr_updates = log_header[1];
+ nr_keys = log_header[2];
+ printf("Read log header: nr_updates=%d, nr_threads=%d, nr_keys=%d\n",
+ nr_updates, nr_threads, nr_keys);
+
+ /* Allocate state for processing log entries. */
+ global_log = malloc((nr_threads*nr_updates+1)*sizeof(log_t));
+ key_offsets = malloc((nr_keys+1)*sizeof(*key_offsets));
+ success = malloc(nr_keys*sizeof(*success));
+ if ( !global_log || !key_offsets || !success )
+ {
+ fprintf(stderr, "Error allocating space for log\n");
+ exit(-1);
+ }
+
+ /* Read log entries, and sort into key and timestamp order. */
+ read(fd, global_log, nr_threads*nr_updates*sizeof(log_t));
+ global_log[nr_threads*nr_updates].data = LOG_KEY_MASK; /* sentinel */
+
+ printf("Sorting logs..."); fflush(stdout);
+ qsort(global_log, nr_threads*nr_updates, sizeof(log_t), compare);
+ printf(" done\n");
+
+ /* Find offsets of key regions in global table. */
+ key_offsets[0] = 0;
+ nr_keys = 0;
+ for ( i = 0; i < (nr_threads * nr_updates); i = j )
+ {
+ j = i+1;
+ while ( (global_log[j].data & LOG_KEY_MASK) ==
+ (global_log[i].data & LOG_KEY_MASK) ) j++;
+ key_offsets[++nr_keys] = j;
+ }
+
+ /* Set up a bunch of worker threads.... */
+ pthread_mutex_init(&key_lock, NULL);
+ for ( i = 0; i < nr_cpus; i++ )
+ {
+ if ( pthread_create(&thread[i], NULL, thread_start, (void *)i) )
+ {
+ fprintf(stderr, "Error creating thread %d (%d)\n", i, errno);
+ exit(1);
+ }
+ }
+
+ /* ...and wait for them all to complete. */
+ for ( i = 0; i < nr_cpus; i++ )
+ {
+ pthread_join(thread[i], NULL);
+ }
+
+ /* Summarise results from worker threads. */
+ for ( i = 0; i < nr_keys; i++ )
+ {
+ if ( success[i] ) continue;
+ printf("FAILED on key %d\n", i);
+ failed++;
+ }
+
+ if ( failed )
+ {
+ printf("Failed on %d keys\n", failed);
+ return(1);
+ }
+
+ printf("All assigned orderings are valid\n");
+ return(0);
+}
--- /dev/null
+#ifndef __SET_H__
+#define __SET_H__
+
+
+typedef unsigned long setkey_t;
+typedef void *setval_t;
+
+
+#ifdef __SET_IMPLEMENTATION__
+
+/*************************************
+ * INTERNAL DEFINITIONS
+ */
+
+/* Fine for 2^NUM_LEVELS nodes. */
+#define NUM_LEVELS 20
+
+
+/* Internal key values with special meanings. */
+#define INVALID_FIELD (0) /* Uninitialised field value. */
+#define SENTINEL_KEYMIN ( 1UL) /* Key value of first dummy node. */
+#define SENTINEL_KEYMAX (~0UL) /* Key value of last dummy node. */
+
+
+/*
+ * Used internally by set access functions, so that callers can use
+ * key values 0 and 1 without knowing these have special meanings.
+ */
+#define CALLER_TO_INTERNAL_KEY(_k) ((_k) + 2)
+
+
+/*
+ * SUPPORT FOR WEAK ORDERING OF MEMORY ACCESSES
+ *
+ * On weakly-ordered architectures a reader may see a pointer to a new node
+ * before it sees the node's initialised fields. READ_FIELD() therefore
+ * re-reads behind a read barrier whenever it observes the known-invalid
+ * value that every field starts out as.
+ */
+
+#ifdef WEAK_MEM_ORDER
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) \
+do { \
+ (_x) = (_f); \
+ if ( (_x) == INVALID_FIELD ) { RMB(); (_x) = (_f); } \
+ assert((_x) != INVALID_FIELD); \
+} while ( 0 )
+
+#else
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) ((_x) = (_f))
+
+#endif
+
+
+#else
+
+/*************************************
+ * PUBLIC DEFINITIONS
+ */
+
+/*
+ * Key range accepted by set functions.
+ * We lose three values (conveniently at top end of key space).
+ * - Known invalid value to which all fields are initialised.
+ * - Sentinel key values for up to two dummy nodes.
+ */
+#define KEY_MIN ( 0U)
+#define KEY_MAX ((~0U) - 3)
+
+typedef void set_t; /* opaque */
+
+void _init_set_subsystem(void);
+
+/*
+ * Allocate an empty set.
+ */
+set_t *set_alloc(void);
+
+/*
+ * Add mapping (@k -> @v) into set @s. Return previous mapped value if
+ * one existed, or NULL if no previous mapping for @k existed.
+ *
+ * If @overwrite is FALSE and a mapping already exists, it is left
+ * unmodified and the existing value is returned. A NULL return value
+ * therefore indicates that a new mapping was inserted.
+ */
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite);
+
+/*
+ * Remove mapping for key @k from set @s. Return value associated with
+ * removed mapping, or NULL if there was no mapping to delete.
+ */
+setval_t set_remove(set_t *s, setkey_t k);
+
+/*
+ * Look up mapping for key @k in set @s. Return value if found, else NULL.
+ */
+setval_t set_lookup(set_t *s, setkey_t k);
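+
+/*
+ * Minimal usage sketch (illustrative only; the test harness also initialises
+ * the ptst and gc subsystems first, and caller keys must lie in
+ * [KEY_MIN, KEY_MAX]):
+ *
+ * set_t *s;
+ * _init_set_subsystem();
+ * s = set_alloc();
+ * (void)set_update(s, 42, (setval_t)0xdeadbee0, 1);
+ * assert(set_lookup(s, 42) == (setval_t)0xdeadbee0);
+ * (void)set_remove(s, 42);
+ */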
+
+#endif /* __SET_IMPLEMENTATION__ */
+
+
+#endif /* __SET_H__ */
--- /dev/null
+/******************************************************************************
+ * set_harness.c
+ *
+ * Test harness for the various set implementations.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/resource.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ucontext.h>
+#include <signal.h>
+#include <sched.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#include "portable_defns.h"
+#include "set.h"
+#include "ptst.h"
+
+/* This produces an operation log for the 'replay' checker. */
+/*#define DO_WRITE_LOG*/
+
+#ifdef DO_WRITE_LOG
+#define MAX_ITERATIONS 100000
+#define MAX_WALL_TIME 50 /* seconds */
+#else
+#define MAX_ITERATIONS 100000000
+#define MAX_WALL_TIME 10 /* seconds */
+#endif
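+
+/*
+ * Example invocation of a harness binary (illustrative; <read_proportion> is
+ * out of 256, so 200 gives roughly 78% lookups, and the key space is
+ * 2^<key power>):
+ *
+ * ./skip_cas 4 200 16
+ * ./skip_cas 4 200 16 run.log (log name accepted only when built with DO_WRITE_LOG)
+ */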
+
+/*
+ * ***************** LOGGING
+ */
+
+#define MAX_LOG_RECORDS 256
+
+#define LOG_KIND_INT 0
+#define LOG_KIND_STRING 1
+#define LOG_KIND_FLOAT 2
+
+typedef struct {
+ char *name;
+ int kind;
+ int val_int;
+ char *val_string;
+ float val_float;
+} log_record_t;
+
+static log_record_t log_records[MAX_LOG_RECORDS];
+
+static int num_log_records = 0;
+
+static void log_int (char *name, int val) {
+ log_records[num_log_records].name = name;
+ log_records[num_log_records].kind = LOG_KIND_INT;
+ log_records[num_log_records].val_int = val;
+ num_log_records ++;
+}
+
+static void log_string (char *name, char *val) {
+ log_records[num_log_records].name = name;
+ log_records[num_log_records].kind = LOG_KIND_STRING;
+ log_records[num_log_records].val_string = val;
+ num_log_records ++;
+}
+
+static void log_float (char *name, float val) {
+ log_records[num_log_records].name = name;
+ log_records[num_log_records].kind = LOG_KIND_FLOAT;
+ log_records[num_log_records].val_float = val;
+ num_log_records ++;
+}
+
+static void dump_log (void) {
+ int i;
+
+ fprintf (stdout, "-------------------------------------------"
+ "---------------------------\n");
+ for (i = 0; i < num_log_records; i ++)
+ {
+ char padding[40];
+ strcpy(padding, " ");
+ /* strlen() is unsigned: compare this way round to avoid unsigned wrap. */
+ if (strlen(log_records[i].name) <= 30){
+ padding[30-strlen(log_records[i].name)] = '\0';
+ }
+ fprintf (stdout, "%s%s = ", padding, log_records[i].name);
+ {
+ int kind = log_records [i].kind;
+ if (kind == LOG_KIND_INT) {
+ fprintf (stdout, "%d\n", log_records[i].val_int);
+ } else if (kind == LOG_KIND_STRING) {
+ fprintf (stdout, "%s\n", log_records[i].val_string);
+ } else if (kind == LOG_KIND_FLOAT) {
+ fprintf (stdout, "%.3f\n", log_records[i].val_float);
+ }
+ }
+ }
+ fprintf (stdout, "-------------------------------------------"
+ "---------------------------\n");
+
+ for (i = 0; i < num_log_records; i ++)
+ {
+ int kind = log_records [i].kind;
+ if (i != 0) { fprintf (stderr, " "); }
+ if (kind == LOG_KIND_INT) {
+ fprintf (stderr, "%d", log_records[i].val_int);
+ } else if (kind == LOG_KIND_STRING) {
+ fprintf (stderr, "%s", log_records[i].val_string);
+ } else if (kind == LOG_KIND_FLOAT) {
+ fprintf (stderr, "%.3f", log_records[i].val_float);
+ }
+ }
+ fprintf (stderr, " LOG\n");
+}
+
+/*
+ * ************** END OF LOGGING
+ */
+
+#define TVAL(x) ((x.tv_sec * 1000000) + x.tv_usec)
+
+/* Log tables. Written out at end-of-day. */
+typedef struct log_st
+{
+ interval_t start, end;
+ unsigned int key;
+ void *val, *old_val; /* @old_val used by update() and remove() */
+} log_t;
+#define SIZEOF_GLOBAL_LOG (num_threads*MAX_ITERATIONS*sizeof(log_t))
+static log_t *global_log;
+static interval_t interval = 0;
+
+static bool_t go = FALSE;
+static int threads_initialised1 = 0, max_key, log_max_key;
+static int threads_initialised2 = 0;
+static int threads_initialised3 = 0;
+int num_threads;
+
+static unsigned long proportion;
+
+static struct timeval start_time, done_time;
+static struct tms start_tms, done_tms;
+
+static int successes[MAX_THREADS];
+
+#ifdef SPARC
+static int processors[MAX_THREADS];
+#endif
+
+/* All the variables accessed in the critical main loop. */
+static struct {
+ CACHE_PAD(0);
+ bool_t alarm_time;
+ CACHE_PAD(1);
+ set_t *set;
+ CACHE_PAD(2);
+} shared;
+
+#define nrand(_r) (((_r) = (_r) * 1103515245) + 12345)
+
+static void alarm_handler( int arg)
+{
+ shared.alarm_time = 1;
+}
+
+/*int cntr[MAX_THREADS] = { 0 };*/
+
+static void *thread_start(void *arg)
+{
+ unsigned long k;
+ int i;
+ void *ov, *v;
+ int id = (int)arg;
+#ifdef DO_WRITE_LOG
+ log_t *log = global_log + id*MAX_ITERATIONS;
+ interval_t my_int;
+#endif
+ unsigned long r = ((unsigned long)arg)+3; /*RDTICK();*/
+ unsigned int prop = proportion;
+ unsigned int _max_key = max_key;
+
+#ifdef SPARC
+ i = processor_bind(P_LWPID, P_MYID, processors[id], NULL);
+ if ( i != 0 )
+ {
+ printf("Failed to bind to processor %d! (%d)\n", processors[id], i);
+ abort();
+ }
+#endif
+
+ if ( id == 0 )
+ {
+ _init_ptst_subsystem();
+ _init_gc_subsystem();
+ _init_set_subsystem();
+ shared.set = set_alloc();
+ }
+
+ /* BARRIER FOR ALL THREADS: CAS-increment the counter, then spin until all arrive. */
+ {
+ int n_id, id = threads_initialised1;
+ while ( (n_id = CASIO(&threads_initialised1, id, id+1)) != id )
+ id = n_id;
+ }
+ while ( threads_initialised1 != num_threads ) MB();
+
+#ifndef DO_WRITE_LOG
+ /* Start search structure off with a well-distributed set of initial keys. */
+ for ( i = (_max_key / num_threads); i != 0; i >>= 1 )
+ {
+ for ( k = i >> 1; k < (_max_key / num_threads); k += i )
+ {
+ set_update(shared.set,
+ k + id * (_max_key / num_threads),
+ (void *)0xdeadbee0, 1);
+ }
+ }
+#endif
+
+ {
+ int n_id, id = threads_initialised2;
+ while ( (n_id = CASIO(&threads_initialised2, id, id+1)) != id )
+ id = n_id;
+ }
+ while ( threads_initialised2 != num_threads ) MB();
+
+ if ( id == 0 )
+ {
+ (void)signal(SIGALRM, &alarm_handler);
+ (void)alarm(MAX_WALL_TIME);
+ WMB();
+ gettimeofday(&start_time, NULL);
+ times(&start_tms);
+ go = TRUE;
+ WMB();
+ }
+ else
+ {
+ while ( !go ) MB();
+ }
+
+#ifdef DO_WRITE_LOG
+ get_interval(my_int);
+#endif
+ for ( i = 0; (i < MAX_ITERATIONS) && !shared.alarm_time; i++ )
+ {
+ /* bits 0-3: ignored ; bits 4-11: read proportion ; bit 12: insert vs. delete */
+ k = (nrand(r) >> 4) & (_max_key - 1);
+ nrand(r);
+#ifdef DO_WRITE_LOG
+ log->start = my_int;
+#endif
+ if ( ((r>>4)&255) < prop )
+ {
+ ov = v = set_lookup(shared.set, k);
+ }
+ else if ( ((r>>12)&1) )
+ {
+ v = (void *)((r&~7)|0x8);
+ ov = set_update(shared.set, k, v, 1);
+ }
+ else
+ {
+ v = NULL;
+ ov = set_remove(shared.set, k);
+ }
+
+#ifdef DO_WRITE_LOG
+ get_interval(my_int);
+ log->key = k;
+ log->val = v;
+ log->old_val = ov;
+ log->end = my_int;
+ log++;
+#endif
+ }
+
+ /* BARRIER FOR ALL THREADS */
+ {
+ int n_id, id = threads_initialised3;
+ while ( (n_id = CASIO(&threads_initialised3, id, id+1)) != id )
+ id = n_id;
+ }
+ while ( threads_initialised3 != num_threads ) MB();
+
+#if 0
+ if ( id == 0 )
+ {
+ extern void check_tree(set_t *);
+ check_tree(shared.set);
+ }
+#endif
+
+ if ( id == num_threads - 1 )
+ {
+ gettimeofday(&done_time, NULL);
+ times(&done_tms);
+ WMB();
+ _destroy_gc_subsystem();
+ }
+
+ successes[id] = i;
+
+ return(NULL);
+}
+
+#define THREAD_TEST thread_start
+#define THREAD_FLAGS THR_BOUND
+
+#ifdef PPC
+static pthread_attr_t attr;
+#endif
+
+static void test_multithreaded (void)
+{
+ int i;
+ pthread_t thrs[MAX_THREADS];
+ int num_successes;
+ int min_successes, max_successes;
+ int ticksps = sysconf(_SC_CLK_TCK);
+ float wall_time, user_time, sys_time;
+
+ if ( num_threads == 1 ) goto skip_thread_creation;
+
+#ifdef PPC
+ i = pthread_attr_init (&attr);
+ if (i !=0) {
+ fprintf (stderr, "URK! pthread_attr_init rc=%d\n", i);
+ }
+ i = pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
+ if (i !=0) {
+ fprintf (stderr, "URK! pthread_attr_setscope rc=%d\n", i);
+ }
+#endif
+
+#ifdef MIPS
+ pthread_setconcurrency(num_threads + 1);
+#else
+ pthread_setconcurrency(num_threads);
+#endif
+
+ for (i = 0; i < num_threads; i ++)
+ {
+ MB();
+#ifdef PPC
+ pthread_create (&thrs[i], &attr, THREAD_TEST, (void *)i);
+#else
+ pthread_create (&thrs[i], NULL, THREAD_TEST, (void *)i);
+#endif
+ }
+
+ skip_thread_creation:
+ if ( num_threads == 1 )
+ {
+ thread_start(0);
+ }
+ else
+ {
+ for (i = 0; i < num_threads; i ++)
+ {
+ (void)pthread_join (thrs[i], NULL);
+ }
+ }
+
+ wall_time = (float)(TVAL(done_time) - TVAL(start_time))/ 1000000;
+ user_time = ((float)(done_tms.tms_utime - start_tms.tms_utime))/ticksps;
+ sys_time = ((float)(done_tms.tms_stime - start_tms.tms_stime))/ticksps;
+
+ log_float ("wall_time_s", wall_time);
+ log_float ("user_time_s", user_time);
+ log_float ("system_time_s", sys_time);
+
+ num_successes = 0;
+ min_successes = INT_MAX;
+ max_successes = INT_MIN;
+ for ( i = 0; i < num_threads; i++ )
+ {
+ num_successes += successes[i];
+ if ( successes[i] < min_successes ) min_successes = successes[i];
+ if ( successes[i] > max_successes ) max_successes = successes[i];
+ }
+
+ log_int ("min_successes", min_successes);
+ log_int ("max_successes", max_successes);
+ log_int ("num_successes", num_successes);
+
+ log_float("us_per_success", (num_threads*wall_time*1000000.0)/num_successes);
+
+ log_int("log max key", log_max_key);
+}
+
+#if defined(INTEL)
+static void tstp_handler(int sig, siginfo_t *info, ucontext_t *uc)
+{
+ static unsigned int sem = 0;
+ unsigned long *esp = (unsigned long *)(uc->uc_mcontext.gregs[7]);
+ int pid = getpid();
+
+ while ( CASIO(&sem, 0, 1) != 0 ) sched_yield();
+
+ printf("Signal %d for pid %d\n", sig, pid);
+ printf("%d: EIP=%08x EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", pid,
+ uc->uc_mcontext.gregs[14], uc->uc_mcontext.gregs[11],
+ uc->uc_mcontext.gregs[ 8], uc->uc_mcontext.gregs[10],
+ uc->uc_mcontext.gregs[ 9]);
+ printf("%d: ESP=%08x EBP=%08x ESI=%08x EDI=%08x EFL=%08x\n", pid,
+ uc->uc_mcontext.gregs[ 7], uc->uc_mcontext.gregs[ 6],
+ uc->uc_mcontext.gregs[ 5], uc->uc_mcontext.gregs[ 4],
+ uc->uc_mcontext.gregs[16]);
+ printf("\n");
+
+ sem = 0;
+
+ for ( ; ; ) sched_yield();
+}
+#endif
+
+int main (int argc, char **argv)
+{
+#ifdef DO_WRITE_LOG
+ int fd;
+ unsigned long log_header[] = { 0, MAX_ITERATIONS, 0 };
+
+ if ( argc != 5 )
+ {
+ printf("%s <num_threads> <read_proportion> <key power> <log name>\n"
+ "(0 <= read_proportion <= 256)\n", argv[0]);
+ exit(1);
+ }
+#else
+ if ( argc != 4 )
+ {
+ printf("%s <num_threads> <read_proportion> <key power>\n"
+ "(0 <= read_proportion <= 256)\n", argv[0]);
+ exit(1);
+ }
+#endif
+
+ memset(&shared, 0, sizeof(shared));
+
+ num_threads = atoi(argv[1]);
+ log_int ("num_threads", num_threads);
+
+ proportion = atoi(argv[2]);
+ log_float ("frac_reads", (float)proportion/256.0);
+
+ log_max_key = atoi(argv[3]);
+ max_key = 1 << atoi(argv[3]);
+ log_int("max_key", max_key);
+
+ log_int ("max_iterations", MAX_ITERATIONS);
+
+ log_int ("wall_time_limit_s", MAX_WALL_TIME);
+
+#ifdef SPARC
+ {
+ int st, maxcpu = sysconf(_SC_CPUID_MAX), i, j=0;
+
+ /* Favour processors that don't handle I/O interrupts. */
+ for ( i = 0; i <= maxcpu; i++ )
+ {
+ st = p_online(i, P_STATUS);
+ if ( st == P_NOINTR )
+ {
+ if ( j == num_threads ) break;
+ processors[j++] = i;
+ if ( j == num_threads ) break;
+ }
+ }
+
+ /* Fall back to the system quads if necessary. */
+ for ( i = 0; i <= maxcpu; i++ )
+ {
+ st = p_online(i, P_STATUS);
+ if ( st == P_ONLINE )
+ {
+ if ( j == num_threads ) break;
+ processors[j++] = i;
+ if ( j == num_threads ) break;
+ }
+ }
+
+ if ( j != num_threads )
+ {
+ printf("Urk! Not enough CPUs for threads (%d < %d)\n",
+ j, num_threads);
+ abort();
+ }
+ }
+#endif
+
+#ifdef DO_WRITE_LOG
+ log_header[0] = num_threads;
+ log_header[2] = max_key;
+ global_log = malloc(SIZEOF_GLOBAL_LOG);
+#endif
+
+#if defined(INTEL)
+ {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = (void (*)(int, siginfo_t *, void *))tstp_handler;
+ act.sa_flags = SA_SIGINFO;
+ sigaction(SIGTSTP, &act, NULL);
+ sigaction(SIGQUIT, &act, NULL);
+ sigaction(SIGSEGV, &act, NULL);
+ }
+#endif
+
+ test_multithreaded ();
+
+ dump_log ();
+
+#ifdef DO_WRITE_LOG
+ printf("Writing log...\n");
+ /* Write logs to data file */
+ fd = open(argv[4], O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if ( fd == -1 )
+ {
+ fprintf(stderr, "Error writing log!\n");
+ exit(-1);
+ }
+
+ if ( (write(fd, log_header, sizeof(log_header)) != sizeof(log_header)) ||
+ (write(fd, global_log, SIZEOF_GLOBAL_LOG) != SIZEOF_GLOBAL_LOG) )
+ {
+ fprintf(stderr, "Log write truncated or erroneous\n");
+ close(fd);
+ exit(-1);
+ }
+
+ close(fd);
+#endif
+
+ exit(0);
+}
--- /dev/null
+/******************************************************************************
+ * skip_cas.c
+ *
+ * Skip lists, allowing concurrent update by use of CAS primitives.
+ *
+ * Copyright (c) 2001-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "ptst.h"
+#include "set.h"
+
+
+/*
+ * SKIP LIST
+ */
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+typedef VOLATILE node_t *sh_node_pt;
+
+struct node_st
+{
+ int level; /* low bits hold the level; READY_FOR_FREE marks a pending physical delete */
+#define LEVEL_MASK 0x0ff
+#define READY_FOR_FREE 0x100
+ setkey_t k;
+ setval_t v;
+ sh_node_pt next[1];
+};
+
+struct set_st
+{
+ node_t head;
+};
+
+static int gc_id[NUM_LEVELS];
+
+/*
+ * PRIVATE FUNCTIONS
+ */
+
+/*
+ * Random level generator. Drop-off rate is 0.5 per level.
+ * Returns value 1 <= level <= NUM_LEVELS.
+ */
+static int get_level(ptst_t *ptst)
+{
+ unsigned long r = rand_next(ptst);
+ int l = 1;
+ r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
+ while ( (r & 1) ) { l++; r >>= 1; }
+ return(l);
+}
+
+
+/*
+ * Allocate a new node, and initialise its @level field.
+ * NB. Initialisation will eventually be pushed into garbage collector,
+ * because of dependent read reordering.
+ */
+static node_t *alloc_node(ptst_t *ptst)
+{
+ int l;
+ node_t *n;
+ l = get_level(ptst);
+ n = gc_alloc(ptst, gc_id[l - 1]);
+ n->level = l;
+ return(n);
+}
+
+
+/* Free a node to the garbage collector. */
+static void free_node(ptst_t *ptst, sh_node_pt n)
+{
+ gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
+}
+
+
+/*
+ * Search for first non-deleted node, N, with key >= @k at each level in @l.
+ * RETURN VALUES:
+ * Array @pa: @pa[i] is non-deleted predecessor of N at level i
+ * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
+ * MAIN RETURN VALUE: same as @na[0].
+ */
+static sh_node_pt strong_search_predecessors(
+ set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
+{
+ sh_node_pt x, x_next, old_x_next, y, y_next;
+ setkey_t y_k;
+ int i;
+
+ retry:
+ RMB();
+
+ x = &l->head;
+ for ( i = NUM_LEVELS - 1; i >= 0; i-- )
+ {
+ /* We start our search at previous level's unmarked predecessor. */
+ READ_FIELD(x_next, x->next[i]);
+ /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
+ if ( is_marked_ref(x_next) ) goto retry;
+
+ for ( y = x_next; ; y = y_next )
+ {
+ /* Shift over a sequence of marked nodes. */
+ for ( ; ; )
+ {
+ READ_FIELD(y_next, y->next[i]);
+ if ( !is_marked_ref(y_next) ) break;
+ y = get_unmarked_ref(y_next);
+ }
+
+ READ_FIELD(y_k, y->k);
+ if ( y_k >= k ) break;
+
+ /* Update estimate of predecessor at this level. */
+ x = y;
+ x_next = y_next;
+ }
+
+ /* Swing forward pointer over any marked nodes. */
+ if ( x_next != y )
+ {
+ old_x_next = CASPO(&x->next[i], x_next, y);
+ if ( old_x_next != x_next ) goto retry;
+ }
+
+ if ( pa ) pa[i] = x;
+ if ( na ) na[i] = y;
+ }
+
+ return(y);
+}
+
+
+/* This function does not remove marked nodes. Use it optimistically. */
+static sh_node_pt weak_search_predecessors(
+ set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
+{
+ sh_node_pt x, x_next;
+ setkey_t x_next_k;
+ int i;
+
+ x = &l->head;
+ for ( i = NUM_LEVELS - 1; i >= 0; i-- )
+ {
+ for ( ; ; )
+ {
+ READ_FIELD(x_next, x->next[i]);
+ x_next = get_unmarked_ref(x_next);
+
+ READ_FIELD(x_next_k, x_next->k);
+ if ( x_next_k >= k ) break;
+
+ x = x_next;
+ }
+
+ if ( pa ) pa[i] = x;
+ if ( na ) na[i] = x_next;
+ }
+
+ return(x_next);
+}
+
+
+/*
+ * Mark @x deleted at every level in its list from @level down to level 1.
+ * When all forward pointers are marked, node is effectively deleted.
+ * Future searches will properly remove node by swinging predecessors'
+ * forward pointers.
+ */
+static void mark_deleted(sh_node_pt x, int level)
+{
+ sh_node_pt x_next;
+
+ while ( --level >= 0 )
+ {
+ x_next = x->next[level];
+ while ( !is_marked_ref(x_next) )
+ {
+ x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
+ }
+ WEAK_DEP_ORDER_WMB(); /* mark in order */
+ }
+}
+
+
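+/*
+ * Atomically set READY_FOR_FREE on @x. Returns TRUE if the flag was already
+ * set, or if our CAS lost a race to set it; in that case the caller must
+ * complete the physical deletion itself, since the other racing party has
+ * already handed it off.
+ */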
+static int check_for_full_delete(sh_node_pt x)
+{
+ int level = x->level;
+ return ((level & READY_FOR_FREE) ||
+ (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
+}
+
+
+static void do_full_delete(ptst_t *ptst, set_t *l, sh_node_pt x, int level)
+{
+ setkey_t k = x->k;
+#ifdef WEAK_MEM_ORDER
+ sh_node_pt preds[NUM_LEVELS];
+ int i = level;
+ retry:
+ (void)strong_search_predecessors(l, k, preds, NULL);
+ /*
+ * Above level 1, references to @x can disappear if a node is inserted
+ * immediately before and we see an old value for its forward pointer. This
+ * is a conservative way of checking for that situation.
+ */
+ if ( i > 0 ) RMB();
+ while ( i > 0 )
+ {
+ node_t *n = get_unmarked_ref(preds[i]->next[i]);
+ while ( n->k < k )
+ {
+ n = get_unmarked_ref(n->next[i]);
+ RMB(); /* we don't want refs to @x to "disappear" */
+ }
+ if ( n == x ) goto retry;
+ i--; /* don't need to check this level again, even if we retry. */
+ }
+#else
+ (void)strong_search_predecessors(l, k, NULL, NULL);
+#endif
+ free_node(ptst, x);
+}
+
+
+/*
+ * PUBLIC FUNCTIONS
+ */
+
+set_t *set_alloc(void)
+{
+ set_t *l;
+ node_t *n;
+ int i;
+
+ n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
+ memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
+ n->k = SENTINEL_KEYMAX;
+
+ /*
+ * Set the forward pointers of final node to other than NULL,
+ * otherwise READ_FIELD() will continually execute costly barriers.
+ * Note use of 0xfe -- that doesn't look like a marked value!
+ */
+ memset(n->next, 0xfe, NUM_LEVELS*sizeof(node_t *));
+
+ l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *));
+ l->head.k = SENTINEL_KEYMIN;
+ l->head.level = NUM_LEVELS;
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ l->head.next[i] = n;
+ }
+
+ return(l);
+}
+
+
+setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
+{
+ setval_t ov, new_ov;
+ ptst_t *ptst;
+ sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
+ sh_node_pt pred, succ, new = NULL, new_next, old_next;
+ int i, level;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ succ = weak_search_predecessors(l, k, preds, succs);
+
+ retry:
+ ov = NULL;
+
+ if ( succ->k == k )
+ {
+ /* Already a @k node in the list: update its mapping. */
+ new_ov = succ->v;
+ do {
+ if ( (ov = new_ov) == NULL )
+ {
+ /* Finish deleting the node, then retry. */
+ READ_FIELD(level, succ->level);
+ mark_deleted(succ, level & LEVEL_MASK);
+ succ = strong_search_predecessors(l, k, preds, succs);
+ goto retry;
+ }
+ }
+ while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) );
+
+ if ( new != NULL ) free_node(ptst, new);
+ goto out;
+ }
+
+#ifdef WEAK_MEM_ORDER
+ /* Free node from previous attempt, if this is a retry. */
+ if ( new != NULL )
+ {
+ free_node(ptst, new);
+ new = NULL;
+ }
+#endif
+
+ /* Not in the list, so initialise a new node for insertion. */
+ if ( new == NULL )
+ {
+ new = alloc_node(ptst);
+ new->k = k;
+ new->v = v;
+ }
+ level = new->level;
+
+ /* If successors don't change, this saves us some CAS operations. */
+ for ( i = 0; i < level; i++ )
+ {
+ new->next[i] = succs[i];
+ }
+
+ /* We've committed when we've inserted at level 1. */
+ WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
+ old_next = CASPO(&preds[0]->next[0], succ, new);
+ if ( old_next != succ )
+ {
+ succ = strong_search_predecessors(l, k, preds, succs);
+ goto retry;
+ }
+
+ /* Insert at each of the other levels in turn. */
+ i = 1;
+ while ( i < level )
+ {
+ pred = preds[i];
+ succ = succs[i];
+
+ /* Someone *can* delete @new under our feet! */
+ new_next = new->next[i];
+ if ( is_marked_ref(new_next) ) goto success;
+
+ /* Ensure forward pointer of new node is up to date. */
+ if ( new_next != succ )
+ {
+ old_next = CASPO(&new->next[i], new_next, succ);
+ if ( is_marked_ref(old_next) ) goto success;
+ assert(old_next == new_next);
+ }
+
+ /* Ensure we have unique key values at every level. */
+ if ( succ->k == k ) goto new_world_view;
+ assert((pred->k < k) && (succ->k > k));
+
+ /* Replumb predecessor's forward pointer. */
+ old_next = CASPO(&pred->next[i], succ, new);
+ if ( old_next != succ )
+ {
+ new_world_view:
+ RMB(); /* get up-to-date view of the world. */
+ (void)strong_search_predecessors(l, k, preds, succs);
+ continue;
+ }
+
+ /* Succeeded at this level. */
+ i++;
+ }
+
+ success:
+ /* Ensure node is visible at all levels before punting deletion. */
+ WEAK_DEP_ORDER_WMB();
+ if ( check_for_full_delete(new) )
+ {
+ MB(); /* make sure we see all marks in @new. */
+ do_full_delete(ptst, l, new, level - 1);
+ }
+ out:
+ critical_exit(ptst);
+ return(ov);
+}
+
+
+setval_t set_remove(set_t *l, setkey_t k)
+{
+ setval_t v = NULL, new_v;
+ ptst_t *ptst;
+ sh_node_pt preds[NUM_LEVELS], x;
+ int level, i;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ x = weak_search_predecessors(l, k, preds, NULL);
+ if ( x->k > k ) goto out;
+ READ_FIELD(level, x->level);
+ level = level & LEVEL_MASK;
+
+ /* Once we've marked the value field, the node is effectively deleted. */
+ new_v = x->v;
+ do {
+ v = new_v;
+ if ( v == NULL ) goto out;
+ }
+ while ( (new_v = CASPO(&x->v, v, NULL)) != v );
+
+ /* Committed to @x: mark lower-level forward pointers. */
+ WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
+ mark_deleted(x, level);
+
+ /*
+ * We must swing predecessors' pointers, or we can end up with
+ * an unbounded number of marked but not fully deleted nodes.
+ * Doing this creates a bound equal to number of threads in the system.
+ * Furthermore, we can't legitimately call 'free_node' until all shared
+ * references are gone.
+ */
+ for ( i = level - 1; i >= 0; i-- )
+ {
+ if ( CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x )
+ {
+ if ( (i != (level - 1)) || check_for_full_delete(x) )
+ {
+ MB(); /* make sure we see node at all levels. */
+ do_full_delete(ptst, l, x, i);
+ }
+ goto out;
+ }
+ }
+
+ free_node(ptst, x);
+
+ out:
+ critical_exit(ptst);
+ return(v);
+}
+
+
+setval_t set_lookup(set_t *l, setkey_t k)
+{
+ setval_t v = NULL;
+ ptst_t *ptst;
+ sh_node_pt x;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ x = weak_search_predecessors(l, k, NULL, NULL);
+ if ( x->k == k ) READ_FIELD(v, x->v);
+
+ critical_exit(ptst);
+ return(v);
+}
+
+
+void _init_set_subsystem(void)
+{
+ int i;
+
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *));
+ }
+}
--- /dev/null
+/******************************************************************************
+ * skip_lock.c (Variable-granularity Mutexes)
+ *
+ * Mutex only taken for write operations (reads are unprotected). Write
+ * mutexes come in three flavours, selected by a compile-time flag.
+ *
+ * If FAT_MTX is defined:
+ * A skip list is protected by one mutex for the entire list. Note that this
+ * differs from skip_bm.c, which takes the mutex for read operations as well.
+ *
+ * If TINY_MTX is defined:
+ * Mutex per forward pointer in each node.
+ *
+ * If neither flag is defined:
+ * Mutex per node.
+ *
+ * Copyright (c) 2001-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "ptst.h"
+#include "set.h"
+
+
+/*
+ * SKIP LIST
+ */
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+typedef VOLATILE node_t *sh_node_pt;
+
+typedef struct ptr_st ptr_t;
+struct ptr_st
+{
+#ifdef TINY_MTX /* mutex per forward pointer */
+ mcs_lock_t m;
+#endif
+ sh_node_pt p;
+};
+
+struct node_st
+{
+ int level;
+ setkey_t k;
+ setval_t v;
+#ifndef FAT_MTX
+ mcs_lock_t m;
+#endif
+ ptr_t next[1];
+};
+
+struct set_st
+{
+#ifdef FAT_MTX
+ mcs_lock_t m;
+#endif
+ node_t head;
+};
+
+static int gc_id[NUM_LEVELS];
+
+/*
+ * LOCKING
+ */
+
+#ifdef FAT_MTX
+
+#define LIST_LOCK(_l,_qn) ((void)mcs_lock((void*)&(_l)->m, (_qn)))
+#define LIST_UNLOCK(_l,_qn) ((void)mcs_unlock((void*)&(_l)->m, (_qn)))
+#define NODE_LOCK(_x,_qn) ((void)0)
+#define NODE_UNLOCK(_x,_qn) ((void)0)
+#define PTR_UPDATE_LOCK(_x,_i,_qn) ((void)0)
+#define PTR_UPDATE_UNLOCK(_x,_i,_qn) ((void)0)
+#define PTR_DELETE_LOCK(_x,_i,_qn) ((void)0)
+#define PTR_DELETE_UNLOCK(_x,_i,_qn) ((void)0)
+
+#else
+
+#define LIST_LOCK(_l,_qn) ((void)0)
+#define LIST_UNLOCK(_l,_qn) ((void)0)
+
+/* We take the main node lock to get exclusive rights on insert/delete ops. */
+#define NODE_LOCK(_x,_qn) ((void)mcs_lock((void*)&(_x)->m, (_qn)))
+#define NODE_UNLOCK(_x,_qn) ((void)mcs_unlock((void*)&(_x)->m, (_qn)))
+
+#ifdef TINY_MTX
+
+/*
+ * Predecessor's pointer is locked before swinging (on delete), or
+ * replumbing (on insert).
+ */
+#define PTR_UPDATE_LOCK(_x, _i, _qn) \
+ ((void)mcs_lock((void*)&(_x)->next[(_i)].m, (_qn)))
+#define PTR_UPDATE_UNLOCK(_x, _i, _qn) \
+ ((void)mcs_unlock((void*)&(_x)->next[(_i)].m, (_qn)))
+/*
+ * When deleting a node, we take the lock on each of its pointers in turn,
+ * to prevent someone from inserting a new node directly after, or deleting
+ * immediate successor.
+ */
+#define PTR_DELETE_LOCK(_x, _i, _qn) PTR_UPDATE_LOCK(_x,_i,(_qn))
+#define PTR_DELETE_UNLOCK(_x, _i, _qn) PTR_UPDATE_UNLOCK(_x,_i,(_qn))
+
+#else /* neither FAT_MTX nor TINY_MTX: mutex per node */
+
+/*
+ * Predecessor must certainly be locked for insert/delete ops. So we take
+ * the only lock we can.
+ */
+#define PTR_UPDATE_LOCK(_x, _i, _qn) NODE_LOCK(_x,(_qn))
+#define PTR_UPDATE_UNLOCK(_x, _i, _qn) NODE_UNLOCK(_x,(_qn))
+/*
+ * We can't lock individual pointers. There's no need anyway, since we have
+ * the node's lock already (to allow us exclusive delete rights).
+ */
+#define PTR_DELETE_LOCK(_x, _i, _qn) ((void)0)
+#define PTR_DELETE_UNLOCK(_x, _i, _qn) ((void)0)
+
+#endif
+
+#endif
+
+
+/*
+ * PRIVATE FUNCTIONS
+ */
+
+/*
+ * Random level generator. Drop-off rate is 0.5 per level.
+ * Returns value 1 <= level <= NUM_LEVELS.
+ */
+static int get_level(ptst_t *ptst)
+{
+ unsigned long r = rand_next(ptst);
+ int l = 1;
+ r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
+ while ( (r & 1) ) { l++; r >>= 1; }
+ return(l);
+}
+
+
+/*
+ * Allocate a new node, and initialise its @level field.
+ * NB. Initialisation will eventually be pushed into garbage collector,
+ * because of dependent read reordering.
+ */
+static node_t *alloc_node(ptst_t *ptst)
+{
+ int l;
+ node_t *n;
+ l = get_level(ptst);
+ n = gc_alloc(ptst, gc_id[l - 1]);
+ n->level = l;
+#ifndef FAT_MTX
+ mcs_init(&n->m);
+#endif
+#ifdef TINY_MTX
+ for ( l = 0; l < n->level; l++ )
+ {
+ mcs_init(&n->next[l].m);
+ }
+#endif
+ return(n);
+}
+
+
+/* Free a node to the garbage collector. */
+static void free_node(ptst_t *ptst, sh_node_pt n)
+{
+ gc_free(ptst, (void *)n, gc_id[n->level - 1]);
+}
+
+
+/*
+ * Find and lock the predecessor, at level @i, of the node with key @k.
+ * The search only moves forward from @x, so the returned node's key is
+ * >= @x->k; its level-@i successor is the first node with key >= @k.
+ */
+#ifndef FAT_MTX
+static sh_node_pt get_lock(sh_node_pt x, setkey_t k, int i, qnode_t *qn)
+{
+ sh_node_pt y;
+ setkey_t y_k;
+
+ for ( ; ; )
+ {
+ READ_FIELD(y, x->next[i].p);
+ READ_FIELD(y_k, y->k);
+ if ( y_k >= k ) break;
+ retry:
+ x = y;
+ }
+
+ PTR_UPDATE_LOCK(x, i, qn); /* MB => no need for READ_FIELD on x or y. */
+ y = x->next[i].p;
+ if ( y->k < k )
+ {
+ PTR_UPDATE_UNLOCK(x, i, qn);
+ goto retry;
+ }
+
+ return(x);
+}
+#else
+#define get_lock(_x,_k,_i,_qn) (_x)
+#endif
+
+
+/*
+ * Search for first non-deleted node, N, with key >= @k at each level in @l.
+ * RETURN VALUES:
+ * Array @pa: @pa[i] is non-deleted predecessor of N at level i
+ * MAIN RETURN VALUE: N at level 0.
+ */
+static sh_node_pt search_predecessors(set_t *l, setkey_t k, sh_node_pt *pa)
+{
+ sh_node_pt x, y;
+ setkey_t y_k;
+ int i;
+
+ x = &l->head;
+ for ( i = NUM_LEVELS - 1; i >= 0; i-- )
+ {
+ for ( ; ; )
+ {
+ READ_FIELD(y, x->next[i].p);
+ READ_FIELD(y_k, y->k);
+ if ( y_k >= k ) break;
+ x = y; /* remember largest predecessor so far */
+ }
+
+ if ( pa ) pa[i] = x;
+ }
+
+ return(y);
+}
+
+
+/*
+ * PUBLIC FUNCTIONS
+ */
+
+set_t *set_alloc(void)
+{
+ set_t *l;
+ node_t *n;
+ int i;
+
+ n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(ptr_t));
+ memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(ptr_t));
+ n->k = SENTINEL_KEYMAX;
+
+ l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(ptr_t));
+ l->head.k = SENTINEL_KEYMIN;
+ l->head.level = NUM_LEVELS;
+#ifdef FAT_MTX
+ mcs_init(&l->m);
+#else
+ mcs_init(&l->head.m);
+#endif
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ l->head.next[i].p = n;
+#ifdef TINY_MTX
+ mcs_init(&l->head.next[i].m);
+#endif
+ }
+
+ return(l);
+}
+
+
+setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
+{
+ setval_t ov = NULL;
+ ptst_t *ptst;
+ sh_node_pt update[NUM_LEVELS];
+ sh_node_pt x, y;
+ int i;
+ qnode_t l_qn, x_qn, y_qn;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+ LIST_LOCK(l, &l_qn);
+
+ (void)search_predecessors(l, k, update);
+
+ x = get_lock(update[0], k, 0, &x_qn);
+ y = x->next[0].p;
+ if ( y->k == k )
+ {
+ ov = y->v;
+ if ( overwrite ) y->v = v;
+ PTR_UPDATE_UNLOCK(x, 0, &x_qn);
+ goto out;
+ }
+
+ /* Not in the list, so do the insertion. */
+ y = alloc_node(ptst);
+ y->k = k;
+ y->v = v;
+ NODE_LOCK(y, &y_qn);
+
+ for ( i = 0; i < y->level; i++ )
+ {
+ if ( i != 0 ) x = get_lock(update[i], k, i, &x_qn);
+ y->next[i].p = x->next[i].p;
+ WMB();
+ x->next[i].p = y;
+ PTR_UPDATE_UNLOCK(x, i, &x_qn);
+ }
+
+ NODE_UNLOCK(y, &y_qn);
+
+ out:
+ LIST_UNLOCK(l, &l_qn);
+ critical_exit(ptst);
+ return(ov);
+}
+
+
+setval_t set_remove(set_t *l, setkey_t k)
+{
+ setval_t v = NULL;
+ ptst_t *ptst;
+ sh_node_pt update[NUM_LEVELS];
+ sh_node_pt x, y;
+ int i;
+ qnode_t l_qn, x_qn, y_qn, yd_qn;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+ LIST_LOCK(l, &l_qn);
+
+ y = search_predecessors(l, k, update);
+
+#ifdef FAT_MTX
+ if ( y->k != k ) goto out;
+#else
+ y = update[0];
+ for ( ; ; )
+ {
+ setkey_t y_k;
+ y = y->next[0].p; /* no need for READ_FIELD() */
+ READ_FIELD(y_k, y->k);
+ if ( y_k > k ) goto out;
+ NODE_LOCK(y, &y_qn);
+ if ( (y_k == k) && (y_k <= y->next[0].p->k) ) break;
+ NODE_UNLOCK(y, &y_qn);
+ }
+#endif
+
+ /* @y is the correct node, and we have it locked, so now delete it. */
+ for ( i = y->level - 1; i >= 0; i-- )
+ {
+ x = get_lock(update[i], k, i, &x_qn);
+ PTR_DELETE_LOCK(y, i, &yd_qn);
+ x->next[i].p = y->next[i].p;
+ WMB();
+ y->next[i].p = x; /* deleted node points back at its predecessor */
+ PTR_DELETE_UNLOCK(y, i, &yd_qn);
+ PTR_UPDATE_UNLOCK(x, i, &x_qn);
+ }
+
+ v = y->v;
+ free_node(ptst, y);
+ NODE_UNLOCK(y, &y_qn);
+
+ out:
+ LIST_UNLOCK(l, &l_qn);
+ critical_exit(ptst);
+ return(v);
+}
+
+
+setval_t set_lookup(set_t *l, setkey_t k)
+{
+ setval_t v = NULL;
+ ptst_t *ptst;
+ sh_node_pt x;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ x = search_predecessors(l, k, NULL);
+ if ( x->k == k ) READ_FIELD(v, x->v);
+
+ critical_exit(ptst);
+ return(v);
+}
+
+
+void _init_set_subsystem(void)
+{
+ int i;
+
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(ptr_t));
+ }
+}
--- /dev/null
+/******************************************************************************
+ * skip_mcas.c
+ *
+ * Skip lists, allowing concurrent update by use of MCAS primitive.
+ *
+ * Copyright (c) 2001-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "ptst.h"
+#include "set.h"
+
+/* A value with either low-order bit set refers to an MCAS/CCAS descriptor. */
+#define MCAS_MARK(_v) ((unsigned long)(_v) & 3)
+
+/* Drive any MCAS in progress on *(_pv) forward, re-reading until unmarked. */
+#define PROCESS(_v, _pv) \
+ while ( MCAS_MARK(_v) ) { \
+ mcas_fixup((void **)(_pv), _v); \
+ (_v) = *(_pv); \
+ }
+
+/* Resolve a marked value to the current logical contents (see mcas.c). */
+#define WALK_THRU(_v, _pv) \
+ if ( MCAS_MARK(_v) ) (_v) = read_barrier_lite((void **)(_pv));
+
+/* Pull in the MCAS implementation. */
+#include "mcas.c"
+
+/*
+ * SKIP LIST
+ */
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+typedef VOLATILE node_t *sh_node_pt;
+
+struct node_st
+{
+ int level;
+ setkey_t k;
+ setval_t v;
+ sh_node_pt next[1];
+};
+
+struct set_st
+{
+ node_t head;
+};
+
+static int gc_id[NUM_LEVELS];
+
+/*
+ * PRIVATE FUNCTIONS
+ */
+
+/*
+ * Random level generator. Drop-off rate is 0.5 per level.
+ * Returns value 1 <= level <= NUM_LEVELS.
+ */
+static int get_level(ptst_t *ptst)
+{
+ unsigned long r = rand_next(ptst);
+ int l = 1;
+ r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
+ while ( (r & 1) ) { l++; r >>= 1; }
+ return(l);
+}
+
+
+/*
+ * Allocate a new node, and initialise its @level field.
+ * NB. Initialisation will eventually be pushed into garbage collector,
+ * because of dependent read reordering.
+ */
+static node_t *alloc_node(ptst_t *ptst)
+{
+ int l;
+ node_t *n;
+ l = get_level(ptst);
+ n = gc_alloc(ptst, gc_id[l - 1]);
+ n->level = l;
+ return(n);
+}
+
+
+/* Free a node to the garbage collector. */
+static void free_node(ptst_t *ptst, sh_node_pt n)
+{
+ gc_free(ptst, (void *)n, gc_id[n->level - 1]);
+}
+
+
+/*
+ * Search for first non-deleted node, N, with key >= @k at each level in @l.
+ * RETURN VALUES:
+ * Array @pa: @pa[i] is non-deleted predecessor of N at level i
+ * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
+ * MAIN RETURN VALUE: same as @na[0].
+ */
+static sh_node_pt search_predecessors(
+ set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
+{
+ sh_node_pt x, x_next;
+ setkey_t x_next_k;
+ int i;
+
+ RMB();
+
+ x = &l->head;
+ for ( i = NUM_LEVELS - 1; i >= 0; i-- )
+ {
+ for ( ; ; )
+ {
+ READ_FIELD(x_next, x->next[i]);
+ WALK_THRU(x_next, &x->next[i]);
+
+ READ_FIELD(x_next_k, x_next->k);
+ if ( x_next_k >= k ) break;
+
+ x = x_next;
+ }
+
+ if ( pa ) pa[i] = x;
+ if ( na ) na[i] = x_next;
+ }
+
+ return(x_next);
+}
+
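+/*
+ * Physically delete @x with a single MCAS covering 2*level+1 locations:
+ * NULL x's value field, point each x->next[i] back at preds[i], and swing
+ * each preds[i]->next[i] past x. Returns the deleted value, or NULL if the
+ * node was already (being) deleted or the MCAS failed (the caller retries).
+ */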
+static setval_t finish_delete(sh_node_pt x, sh_node_pt *preds)
+{
+ per_thread_state_t *mcas_ptst = get_ptst();
+ CasDescriptor_t *cd;
+ int level, i, ret = FALSE;
+ sh_node_pt x_next;
+ setkey_t x_next_k;
+ setval_t v;
+
+ READ_FIELD(level, x->level);
+
+ cd = new_descriptor(mcas_ptst, (level << 1) + 1);
+ cd->status = STATUS_IN_PROGRESS;
+ cd->length = (level << 1) + 1;
+
+ /* First, the deleted node's value field. */
+ READ_FIELD(v, x->v);
+ PROCESS(v, &x->v);
+ if ( v == NULL ) goto fail;
+ cd->entries[0].ptr = (void **)&x->v;
+ cd->entries[0].old = v;
+ cd->entries[0].new = NULL;
+
+ for ( i = 0; i < level; i++ )
+ {
+ READ_FIELD(x_next, x->next[i]);
+ PROCESS(x_next, &x->next[i]);
+ READ_FIELD(x_next_k, x_next->k);
+ if ( x->k > x_next_k ) { v = NULL; goto fail; }
+ cd->entries[i +1].ptr = (void **)&x->next[i];
+ cd->entries[i +1].old = x_next;
+ cd->entries[i +1].new = preds[i];
+ cd->entries[i+level+1].ptr = (void **)&preds[i]->next[i];
+ cd->entries[i+level+1].old = x;
+ cd->entries[i+level+1].new = x_next;
+ }
+
+ ret = mcas0(mcas_ptst, cd);
+ if ( ret == 0 ) v = NULL;
+
+ fail:
+ rc_down_descriptor(cd);
+ return v;
+}
+
+
+/*
+ * PUBLIC FUNCTIONS
+ */
+
+set_t *set_alloc(void)
+{
+ set_t *l;
+ node_t *n;
+ int i;
+
+ static int mcas_inited = 0;
+ if ( !CASIO(&mcas_inited, 0, 1) ) mcas_init();
+
+ n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
+ memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
+ n->k = SENTINEL_KEYMAX;
+
+ /*
+ * Set the forward pointers of final node to other than NULL,
+ * otherwise READ_FIELD() will continually execute costly barriers.
+ * Note use of 0xfc -- that doesn't look like a marked value!
+ */
+ memset(n->next, 0xfc, NUM_LEVELS*sizeof(node_t *));
+
+ l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *));
+ l->head.k = SENTINEL_KEYMIN;
+ l->head.level = NUM_LEVELS;
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ l->head.next[i] = n;
+ }
+
+ return(l);
+}
+
+
+setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
+{
+ setval_t ov, new_ov;
+ ptst_t *ptst;
+ sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
+ sh_node_pt succ, new = NULL;
+ int i, ret;
+ per_thread_state_t *mcas_ptst = NULL;
+ CasDescriptor_t *cd;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ retry:
+ ov = NULL;
+
+ succ = search_predecessors(l, k, preds, succs);
+
+ if ( succ->k == k )
+ {
+ /* Already a @k node in the list: update its mapping. */
+ READ_FIELD(new_ov, succ->v);
+ do {
+ ov = new_ov;
+ PROCESS(ov, &succ->v);
+ if ( ov == NULL ) goto retry;
+ }
+ while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) );
+
+ if ( new != NULL ) free_node(ptst, new);
+ goto out;
+ }
+
+#ifdef WEAK_MEM_ORDER
+ /* Free node from previous attempt, if this is a retry. */
+ if ( new != NULL )
+ {
+ free_node(ptst, new);
+ new = NULL;
+ }
+#endif
+
+ /* Not in the list, so initialise a new node for insertion. */
+ if ( new == NULL )
+ {
+ new = alloc_node(ptst);
+ new->k = k;
+ new->v = v;
+ }
+
+ for ( i = 0; i < new->level; i++ )
+ {
+ new->next[i] = succs[i];
+ }
+
+ if ( !mcas_ptst ) mcas_ptst = get_ptst();
+ cd = new_descriptor(mcas_ptst, new->level);
+ cd->status = STATUS_IN_PROGRESS;
+ cd->length = new->level;
+ for ( i = 0; i < new->level; i++ )
+ {
+ cd->entries[i].ptr = (void **)&preds[i]->next[i];
+ cd->entries[i].old = succs[i];
+ cd->entries[i].new = new;
+ }
+ ret = mcas0(mcas_ptst, cd);
+ rc_down_descriptor(cd);
+ }
+ while ( !ret );
+
+ out:
+ critical_exit(ptst);
+ return(ov);
+}
+
+
+setval_t set_remove(set_t *l, setkey_t k)
+{
+ setval_t v = NULL;
+ ptst_t *ptst;
+ sh_node_pt preds[NUM_LEVELS], x;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ x = search_predecessors(l, k, preds, NULL);
+ if ( x->k > k ) goto out;
+ } while ( (v = finish_delete(x, preds)) == NULL );
+
+ free_node(ptst, x);
+
+ out:
+ critical_exit(ptst);
+ return(v);
+}
+
+
+setval_t set_lookup(set_t *l, setkey_t k)
+{
+ setval_t v = NULL;
+ ptst_t *ptst;
+ sh_node_pt x;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ x = search_predecessors(l, k, NULL, NULL);
+ if ( x->k == k )
+ {
+ READ_FIELD(v, x->v);
+ WALK_THRU(v, &x->v);
+ }
+
+ critical_exit(ptst);
+ return(v);
+}
+
+
+void _init_set_subsystem(void)
+{
+ int i;
+
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ {
+ gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *));
+ }
+
+}
--- /dev/null
+/******************************************************************************
+ * skip_stm.c
+ *
+ * Skip lists, allowing concurrent update by use of the STM abstraction.
+ *
+ * Copyright (c) 2003, K A Fraser
+ *
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "stm.h"
+#include "set.h"
+
+typedef struct node_st node_t;
+typedef stm_blk set_t;
+
+struct node_st
+{
+ int level;
+ setkey_t k;
+ setval_t v;
+ stm_blk *next[NUM_LEVELS];
+};
+
+static struct {
+ CACHE_PAD(0);
+ stm *memory; /* read-only */
+ CACHE_PAD(2);
+} shared;
+
+#define MEMORY (shared.memory)
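+
+/*
+ * Every set operation below follows the same optimistic pattern (sketch):
+ *
+ * do {
+ * new_stm_tx(tx, ptst, MEMORY);
+ * ... read_stm_blk() to traverse, write_stm_blk() to modify ...
+ * } while ( !commit_stm_tx(ptst, tx) );
+ */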
+
+/*
+ * Random level generator. Drop-off rate is 0.5 per level.
+ * Returns value 1 <= level <= NUM_LEVELS.
+ */
+static int get_level(ptst_t *ptst)
+{
+ unsigned long r = rand_next(ptst);
+ int l = 1;
+ r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
+ while ( (r & 1) ) { l++; r >>= 1; }
+ return l;
+}
+
+
+/*
+ * Search for first non-deleted node, N, with key >= @k at each level in @l.
+ * RETURN VALUES:
+ * Array @pa: @pa[i] is non-deleted predecessor of N at level i
+ * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
+ * MAIN RETURN VALUE: same as @na[0], direct pointer open for reading.
+ */
+static node_t *search_predecessors(
+ ptst_t *ptst, stm_tx *tx, set_t *l, setkey_t k, stm_blk **pa, stm_blk **na)
+{
+ stm_blk *xb, *x_nextb;
+ node_t *x, *x_next;
+ int i;
+
+ xb = l;
+ x = read_stm_blk(ptst, tx, l);
+ for ( i = NUM_LEVELS - 1; i >= 0; i-- )
+ {
+ for ( ; ; )
+ {
+ x_nextb = x->next[i];
+ x_next = read_stm_blk(ptst, tx, x_nextb);
+ if ( x_next->k >= k ) break;
+ xb = x_nextb;
+ x = x_next;
+ }
+
+ if ( pa ) pa[i] = xb;
+ if ( na ) na[i] = x_nextb;
+ }
+
+ return x_next;
+}
+
+
+/*
+ * PUBLIC FUNCTIONS
+ */
+
+set_t *set_alloc(void)
+{
+ ptst_t *ptst;
+ stm_blk *hb, *tb;
+ node_t *h, *t;
+ int i;
+
+ ptst = critical_enter();
+
+ tb = new_stm_blk(ptst, MEMORY);
+ t = init_stm_blk(ptst, MEMORY, tb);
+ memset(t, 0, sizeof(*t));
+ t->k = SENTINEL_KEYMAX;
+
+ hb = new_stm_blk(ptst, MEMORY);
+ h = init_stm_blk(ptst, MEMORY, hb);
+ memset(h, 0, sizeof(*h));
+ h->k = SENTINEL_KEYMIN;
+ h->level = NUM_LEVELS;
+ for ( i = 0; i < NUM_LEVELS; i++ )
+ h->next[i] = tb;
+
+ critical_exit(ptst);
+
+ return hb;
+}
+
+
+setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
+{
+ ptst_t *ptst;
+ stm_tx *tx;
+ setval_t ov;
+ stm_blk *bpreds[NUM_LEVELS], *bsuccs[NUM_LEVELS], *newb = NULL;
+ node_t *x, *p, *new;
+ int i;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+ x = search_predecessors(ptst, tx, l, k, bpreds, bsuccs);
+
+ if ( x->k == k )
+ {
+ x = write_stm_blk(ptst, tx, bsuccs[0]);
+ ov = x->v;
+ x->v = v;
+ }
+ else
+ {
+ ov = NULL;
+
+ if ( newb == NULL )
+ {
+ newb = new_stm_blk(ptst, MEMORY);
+ new = init_stm_blk(ptst, MEMORY, newb);
+ new->k = k;
+ new->v = v;
+ new->level = get_level(ptst);
+ }
+
+ for ( i = 0; i < new->level; i++ )
+ {
+ new->next[i] = bsuccs[i];
+ p = write_stm_blk(ptst, tx, bpreds[i]);
+ p->next[i] = newb;
+ }
+ }
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ if ( (ov != NULL) && (newb != NULL) )
+ free_stm_blk(ptst, MEMORY, newb);
+
+ critical_exit(ptst);
+
+ return ov;
+}
+
+
+setval_t set_remove(set_t *l, setkey_t k)
+{
+ setval_t v;
+ ptst_t *ptst;
+ stm_tx *tx;
+ stm_blk *bpreds[NUM_LEVELS], *bsuccs[NUM_LEVELS];
+ node_t *p, *x;
+ int i;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+ x = search_predecessors(ptst, tx, l, k, bpreds, bsuccs);
+ if ( x->k == k )
+ {
+ v = x->v;
+ for ( i = 0; i < x->level; i++ )
+ {
+ p = write_stm_blk(ptst, tx, bpreds[i]);
+ p->next[i] = x->next[i];
+ }
+ }
+ else
+ {
+ v = NULL;
+ }
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ if ( v != NULL )
+ free_stm_blk(ptst, MEMORY, bsuccs[0]);
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+setval_t set_lookup(set_t *l, setkey_t k)
+{
+ setval_t v;
+ ptst_t *ptst;
+ stm_tx *tx;
+ node_t *x;
+
+ k = CALLER_TO_INTERNAL_KEY(k);
+
+ ptst = critical_enter();
+
+ do {
+ new_stm_tx(tx, ptst, MEMORY);
+ x = search_predecessors(ptst, tx, l, k, NULL, NULL);
+ v = (x->k == k) ? x->v : NULL;
+ }
+ while ( !commit_stm_tx(ptst, tx) );
+
+ critical_exit(ptst);
+
+ return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+ ptst_t *ptst = critical_enter();
+ _init_stm_subsystem(0);
+ MEMORY = new_stm(ptst, sizeof(node_t));
+ critical_exit(ptst);
+}
--- /dev/null
+#ifndef __SPARC_DEFNS_H__
+#define __SPARC_DEFNS_H__
+
+#ifndef SPARC
+#define SPARC
+#endif
+
+#include <sys/types.h>
+#include <sys/processor.h>
+#include <sys/procset.h>
+#include <sched.h>
+#include <alloca.h>
+
+#define CACHE_LINE_SIZE 64
+
+#if 1
+#include <thread.h>
+#define pthread_mutex_t mutex_t
+#define pthread_cond_t cond_t
+#define pthread_t thread_t
+#define pthread_key_t thread_key_t
+#define pthread_create(_a,_b,_c,_d) thr_create(NULL,0,_c,_d,THR_BOUND|THR_NEW_LWP,_a)
+#define pthread_join(_a,_b) thr_join(_a,NULL,NULL)
+#define pthread_key_create(_a,_b) thr_keycreate(_a,_b)
+#define pthread_setspecific(_a,_b) thr_setspecific(_a,_b)
+static void *pthread_getspecific(pthread_key_t _a)
+{
+ void *__x;
+ thr_getspecific(_a,&__x);
+ return __x;
+}
+#define pthread_setconcurrency(_x) thr_setconcurrency(_x)
+#define pthread_mutex_init(_a,_b) mutex_init(_a,USYNC_THREAD,NULL)
+#define pthread_mutex_lock(_a) mutex_lock(_a)
+#define pthread_mutex_unlock(_a) mutex_unlock(_a)
+#define pthread_cond_init(_a,_b) cond_init(_a,USYNC_THREAD,NULL)
+#define pthread_cond_wait(_a,_b) cond_wait(_a,_b)
+#define pthread_cond_broadcast(_a) cond_broadcast(_a)
+#else
+#include <pthread.h>
+#endif
+
+
+/*
+ * I. Compare-and-swap.
+ */
+
+typedef unsigned long long _u64;
+
+extern int CASIO_internal(int *, int, int);
+extern void * CASPO_internal(void *, void *, void *);
+extern _u64 CAS64O_internal(_u64 *, _u64, _u64);
+#define CASIO(_a,_o,_n) (CASIO_internal((int*)(_a),(int)(_o),(int)(_n)))
+#define CASPO(_a,_o,_n) (CASPO_internal((void*)(_a),(void*)(_o),(void*)(_n)))
+#define CAS32O(_a,_o,_n) (_u32)(CASIO_internal((int *)_a,(int)_o,(int)_n))
+#define CAS64O(_a,_o,_n) (CAS64O_internal((_u64 *)_a,(_u64)_o,(_u64)_n))
+
+static int FASIO(int *a, int n)
+{
+ int no, o = *a;
+ while ( (no = CASIO(a, o, n)) != o ) o = no;
+ return o;
+}
+
+static void *FASPO(void *a, void *n)
+{
+ void *no, *o = *(void **)a;
+ while ( (no = CASPO(a, o, n)) != o ) o = no;
+ return o;
+}
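+
+/*
+ * Illustrative sketch (compiled out; not part of the library): the usual
+ * retry loop around CASPO for a lock-free pointer update, here pushing
+ * onto a singly-linked stack.  The 'example_' names are hypothetical.
+ */
+#if 0
+typedef struct example_node_st {
+    struct example_node_st *next;
+} example_node_t;
+
+static void example_push(example_node_t **stack, example_node_t *n)
+{
+    example_node_t *old;
+    do {
+        old = *stack;
+        n->next = old;
+    }
+    while ( CASPO(stack, old, n) != old ); /* CASPO returns the old value */
+}
+#endif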
+
+
+/*
+ * II. Memory barriers.
+ * WMB(): All preceding write operations must commit before any later writes.
+ * RMB(): All preceding read operations must commit before any later reads.
+ * MB(): All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+extern void MEMBAR_ALL(void);
+extern void MEMBAR_STORESTORE(void);
+extern void MEMBAR_LOADLOAD(void);
+#define MB() MEMBAR_ALL()
+#define WMB() MEMBAR_STORESTORE()
+#define RMB() MEMBAR_LOADLOAD()
+#define VOLATILE /*volatile*/
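+
+/*
+ * Illustrative sketch (compiled out; not part of the library): the usual
+ * publish/consume pairing of WMB() and RMB().  The 'example_' names are
+ * hypothetical.
+ */
+#if 0
+static int example_payload;
+static int example_ready;
+
+static void example_publish(int v)
+{
+    example_payload = v;
+    WMB();                    /* payload must be visible before the flag */
+    example_ready = 1;
+}
+
+static int example_consume(void)
+{
+    while ( !example_ready ) continue;
+    RMB();                    /* flag must be read before the payload */
+    return example_payload;
+}
+#endif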
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+typedef unsigned long tick_t;
+extern tick_t RDTICK(void);
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+
+#endif /* __SPARC_DEFNS_H__ */
--- /dev/null
+.inline MEMBAR_ALL, 0
+ membar #StoreStore | #LoadLoad | #LoadStore | #StoreLoad
+.end
+
+.inline MEMBAR_STORESTORE, 0
+ membar #StoreStore
+.end
+
+.inline MEMBAR_LOADLOAD, 0
+ membar #LoadLoad
+.end
+
+.inline CASPO_internal
+ casx [%o0], %o1, %o2
+ mov %o2, %o0
+.end
+
+.inline CAS64O_internal
+ casx [%o0], %o1, %o2
+ mov %o2, %o0
+.end
+
+.inline CASIO_internal
+ cas [%o0], %o1, %o2
+ mov %o2, %o0
+.end
+
+.inline RDTICK
+ rd %tick, %o0
+.end
--- /dev/null
+/******************************************************************************
+ * stm.h
+ *
+ * Interface definitions for software transactional memory (STM).
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ */
+
+#include "ptst.h"
+#include <setjmp.h>
+
+typedef struct stm_st stm;
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_st stm_tx;
+
+stm *new_stm(ptst_t *ptst, int blk_size);
+void free_stm(ptst_t *ptst, stm *mem);
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem);
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv);
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t);
+/* NB. Must still call commit after abort, but it's guaranteed to fail. */
+void abort_stm_tx(ptst_t *ptst, stm_tx *t);
+
+void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b);
+void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b);
+
+void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b);
+
+void _init_stm_subsystem(int pad_data);
+
+#define new_stm_tx(_tx, _ptst, _mem) \
+ do { \
+ sigjmp_buf env; \
+ sigsetjmp(env, 1); \
+ (_tx) = new_stm_tx((_ptst), (_mem), &env); \
+ } while ( 0 )
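+
+/*
+ * Illustrative usage sketch (compiled out; not part of the interface).
+ * It shows the retry idiom used throughout this library: the new_stm_tx()
+ * macro plants a sigsetjmp() target so that a conflicting read_stm_blk()
+ * or write_stm_blk() can restart the transaction, and commit_stm_tx() is
+ * retried until it succeeds.  'counter_t' and the function name below are
+ * hypothetical; 'cb' is assumed to be a block created with new_stm_blk()
+ * from an 'stm' whose block size covers counter_t.
+ */
+#if 0
+typedef struct { int val; } counter_t;
+
+static int example_increment(stm *mem, stm_blk *cb)
+{
+    ptst_t *ptst;
+    stm_tx *tx;
+    counter_t *c;
+    int new_val;
+
+    ptst = critical_enter();
+
+    do {
+        new_stm_tx(tx, ptst, mem);        /* (re)start; sigsetjmp target */
+        c = write_stm_blk(ptst, tx, cb);  /* private shadow copy of cb */
+        new_val = ++c->val;
+    }
+    while ( !commit_stm_tx(ptst, tx) );   /* retry on conflict */
+
+    critical_exit(ptst);
+
+    return new_val;
+}
+#endif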
--- /dev/null
+/******************************************************************************
+ * stm_fraser.c
+ *
+ * Lock-free software transactional memory (STM).
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <assert.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <signal.h>
+
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_blk_st {
+ void *data;
+};
+
+struct stm_tx_entry_st {
+ stm_blk *b;
+ void *old;
+ void *new;
+ stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+ int status;
+ int rc;
+ stm_tx *next_free;
+ stm_tx_entry *reads;
+ stm_tx_entry *writes;
+ stm_tx_entry *alloc_ptr, *check;
+ int gc_data_id, blk_size; /* copied from 'stm' structure */
+ sigjmp_buf *penv;
+};
+
+struct stm_st {
+ int gc_data_id;
+ int blk_size;
+};
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+ void *arena, *arena_lim;
+ stm_tx *next_descriptor;
+ stm_tx *cur_tx;
+ CACHE_PAD(0);
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id; /* Allocation id for block descriptors. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define ARENA_SIZE 40960
+#define DESCRIPTOR_SIZE 4096
+
+#define TXS_IN_PROGRESS 0
+#define TXS_READ_PHASE 1
+#define TXS_FAILED 2
+#define TXS_SUCCESSFUL 3
+
+#define is_descriptor(_p) ((unsigned long)(_p) & 1)
+#define ptr_to_descriptor(_p) ((stm_tx *)((unsigned long)(_p) & ~1))
+#define make_marked_ptr(_p) ((void *)((unsigned long)(_p) | 1))
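+
+/*
+ * Ownership encoding (summary): while a transaction t is in its write
+ * phase, each block it owns has b->data == make_marked_ptr(t); the real
+ * data pointers live in t's write entry for b.  A reader that finds a
+ * marked pointer helps t to completion and retries (see read_blk_data()).
+ */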
+
+/* Is transaction read-only? */
+#define read_only(_t) ((_t)->writes == NULL)
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+
+static void new_arena (priv_t *priv, int size)
+{
+ priv->arena = malloc(size);
+ if ( priv->arena == NULL ) abort();
+ priv->arena_lim = (((char *) priv->arena) + size);
+}
+
+static void release_descriptor(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent;
+ priv_t *priv = &priv_ptst[ptst->id];
+ void *data;
+
+ assert(t->status >= TXS_FAILED);
+
+ t->next_free = priv->next_descriptor;
+ priv->next_descriptor = t;
+
+ if ( t->status == TXS_SUCCESSFUL )
+ {
+ for ( ent = t->writes; ent != NULL; ent = ent->next )
+ {
+ gc_free(ptst, ent->old, t->gc_data_id);
+ }
+ }
+ else
+ {
+ for ( ent = t->writes; ent != NULL; ent = ent->next )
+ {
+ gc_unsafe_free(ptst, ent->new, t->gc_data_id);
+ }
+ }
+}
+
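+/*
+ * Descriptor reference counts are kept in multiples of two, with bit 0
+ * serving as a 'freed' flag: rc_up/rc_down add and subtract 2, a count
+ * that would reach zero is stored as 1 (freed, on the per-thread free
+ * list), and new_descriptor() revives such a descriptor by adding 1.
+ */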
+static int rc_delta_descriptor(stm_tx *t, int delta)
+{
+ int rc, new_rc = t->rc;
+
+ do { rc = new_rc; }
+ while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc );
+
+ return rc;
+}
+
+static void rc_up_descriptor(stm_tx *t)
+{
+ rc_delta_descriptor(t, 2);
+ MB();
+}
+
+static void rc_down_descriptor(ptst_t *ptst, stm_tx *t)
+{
+ int old_rc, new_rc, cur_rc = t->rc;
+
+ WMB();
+
+ do {
+ old_rc = cur_rc;
+ new_rc = old_rc - 2;
+ if ( new_rc == 0 ) new_rc = 1;
+ }
+ while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc );
+
+ if ( old_rc == 2 ) release_descriptor(ptst, t);
+}
+
+static stm_tx *new_descriptor(priv_t *priv)
+{
+ stm_tx *t;
+
+ t = priv->next_descriptor;
+
+ if ( t != NULL )
+ {
+ priv->next_descriptor = t->next_free;
+ /* 'Unfree' descriptor, if it was previously freed. */
+ if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1);
+ }
+ else
+ {
+ t = (stm_tx *) priv->arena;
+ priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
+
+ if ( priv->arena >= priv->arena_lim )
+ {
+ new_arena(priv, ARENA_SIZE);
+ t = (stm_tx *) priv->arena;
+ priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
+ }
+
+ t->next_free = NULL;
+ t->rc = 2;
+ }
+
+ return t;
+}
+
+
+static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
+{
+ stm_tx_entry *ent = t->alloc_ptr++;
+ assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
+ DESCRIPTOR_SIZE);
+ return ent;
+}
+
+
+static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
+{
+ stm_tx_entry *next = *pnext;
+
+ while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
+ {
+ pnext = &next->next;
+ next = *pnext;
+ }
+
+ return pnext;
+}
+
+
+static void *read_blk_data(ptst_t *ptst, stm_blk *b)
+{
+ void *data;
+ stm_tx *t;
+ int status;
+ stm_tx_entry **pent;
+
+ for ( ; ; )
+ {
+ data = b->data;
+ if ( !is_descriptor(data) ) return data;
+
+ t = ptr_to_descriptor(data);
+ rc_up_descriptor(t);
+ if ( b->data != data )
+ {
+ rc_down_descriptor(ptst, t);
+ continue;
+ }
+
+ /*
+ * Help the owner to commit even though we could read straight from its
+ * descriptor: committing gets the descriptor out of the way of future
+ * accesses to this block.
+ */
+ commit_stm_tx(ptst, t);
+ }
+}
+
+
+stm *new_stm(ptst_t *ptst, int blk_size)
+{
+ stm *mem = malloc(CACHE_LINE_SIZE);
+ mem->blk_size = blk_size;
+ mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
+ return mem;
+}
+
+
+void free_stm(ptst_t *ptst, stm *mem)
+{
+ gc_remove_allocator(mem->gc_data_id);
+ free(mem);
+}
+
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
+{
+ stm_blk *b;
+ b = gc_alloc(ptst, gc_blk_id);
+ b->data = gc_alloc(ptst, mem->gc_data_id);
+ return b;
+}
+
+
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ /*
+ * We have to use read_blk_data(), as some doomed transaction may still
+ * install a marked pointer here while in its write phase.
+ */
+ void *data = read_blk_data(ptst, b);
+ assert(!is_descriptor(data));
+ gc_free(ptst, data, mem->gc_data_id);
+ gc_free(ptst, b, gc_blk_id);
+}
+
+
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return b->data;
+}
+
+
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return mem->blk_size;
+}
+
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
+{
+ priv_t *priv = &priv_ptst[ptst->id];
+ stm_tx *t;
+
+ if ( priv->cur_tx != NULL ) goto nesting;
+ t = new_descriptor(priv);
+ t->status = TXS_IN_PROGRESS;
+ t->reads = t->writes = NULL;
+ t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
+ t->gc_data_id = mem->gc_data_id;
+ t->blk_size = mem->blk_size;
+ t->penv = penv;
+ priv->cur_tx = t;
+ return t;
+
+ nesting:
+ fprintf(stderr, "No nesting of transactions is allowed\n");
+ return NULL;
+}
+
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ int desired_status, other_status, old_status, new_status, final_status;
+ void *marked_tx, *data;
+ stm_tx *other;
+ stm_tx_entry **other_pent, *ent;
+ priv_t *priv = &priv_ptst[ptst->id];
+
+ if ( priv->cur_tx == t ) priv->cur_tx = NULL;
+
+ marked_tx = make_marked_ptr(t);
+ desired_status = TXS_FAILED;
+
+ /*
+ * PHASE 1: WRITE-CHECKING PHASE.
+ */
+ if ( (t->status == TXS_IN_PROGRESS) && ((ent = t->writes) != NULL) )
+ {
+ /* Others should see up-to-date contents of descriptor. */
+ WMB();
+
+ do {
+ for ( ; ; )
+ {
+ data = CASPO(&ent->b->data, ent->old, marked_tx);
+ if ( (data == ent->old) || (data == marked_tx) ) break;
+
+ if ( !is_descriptor(data) ) goto fail;
+
+ other = ptr_to_descriptor(data);
+ rc_up_descriptor(other);
+ if ( ent->b->data != data )
+ {
+ rc_down_descriptor(ptst, other);
+ continue;
+ }
+
+ commit_stm_tx(ptst, other);
+ }
+ }
+ while ( (ent = ent->next) != NULL );
+ }
+
+ /* On success we linearise at this point. */
+ WEAK_DEP_ORDER_WMB();
+
+ /*
+ * PHASE 2: READ-CHECKING PHASE.
+ */
+ if ( (t->status <= TXS_READ_PHASE) && (t->reads != NULL) )
+ {
+ if ( !read_only(t) )
+ {
+ CASIO(&t->status, TXS_IN_PROGRESS, TXS_READ_PHASE);
+ MB_NEAR_CAS();
+ }
+ else MB();
+
+ for ( ent = t->reads; ent != NULL; ent = ent->next )
+ {
+ for ( ; ; )
+ {
+ data = ent->b->data;
+ if ( data == ent->old ) break;
+
+ /* Someone else made progress at our expense. */
+ if ( !is_descriptor(data) ) goto fail;
+ other = ptr_to_descriptor(data);
+
+ /*
+ * Descriptor always belongs to a contending operation.
+ * Before continuing, we must increment the reference count.
+ */
+ assert(other != t);
+ rc_up_descriptor(other);
+ if ( ent->b->data != data )
+ {
+ rc_down_descriptor(ptst, other);
+ continue;
+ }
+
+ /*
+ * What we do now depends on the status of the contending
+ * operation. This is easy for any status other than
+ * TXS_READ_PHASE -- usually we just check against the
+ * appropriate 'old' or 'new' data pointer. Transactions
+ * in their read-checking phase must be aborted, or helped
+ * to completion, depending on relative ordering of the
+ * transaction descriptors.
+ */
+ while ( (other_status = other->status) == TXS_READ_PHASE )
+ {
+ if ( t < other )
+ {
+ CASIO(&other->status, TXS_READ_PHASE, TXS_FAILED);
+ }
+ else
+ {
+ rc_up_descriptor(other);
+ commit_stm_tx(ptst, other);
+ }
+ }
+
+ other_pent = search_stm_tx_entry(&other->writes, ent->b);
+ assert(*other_pent != NULL);
+ data = (other_status == TXS_SUCCESSFUL) ?
+ (*other_pent)->new : (*other_pent)->old;
+ rc_down_descriptor(ptst, other);
+ if ( data != ent->old ) goto fail;
+
+ break;
+ }
+ }
+ }
+
+ desired_status = TXS_SUCCESSFUL;
+
+ fail:
+ if ( read_only(t) )
+ {
+ /* A very fast path: we can immediately reuse the descriptor. */
+ t->next_free = priv->next_descriptor;
+ priv->next_descriptor = t;
+ return desired_status == TXS_SUCCESSFUL;
+ }
+
+ /* Loop until we push the status to a "final decision" value. */
+ old_status = t->status;
+ while ( old_status <= TXS_READ_PHASE )
+ {
+ new_status = CASIO(&t->status, old_status, desired_status);
+ if ( old_status == new_status ) break;
+ old_status = new_status;
+ }
+ WMB_NEAR_CAS();
+
+ /*
+ * PHASE 3: CLEAN-UP.
+ */
+ final_status = t->status;
+ for ( ent = t->writes; ent != NULL; ent = ent->next )
+ {
+ /* If CAS fails, someone did it for us already. */
+ (void)CASPO(&ent->b->data, marked_tx,
+ (final_status == TXS_SUCCESSFUL) ? ent->new: ent->old);
+ }
+
+ rc_down_descriptor(ptst, t);
+ return final_status == TXS_SUCCESSFUL;
+}
+
+
+bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent;
+
+ RMB();
+
+ for ( ent = t->reads; ent != NULL; ent = ent->next )
+ {
+ if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
+ }
+
+ for ( ent = t->writes; ent != NULL; ent = ent->next )
+ {
+ if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
+ }
+
+ return TRUE;
+
+ fail:
+ t->status = TXS_FAILED;
+ return FALSE;
+}
+
+
+void abort_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ t->status = TXS_FAILED;
+}
+
+
+void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ sigjmp_buf *penv;
+ void *result;
+
+ pent = search_stm_tx_entry(&t->writes, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ pent = search_stm_tx_entry(&t->reads, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ ent->old = read_blk_data(ptst, b);
+ ent->new = ent->old;
+ ent->next = *pent;
+ *pent = ent;
+
+ assert(!is_descriptor(ent->new));
+ return ent->new;
+
+ found:
+ result = ent->new;
+ ent = t->check;
+ if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **r_pent, **w_pent, *ent;
+ sigjmp_buf *penv;
+ void *result;
+
+ w_pent = search_stm_tx_entry(&t->writes, b);
+ ent = *w_pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ r_pent = search_stm_tx_entry(&t->reads, b);
+ ent = *r_pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ *r_pent = ent->next;
+ }
+ else
+ {
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ ent->old = read_blk_data(ptst, b);
+ }
+
+ ent->new = gc_alloc(ptst, t->gc_data_id);
+ ent->next = *w_pent;
+ *w_pent = ent;
+ memcpy(ent->new, ent->old, t->blk_size);
+
+ assert(!is_descriptor(ent->old));
+ assert(!is_descriptor(ent->new));
+ return ent->new;
+
+ found:
+ result = ent->new;
+ ent = t->check;
+ if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ void *data;
+
+ pent = search_stm_tx_entry(&t->writes, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ *pent = ent->next;
+ data = ent->new;
+ assert(!is_descriptor(data));
+ gc_free(ptst, data, t->gc_data_id);
+ return;
+ }
+
+ pent = search_stm_tx_entry(&t->reads, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ *pent = ent->next;
+ }
+}
+
+
+static void handle_fault(int sig)
+{
+ ptst_t *ptst;
+ stm_tx *t;
+
+ ptst = critical_enter();
+ t = priv_ptst[ptst->id].cur_tx;
+ if ( (t != NULL) && !validate_stm_tx(ptst, t) )
+ {
+ sigjmp_buf *penv = t->penv;
+ commit_stm_tx(ptst, t);
+ critical_exit(ptst);
+ siglongjmp(*penv, 0);
+ }
+
+ fail:
+ fprintf(stderr, "Error: unhandleable SIGSEGV!\n");
+ abort();
+}
+
+
+void _init_stm_subsystem(int pad_data)
+{
+ struct sigaction act;
+
+ do_padding = pad_data;
+ gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk)));
+ memset(priv_ptst, 0, sizeof(priv_ptst));
+
+ act.sa_handler = handle_fault;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGSEGV, &act, NULL);
+}
--- /dev/null
+/******************************************************************************
+ * stm_herlihy.c
+ *
+ * Obstruction-free software transactional memory (STM).
+ *
+ * For more information see:
+ * Software Transactional Memory for Dynamic-sized Data Structures
+ * Maurice Herlihy, Victor Luchangco, Mark Moir, and William Scherer III
+ * Proceedings of 2003 ACM Symposium on Principles of Distributed Computing
+ *
+ * Copyright (c) 2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <assert.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef SPARC
+#include <time.h>
+#include <errno.h>
+#endif
+
+#define POLITE
+
+typedef struct stm_loc_st stm_loc;
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_loc_st {
+ unsigned long status; /* TXS_FAILED, TXS_SUCCESSFUL, descriptor. */
+ void *old;
+ void *new;
+};
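+
+/*
+ * A locator determines the current contents of a block: if status is
+ * TXS_SUCCESSFUL the live data is 'new', if TXS_FAILED it is 'old', and
+ * otherwise status is a pointer to the owning transaction descriptor,
+ * whose eventual outcome decides between the two (see read_loc_data()).
+ */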
+
+struct stm_blk_st {
+ stm_loc *loc;
+};
+
+struct stm_tx_entry_st {
+ stm_blk *b;
+ stm_loc *l;
+ void *data;
+ stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+ unsigned int status;
+ int rc;
+ stm_tx *next_free;
+ stm_tx_entry *reads;
+ stm_tx_entry *writes;
+ stm_tx_entry *alloc_ptr, *check;
+ void *dummy;
+ int gc_data_id, blk_size; /* copied from 'stm' structure */
+ sigjmp_buf *penv;
+};
+
+struct stm_st {
+ int gc_data_id;
+ int blk_size;
+};
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+ void *arena, *arena_lim;
+ stm_tx *next_descriptor;
+ stm_tx *cur_tx;
+#ifdef SPARC
+ unsigned int random_counter;
+#endif
+ CACHE_PAD(0);
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id; /* Allocation id for block descriptors. */
+static int gc_loc_id; /* Allocation id for locators. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#ifdef POLITE
+#define MAX_RETRIES 8
+#ifdef SPARC
+#define MIN_LOG_BACKOFF 4
+#define MAX_LOG_BACKOFF 31
+#define RANDOM_BITS 8
+#define RANDOM_SIZE (1 << RANDOM_BITS)
+#define RANDOM_MASK (RANDOM_SIZE - 1)
+static unsigned int rand_arr[RANDOM_SIZE];
+#endif
+#endif
+
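+/*
+ * The first block passed to remove_from_tx() is latched as the dummy
+ * object: later transactional accesses to it are served from a private,
+ * never-validated per-transaction copy whose updates are discarded at
+ * commit.  This appears to exist so that the red-black trees' shared
+ * sentinel node does not make every pair of transactions conflict.
+ */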
+static stm_blk *dummy_obj; /* Dummy object (used by red-black trees). */
+static void *dummy_data;
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define ARENA_SIZE 40960
+#define DESCRIPTOR_SIZE 4096
+
+#define TXS_IN_PROGRESS 0U
+#define TXS_FAILED 1U
+#define TXS_SUCCESSFUL 2U
+
+#define is_descriptor(_p) (((unsigned long)(_p) & 3) == 0)
+#define mk_descriptor(_p) ((stm_tx *)(_p))
+
+/* Is transaction read-only? */
+#define read_only(_t) ((_t)->writes == NULL)
+
+/* Is transaction definitely doomed to fail? */
+#define is_stale(_t, _e) \
+ (((_t)->status != TXS_IN_PROGRESS) || ((_e)->b->loc != (_e)->l))
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+
+static void new_arena (priv_t *priv, int size)
+{
+ priv->arena = malloc(size);
+ if ( priv->arena == NULL ) abort();
+ priv->arena_lim = (((char *) priv->arena) + size);
+}
+
+static void release_descriptor(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent;
+ priv_t *priv = &priv_ptst[ptst->id];
+ void *data;
+
+ t->next_free = priv->next_descriptor;
+ priv->next_descriptor = t;
+}
+
+static int rc_delta_descriptor(stm_tx *t, int delta)
+{
+ int rc, new_rc = t->rc;
+
+ do { rc = new_rc; }
+ while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc );
+
+ return rc;
+}
+
+static void rc_up_descriptor(stm_tx *t)
+{
+ rc_delta_descriptor(t, 2);
+ MB();
+}
+
+static void rc_down_descriptor(ptst_t *ptst, stm_tx *t)
+{
+ int old_rc, new_rc, cur_rc = t->rc;
+
+ WMB();
+
+ do {
+ old_rc = cur_rc;
+ new_rc = old_rc - 2;
+ if ( new_rc == 0 ) new_rc = 1;
+ }
+ while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc );
+
+ if ( old_rc == 2 ) release_descriptor(ptst, t);
+}
+
+static stm_tx *new_descriptor(priv_t *priv)
+{
+ stm_tx *t;
+
+ t = priv->next_descriptor;
+
+ if ( t != NULL )
+ {
+ priv->next_descriptor = t->next_free;
+ /* 'Unfree' descriptor, if it was previously freed. */
+ if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1);
+ }
+ else
+ {
+ t = (stm_tx *) priv->arena;
+ priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
+
+ if ( priv->arena >= priv->arena_lim )
+ {
+ new_arena(priv, ARENA_SIZE);
+ t = (stm_tx *) priv->arena;
+ priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
+ }
+
+ t->next_free = NULL;
+ t->rc = 2;
+ }
+
+ return t;
+}
+
+
+static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
+{
+ stm_tx_entry *ent = t->alloc_ptr++;
+ assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
+ DESCRIPTOR_SIZE);
+ return ent;
+}
+
+
+static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
+{
+ stm_tx_entry *next = *pnext;
+
+ while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
+ {
+ pnext = &next->next;
+ next = *pnext;
+ }
+
+ return pnext;
+}
+
+
+static int contention_wait(ptst_t *ptst, int attempts)
+{
+#ifdef POLITE
+ if ( (attempts > 1) && (attempts <= MAX_RETRIES) )
+ {
+#ifdef SPARC /* Exactly as it was done by the original authors. */
+ priv_t *priv = &priv_ptst[ptst->id];
+ struct timespec rqtp;
+ unsigned int log_backoff, mask;
+ log_backoff = attempts - 2 + MIN_LOG_BACKOFF;
+ if ( log_backoff > MAX_LOG_BACKOFF )
+ log_backoff = MAX_LOG_BACKOFF;
+ mask = (1 << log_backoff) - 1;
+ rqtp.tv_nsec = rand_arr[priv->random_counter++ & RANDOM_MASK] & mask;
+ rqtp.tv_sec = 0;
+ while ( nanosleep(&rqtp, NULL) != 0 ) continue;
+#else
+ usleep(1);
+#endif
+ }
+
+ return attempts < MAX_RETRIES;
+#else
+ return FALSE;
+#endif
+}
+
+
+static void *read_loc_data(ptst_t *ptst, stm_loc *l)
+{
+ void *data;
+ stm_tx *t;
+ unsigned long st;
+ stm_tx_entry **pent;
+ int attempts = 0;
+
+ for ( ; ; )
+ {
+ switch ( (st = l->status) )
+ {
+ case TXS_SUCCESSFUL:
+ return l->new;
+ case TXS_FAILED:
+ return l->old;
+ default:
+ t = mk_descriptor(st);
+ rc_up_descriptor(t);
+ if ( l->status == st )
+ {
+ switch ( t->status )
+ {
+ case TXS_SUCCESSFUL:
+ rc_down_descriptor(ptst, t);
+ l->status = TXS_SUCCESSFUL;
+ return l->new;
+ case TXS_FAILED:
+ rc_down_descriptor(ptst, t);
+ l->status = TXS_FAILED;
+ return l->old;
+ default:
+ if ( !contention_wait(ptst, ++attempts) )
+ {
+ attempts = 0;
+ CASIO(&t->status, TXS_IN_PROGRESS, TXS_FAILED);
+ }
+ }
+ }
+ rc_down_descriptor(ptst, t);
+ }
+ }
+}
+
+
+static stm_loc *install_loc(ptst_t *ptst, stm_tx *t,
+ stm_blk *b, stm_loc *old_loc)
+{
+ stm_loc *new_loc = gc_alloc(ptst, gc_loc_id);
+
+ new_loc->status = (unsigned long)t;
+ new_loc->new = gc_alloc(ptst, t->gc_data_id);
+ new_loc->old = read_loc_data(ptst, old_loc);
+ memcpy(new_loc->new, new_loc->old, t->blk_size);
+
+ if ( CASPO(&b->loc, old_loc, new_loc) != old_loc )
+ {
+ gc_unsafe_free(ptst, new_loc->new, t->gc_data_id);
+ gc_unsafe_free(ptst, new_loc , gc_loc_id);
+ new_loc = NULL;
+ }
+ else
+ {
+ gc_free(ptst, old_loc, gc_loc_id);
+ }
+
+ return new_loc;
+}
+
+
+stm *new_stm(ptst_t *ptst, int blk_size)
+{
+ stm *mem = malloc(CACHE_LINE_SIZE);
+ mem->blk_size = blk_size;
+ mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
+ return mem;
+}
+
+
+void free_stm(ptst_t *ptst, stm *mem)
+{
+ gc_remove_allocator(mem->gc_data_id);
+ free(mem);
+}
+
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
+{
+ stm_blk *b = gc_alloc(ptst, gc_blk_id);
+ stm_loc *l = gc_alloc(ptst, gc_loc_id);
+ b->loc = l;
+ l->status = TXS_SUCCESSFUL;
+ l->old = NULL;
+ l->new = gc_alloc(ptst, mem->gc_data_id);
+ return b;
+}
+
+
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ stm_loc *l;
+ void *data;
+
+ l = FASPO(&b->loc, NULL);
+ data = read_loc_data(ptst, l);
+
+ gc_free(ptst, data, mem->gc_data_id);
+ gc_free(ptst, l, gc_loc_id);
+ gc_free(ptst, b, gc_blk_id);
+}
+
+
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return b->loc->new;
+}
+
+
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return mem->blk_size;
+}
+
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
+{
+ priv_t *priv = &priv_ptst[ptst->id];
+ stm_tx *t;
+
+ if ( priv->cur_tx != NULL ) goto nesting;
+ t = new_descriptor(priv);
+ t->status = TXS_IN_PROGRESS;
+ t->reads = t->writes = NULL;
+ t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
+ t->gc_data_id = mem->gc_data_id;
+ t->blk_size = mem->blk_size;
+ t->penv = penv;
+ t->dummy = NULL;
+ priv->cur_tx = t;
+ return t;
+
+ nesting:
+ fprintf(stderr, "No nesting of transactions is allowed\n");
+ return NULL;
+}
+
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ unsigned int desired_st = TXS_SUCCESSFUL, st;
+ stm_tx_entry *ent;
+ priv_t *priv = &priv_ptst[ptst->id];
+
+ priv->cur_tx = NULL;
+
+ MB();
+
+ for ( ent = t->reads; ent != NULL; ent = ent->next )
+ {
+ if ( ent->b->loc != ent->l )
+ desired_st = TXS_FAILED;
+ }
+
+ if ( read_only(t) )
+ {
+ /* A very fast path: we can immediately reuse the descriptor. */
+ if ( t->dummy != NULL )
+ gc_unsafe_free(ptst, t->dummy, t->gc_data_id);
+ t->next_free = priv->next_descriptor;
+ priv->next_descriptor = t;
+ return desired_st == TXS_SUCCESSFUL;
+ }
+
+ st = CASIO(&t->status, TXS_IN_PROGRESS, desired_st);
+ if ( st == TXS_IN_PROGRESS )
+ st = desired_st;
+
+ assert((st == TXS_FAILED) || (st == TXS_SUCCESSFUL));
+
+ WMB_NEAR_CAS();
+
+ for ( ent = t->writes; ent != NULL; ent = ent->next )
+ {
+ ent->l->status = (unsigned long)st;
+ gc_free(ptst,
+ (st == TXS_SUCCESSFUL) ? ent->l->old : ent->l->new,
+ t->gc_data_id);
+ }
+
+ if ( t->dummy != NULL )
+ gc_unsafe_free(ptst, t->dummy, t->gc_data_id);
+
+ rc_down_descriptor(ptst, t);
+
+ return st == TXS_SUCCESSFUL;
+}
+
+
+bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent;
+
+ RMB();
+
+ /* A conflict on a pending update will already have set our status to TXS_FAILED. */
+ if ( t->status == TXS_FAILED )
+ goto fail;
+
+ /* Reads must be explicitly checked. */
+ for ( ent = t->reads; ent != NULL; ent = ent->next )
+ {
+ if ( ent->b->loc != ent->l )
+ goto fail;
+ }
+
+ return TRUE;
+
+ fail:
+ t->status = TXS_FAILED;
+ return FALSE;
+}
+
+
+void abort_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ t->status = TXS_FAILED;
+}
+
+
+void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ sigjmp_buf *penv;
+ void *result;
+
+ if ( b == dummy_obj )
+ {
+ if ( t->dummy == NULL )
+ {
+ t->dummy = gc_alloc(ptst, t->gc_data_id);
+ memcpy(t->dummy, dummy_data, t->blk_size);
+ }
+ return t->dummy;
+ }
+
+ pent = search_stm_tx_entry(&t->writes, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ pent = search_stm_tx_entry(&t->reads, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ if ( (ent->l = b->loc) == NULL )
+ goto fail;
+ ent->data = read_loc_data(ptst, ent->l);
+ ent->next = *pent;
+ *pent = ent;
+
+ return ent->data;
+
+ found:
+ result = ent->data;
+ ent = t->check;
+ if ( is_stale(t, ent) ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **r_pent, **w_pent, *ent;
+ stm_loc *loc;
+ sigjmp_buf *penv;
+ void *result;
+
+ if ( b == dummy_obj )
+ {
+ if ( t->dummy == NULL )
+ {
+ t->dummy = gc_alloc(ptst, t->gc_data_id);
+ memcpy(t->dummy, dummy_data, t->blk_size);
+ }
+ return t->dummy;
+ }
+
+ w_pent = search_stm_tx_entry(&t->writes, b);
+ ent = *w_pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ r_pent = search_stm_tx_entry(&t->reads, b);
+ ent = *r_pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ *r_pent = ent->next;
+ }
+ else
+ {
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ if ( (ent->l = b->loc) == NULL )
+ goto fail;
+ }
+
+ loc = install_loc(ptst, t, b, ent->l);
+ if ( loc == NULL ) goto fail;
+
+ ent->l = loc;
+ ent->data = loc->new;
+ ent->next = *w_pent;
+ *w_pent = ent;
+
+ return ent->data;
+
+ found:
+ result = ent->data;
+ ent = t->check;
+ if ( is_stale(t, ent) ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ if ( dummy_obj == NULL )
+ {
+ dummy_obj = b;
+ dummy_data = read_loc_data(ptst, b->loc);
+ }
+}
+
+
+static void handle_fault(int sig)
+{
+ ptst_t *ptst;
+ stm_tx *t;
+
+ ptst = critical_enter();
+ t = priv_ptst[ptst->id].cur_tx;
+ if ( (t != NULL) && !validate_stm_tx(ptst, t) )
+ {
+ sigjmp_buf *penv = t->penv;
+ commit_stm_tx(ptst, t);
+ critical_exit(ptst);
+ siglongjmp(*penv, 0);
+ }
+
+ fail:
+ fprintf(stderr, "Error: unhandleable SIGSEGV!\n");
+ abort();
+}
+
+
+void _init_stm_subsystem(int pad_data)
+{
+ struct sigaction act;
+
+#ifdef SPARC
+ int i;
+ struct timespec rqtp;
+
+ rqtp.tv_sec = 0;
+ rqtp.tv_nsec = 1000;
+
+ while ( nanosleep(&rqtp, NULL) != 0 )
+ {
+ if ( errno != EINTR )
+ {
+ printf("Urk! Nanosleep not supported!\n");
+ exit(1);
+ }
+ }
+
+ for ( i = 0; i < RANDOM_SIZE; i++ )
+ rand_arr[i] = (unsigned int)random();
+#endif
+
+ do_padding = pad_data;
+ gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk)));
+ gc_loc_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_loc)));
+ memset(priv_ptst, 0, sizeof(priv_ptst));
+
+ act.sa_handler = handle_fault;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGSEGV, &act, NULL);
+}
--- /dev/null
+/******************************************************************************
+ * stm_lock.c
+ *
+ * Lock-based software transactional memory (STM).
+ * Uses two-phase locking with multi-reader locks.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <assert.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <signal.h>
+
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_blk_st {
+ void *data;
+ mrsw_lock_t lock;
+};
+
+struct stm_tx_entry_st {
+ stm_blk *b;
+ void *old;
+ void *new;
+ stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+ int status;
+ stm_tx_entry *blocks;
+ stm_tx_entry *alloc_ptr, *check;
+ int gc_data_id, blk_size; /* copied from 'stm' structure */
+ sigjmp_buf *penv;
+};
+
+struct stm_st {
+ int gc_data_id;
+ int blk_size;
+};
+
+#define DESCRIPTOR_IN_USE(_t) ((_t)->penv != NULL)
+
+#define DESCRIPTOR_SIZE 4096
+#define MAX_TX_ENTS (DESCRIPTOR_SIZE / sizeof(stm_tx_entry))
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+ char desc[DESCRIPTOR_SIZE];
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id; /* Allocation id for block descriptors. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define TXS_IN_PROGRESS 0
+#define TXS_FAILED 1
+#define TXS_SUCCESSFUL 2
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+
+static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
+{
+ stm_tx_entry *ent = t->alloc_ptr++;
+ assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
+ DESCRIPTOR_SIZE);
+ return ent;
+}
+
+
+static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
+{
+ stm_tx_entry *next = *pnext;
+
+ while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
+ {
+ pnext = &next->next;
+ next = *pnext;
+ }
+
+ return pnext;
+}
+
+
+stm *new_stm(ptst_t *ptst, int blk_size)
+{
+ stm *mem = malloc(CACHE_LINE_SIZE);
+ mem->blk_size = blk_size;
+ mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
+ return mem;
+}
+
+
+void free_stm(ptst_t *ptst, stm *mem)
+{
+ gc_remove_allocator(mem->gc_data_id);
+ free(mem);
+}
+
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
+{
+ stm_blk *b;
+ b = gc_alloc(ptst, gc_blk_id);
+ b->data = gc_alloc(ptst, mem->gc_data_id);
+ mrsw_init(&b->lock);
+ return b;
+}
+
+
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ gc_free(ptst, b->data, mem->gc_data_id);
+ gc_free(ptst, b, gc_blk_id);
+}
+
+
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return b->data;
+}
+
+
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+ return mem->blk_size;
+}
+
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
+{
+ stm_tx *t = (stm_tx *)&priv_ptst[ptst->id];
+ if ( DESCRIPTOR_IN_USE(t) ) goto nesting;
+ t->status = TXS_IN_PROGRESS;
+ t->blocks = NULL;
+ t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
+ t->gc_data_id = mem->gc_data_id;
+ t->blk_size = mem->blk_size;
+ t->penv = penv;
+ return t;
+
+ nesting:
+ fprintf(stderr, "No nesting of transactions is allowed\n");
+ return NULL;
+}
+
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent, *last_ent;
+ mrsw_qnode_t qn[MAX_TX_ENTS];
+ stm_blk *b;
+ void *old;
+ int i;
+
+ t->penv = NULL;
+
+ /* Outcome may have been decided by an 'abort' or 'validate' operation. */
+ if ( t->status != TXS_IN_PROGRESS ) goto out;
+
+ /* We start by taking locks in block-address order (the entry list is
+ * kept sorted, so deadlock is impossible) and checking old values. */
+ for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next )
+ {
+ b = ent->b;
+ if ( (old = ent->old) == ent->new )
+ {
+ rd_lock(&b->lock, &qn[i]);
+ }
+ else
+ {
+ wr_lock(&b->lock, &qn[i]);
+ }
+ /* Check old value, and shortcut to failure if we mismatch. */
+ if ( b->data != old ) goto fail;
+ }
+
+ /*
+ * LINEARISATION POINT FOR SUCCESS:
+ * We haven't written new values yet, but that's okay as we have write
+ * locks on those locations. No one can see the old value now and yet
+ * still commit (as they'll be waiting for the read lock).
+ */
+ t->status = TXS_SUCCESSFUL;
+
+ /* We definitely succeed now: release locks and write new values. */
+ for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next )
+ {
+ b = ent->b;
+ if ( ent->old == ent->new )
+ {
+ rd_unlock(&b->lock, &qn[i]);
+ }
+ else
+ {
+ b->data = ent->new;
+ wr_unlock(&b->lock, &qn[i]);
+ }
+ }
+
+ out:
+ if ( t->status == TXS_SUCCESSFUL )
+ {
+ for ( ent = t->blocks; ent != NULL; ent = ent->next )
+ {
+ if ( ent->old == ent->new ) continue;
+ gc_free(ptst, ent->old, t->gc_data_id);
+ }
+ return TRUE;
+ }
+ else
+ {
+ for ( ent = t->blocks; ent != NULL; ent = ent->next )
+ {
+ if ( ent->old == ent->new ) continue;
+ gc_unsafe_free(ptst, ent->new, t->gc_data_id);
+ }
+ return FALSE;
+ }
+
+ /*
+ * We put the (hopefully rare) failure case out-of-line here.
+ * This is also the LINEARISATION POINT FOR FAILURE.
+ */
+ fail:
+ last_ent = ent->next;
+ t->status = TXS_FAILED;
+ for ( i = 0, ent = t->blocks; ent != last_ent; i++, ent = ent->next )
+ {
+ b = ent->b;
+ if ( ent->old == ent->new )
+ {
+ rd_unlock(&b->lock, &qn[i]);
+ }
+ else
+ {
+ wr_unlock(&b->lock, &qn[i]);
+ }
+ }
+ goto out;
+}
+
+
+bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ stm_tx_entry *ent, *last_ent = NULL;
+ mrsw_qnode_t qn[MAX_TX_ENTS];
+ stm_blk *b;
+ void *old;
+ int i;
+
+ RMB();
+
+ /* Lock-acquire phase */
+ for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next )
+ {
+ b = ent->b;
+
+ if ( (old = ent->old) == ent->new )
+ {
+ rd_lock(&b->lock, &qn[i]);
+ }
+ else
+ {
+ wr_lock(&b->lock, &qn[i]);
+ }
+
+ if ( b->data != old )
+ {
+ t->status = TXS_FAILED;
+ last_ent = ent->next;
+ break;
+ }
+ }
+
+ /* Lock-release phase */
+ for ( i = 0, ent = t->blocks; ent != last_ent; i++, ent = ent->next )
+ {
+ b = ent->b;
+ if ( ent->old == ent->new )
+ {
+ rd_unlock(&b->lock, &qn[i]);
+ }
+ else
+ {
+ wr_unlock(&b->lock, &qn[i]);
+ }
+ }
+
+ return t->status != TXS_FAILED;
+}
+
+
+void abort_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+ t->status = TXS_FAILED;
+}
+
+
+void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ sigjmp_buf *penv;
+ void *result;
+
+ pent = search_stm_tx_entry(&t->blocks, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) ) goto found;
+
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ ent->old = b->data;
+ ent->new = ent->old;
+ ent->next = *pent;
+ *pent = ent;
+ return ent->new;
+
+ found:
+ result = ent->new;
+ ent = t->check;
+ if ( ent->b->data != ent->old ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ sigjmp_buf *penv;
+ void *result;
+
+ pent = search_stm_tx_entry(&t->blocks, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ if ( ent->old != ent->new ) goto found;
+ }
+ else
+ {
+ ent = alloc_stm_tx_entry(t);
+ ent->b = b;
+ ent->old = b->data;
+ ent->next = *pent;
+ *pent = ent;
+ }
+
+ ent->new = gc_alloc(ptst, t->gc_data_id);
+ memcpy(ent->new, ent->old, t->blk_size);
+ return ent->new;
+
+ found:
+ result = ent->new;
+ ent = t->check;
+ if ( ent->b->data != ent->old ) goto fail;
+ if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
+ return result;
+
+ fail:
+ penv = t->penv;
+ abort_stm_tx(ptst, t);
+ commit_stm_tx(ptst, t);
+ siglongjmp(*penv, 0);
+ assert(0);
+ return NULL;
+}
+
+
+void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b)
+{
+ stm_tx_entry **pent, *ent;
+ void *data;
+
+ pent = search_stm_tx_entry(&t->blocks, b);
+ ent = *pent;
+ if ( (ent != NULL) && (ent->b == b) )
+ {
+ *pent = ent->next;
+ if ( (data = ent->new) != ent->old )
+ {
+ gc_free(ptst, data, t->gc_data_id);
+ }
+ }
+}
+
+
+static void handle_fault(int sig)
+{
+ ptst_t *ptst;
+ stm_tx *t;
+
+ ptst = critical_enter();
+ t = (stm_tx *)&priv_ptst[ptst->id];
+ if ( DESCRIPTOR_IN_USE(t) && !validate_stm_tx(ptst, t) )
+ {
+ sigjmp_buf *penv = t->penv;
+ commit_stm_tx(ptst, t);
+ critical_exit(ptst);
+ siglongjmp(*penv, 0);
+ }
+
+ fail:
+ fprintf(stderr, "Error: unhandleable SIGSEGV!\n");
+ abort();
+}
+
+
+void _init_stm_subsystem(int pad_data)
+{
+ struct sigaction act;
+
+ do_padding = pad_data;
+ gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk)));
+ memset(priv_ptst, 0, sizeof(priv_ptst));
+
+ act.sa_handler = handle_fault;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGSEGV, &act, NULL);
+}