From bb981b953f34fc0e63a0a09020c57329dd44ac98 Mon Sep 17 00:00:00 2001
From: "matt@linuxbox.com"
Date: Wed, 22 Jul 2009 15:43:22 -0400
Subject: [PATCH] Portable lock-free data structures by Keir Fraser (MCAS)

The MCAS suite, previously released as the lock-free library, is added
here under a non-restrictive license. This software was previously
released in 2003 under a GPL license, but was relicensed by the original
copyright holder, Keir Fraser, under a BSD license on 5/28/2008.

Reviewed-on: http://gerrit.openafs.org/183
Tested-by: Derrick Brashear
Reviewed-by: Derrick Brashear
---
 src/mcas/Makefile.mcas               |  97 ++++
 src/mcas/README                      |  91 ++++
 src/mcas/README_LICENSE              | 270 ++++
 src/mcas/alpha_defns.h               |  90 ++++
 src/mcas/bst_lock_fraser.c           | 414 ++++
 src/mcas/bst_lock_kung.c             | 372 +++
 src/mcas/bst_lock_manber.c           | 411 ++++
 src/mcas/bst_mcas.c                  | 436 +++++
 src/mcas/gc.c                        | 671 +++++
 src/mcas/gc.h                        |  40 ++
 src/mcas/ia64_defns.h                |  99 ++++
 src/mcas/intel_defns.h               | 106 ++++
 src/mcas/mcas.c                      | 574 ++++
 src/mcas/mips_defns.h                | 118 ++++
 src/mcas/portable_defns.h            | 406 ++++
 src/mcas/ppc_defns.h                 | 105 ++++
 src/mcas/ptst.c                      | 107 ++++
 src/mcas/ptst.h                      |  47 ++
 src/mcas/random.h                    |  19 +
 src/mcas/rb_lock_concurrentwriters.c | 763 ++++++
 src/mcas/rb_lock_mutex.c             | 772 +++++++
 src/mcas/rb_lock_serialisedwriters.c | 498 +++++
 src/mcas/rb_stm.c                    | 535 +++++
 src/mcas/replay.c                    | 474 ++++
 src/mcas/set.h                       | 102 ++++
 src/mcas/set_harness.c               | 574 ++++
 src/mcas/skip_cas.c                  | 497 +++++
 src/mcas/skip_lock.c                 | 435 +++++
 src/mcas/skip_mcas.c                 | 374 +++++
 src/mcas/skip_stm.c                  | 273 ++++
 src/mcas/sparc_defns.h               | 108 ++++
 src/mcas/sparc_mcas.il               |  30 ++
 src/mcas/stm.h                       |  42 ++
 src/mcas/stm_fraser.c                | 661 +++++
 src/mcas/stm_herlihy.c               | 688 ++++++
 src/mcas/stm_lock.c                  | 464 ++++
 36 files changed, 11763 insertions(+)
 create mode 100644 src/mcas/Makefile.mcas
 create mode 100644 src/mcas/README
 create mode 100644 src/mcas/README_LICENSE
 create mode 100644 src/mcas/alpha_defns.h
 create mode 100644 src/mcas/bst_lock_fraser.c
 create mode 100644 src/mcas/bst_lock_kung.c
 create mode 100644 src/mcas/bst_lock_manber.c
 create mode 100644 src/mcas/bst_mcas.c
 create mode 100644 src/mcas/gc.c
 create mode 100644 src/mcas/gc.h
 create mode 100644 src/mcas/ia64_defns.h
 create mode 100644 src/mcas/intel_defns.h
 create mode 100644 src/mcas/mcas.c
 create mode 100644 src/mcas/mips_defns.h
 create mode 100644 src/mcas/portable_defns.h
 create mode 100644 src/mcas/ppc_defns.h
 create mode 100644 src/mcas/ptst.c
 create mode 100644 src/mcas/ptst.h
 create mode 100644 src/mcas/random.h
 create mode 100644 src/mcas/rb_lock_concurrentwriters.c
 create mode 100644 src/mcas/rb_lock_mutex.c
 create mode 100644 src/mcas/rb_lock_serialisedwriters.c
 create mode 100644 src/mcas/rb_stm.c
 create mode 100644 src/mcas/replay.c
 create mode 100644 src/mcas/set.h
 create mode 100644 src/mcas/set_harness.c
 create mode 100644 src/mcas/skip_cas.c
 create mode 100644 src/mcas/skip_lock.c
 create mode 100644 src/mcas/skip_mcas.c
 create mode 100644 src/mcas/skip_stm.c
 create mode 100644 src/mcas/sparc_defns.h
 create mode 100644 src/mcas/sparc_mcas.il
 create mode 100644 src/mcas/stm.h
 create mode 100644 src/mcas/stm_fraser.c
 create mode 100644 src/mcas/stm_herlihy.c
 create mode 100644 src/mcas/stm_lock.c
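For orientation before the diffs: the heart of this suite is mcas(), a
multi-word compare-and-swap. Its calling convention, as inferred from the
call sites in bst_mcas.c below (a location count followed by
address/expected-value/new-value triples, returning non-zero on success),
is illustrated by the hedged sketch that follows. The two slots and the
bare read of slot_a are hypothetical simplifications; real readers of
MCAS-managed words must also strip in-flight MCAS descriptors, as the
READ_LINK macros in bst_mcas.c do.

    /* Sketch only: atomically move a value from slot_a to slot_b.
     * Assumes mcas.c is included alongside, as bst_mcas.c does, and that
     * the caller is inside a critical_enter()/critical_exit() region. */
    static void *slot_a = (void *)0x10, *slot_b = NULL;

    static void move_a_to_b(void)
    {
        void *v;
        do {
            v = slot_a;             /* value we expect to move            */
        }
        while ( !mcas(2,            /* both updates commit, or neither:   */
                      (void **)&slot_a, v,    NULL,  /* clear slot_a ...  */
                      (void **)&slot_b, NULL, v) );  /* ...fill slot_b    */
    }
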
diff --git a/src/mcas/Makefile.mcas b/src/mcas/Makefile.mcas
new file mode 100644
index 000000000..8aa23fa1e
--- /dev/null
+++ b/src/mcas/Makefile.mcas
@@ -0,0 +1,97 @@
+
+ARCH := SPARC
+DEBUGGING := -DNDEBUG
+
+ifeq ($(ARCH),INTEL)
+CC := gcc
+CFLAGS := -O3 -DINTEL -fomit-frame-pointer -march=i686
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),PPC)
+CC := cc_r
+CFLAGS := -O3 -DPPC -q64 -w
+LDFLAGS := -lpthread -q64
+ASFLAGS := -a64
+endif
+
+ifeq ($(ARCH),IA64)
+CC := gcc
+CFLAGS := -O3 -DIA64 -fomit-frame-pointer
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),MIPS)
+CC := gcc
+CFLAGS := -O3 -DMIPS -fomit-frame-pointer
+LDFLAGS := -lpthread
+endif
+
+ifeq ($(ARCH),SPARC)
+CC := /opt/SUNWspro/bin/cc
+CFLAGS := -xO3 -DSPARC sparc_mcas.il -xarch=v9b
+LDFLAGS := -DSPARC sparc_mcas.il -xarch=v9b -lthread -lrt
+endif
+
+ifeq ($(ARCH),ALPHA)
+CC := cc
+CFLAGS := -accept vaxc_keywords -O3 -DALPHA
+CFLAGS += -fomit-frame-pointer -DWEAK_MEM_ORDER
+LDFLAGS := -lpthread
+endif
+
+CFLAGS += $(DEBUGGING)
+COMMON_DEPS += Makefile $(wildcard *.h)
+
+GC_HARNESS_TARGETS := skip_lock_perlist skip_lock_pernode skip_lock_perpointer
+GC_HARNESS_TARGETS += skip_cas skip_mcas
+
+GC_HARNESS_TARGETS += bst_lock_fraser bst_lock_manber bst_lock_kung
+GC_HARNESS_TARGETS += bst_mcas
+
+GC_HARNESS_TARGETS += rb_lock_concurrentwriters rb_lock_serialisedwriters
+GC_HARNESS_TARGETS += rb_lock_mutex
+
+TARGETS := $(GC_HARNESS_TARGETS)
+TARGETS += rb_stm_fraser rb_stm_herlihy rb_stm_lock
+TARGETS += skip_stm_fraser skip_stm_herlihy skip_stm_lock
+
+all: $(TARGETS) replay
+
+clean:
+	rm -f $(TARGETS) replay *~ core *.o *.a
+
+replay: %: %.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -c -o $(patsubst %.c,%.o,$<) $<
+	$(CC) -o $@ $(patsubst %.c,%.o,$<) $(LDFLAGS)
+
+tree_mcas.o: tree_mcas.c mcas.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -c -o $@ $<
+skip_lock_perpointer.o: skip_lock.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -DTINY_MTX -c -o $@ $<
+skip_lock_pernode.o: skip_lock.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -c -o $@ $<
+skip_lock_perlist.o: skip_lock.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -DFAT_MTX -c -o $@ $<
+skip_mcas.o: skip_mcas.c mcas.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+%.o: %.c $(COMMON_DEPS)
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+skip_stm_lock: skip_stm.o stm_lock.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+skip_stm_fraser: skip_stm.o stm_fraser.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+skip_stm_herlihy: skip_stm.o stm_herlihy.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+rb_stm_lock: rb_stm.o stm_lock.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+rb_stm_fraser: rb_stm.o stm_fraser.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+rb_stm_herlihy: rb_stm.o stm_herlihy.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+$(GC_HARNESS_TARGETS): %: %.o set_harness.o ptst.o gc.o
+	$(CC) -o $@ $^ $(LDFLAGS)
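Note how every GC_HARNESS_TARGETS binary above is the same three support
objects (set_harness.o, ptst.o, gc.o) linked against a single
data-structure object: the harness supplies main() and the workload, and
each implementation supplies the small set interface sketched below. The
signatures are taken from the implementations in this patch; set.h carries
the authoritative declarations.

    /* The common set interface each implementation exports (see set.h). */
    set_t   *set_alloc(void);                   /* create an empty set    */
    setval_t set_update(set_t *s, setkey_t k,   /* insert mapping;        */
                        setval_t v,             /* returns old value      */
                        int overwrite);
    setval_t set_remove(set_t *s, setkey_t k);  /* delete; returns old    */
    setval_t set_lookup(set_t *s, setkey_t k);  /* read; NULL if absent   */
    void _init_set_subsystem(void);             /* register GC allocators */
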
diff --git a/src/mcas/README b/src/mcas/README
new file mode 100644
index 000000000..9ff8b6e2f
--- /dev/null
+++ b/src/mcas/README
@@ -0,0 +1,91 @@
+        The Lock-Free Library
+        =====================
+
+
+1. Building
+-----------
+Edit the Makefile and set ARCH to the appropriate value.
+Type 'make'.
+
+
+2. What you get
+---------------
+'stm_fraser.c' is an object-based STM with the programming API defined
+in 'stm.h'.  'mcas.c' is an implementation of multi-word
+compare-and-swap.
+
+These are used to build a number of search structures: skip lists,
+binary search trees, and red-black trees.  The executables are named as
+follows:
+
+ bst_lock_fraser           --- BST implementation using per-node locks.
+                               No locking for read operations.
+ bst_lock_kung             --- BST implementation using per-node locks.
+                               No locking for read operations.
+ bst_lock_manber           --- BST implementation using per-node locks.
+                               No locking for read operations.
+ bst_mcas                  --- BST implementation based on MCAS.
+
+ rb_lock_concurrentwriters --- Red-black trees with concurrent writers.
+                               Based on MCS multi-reader locks.
+ rb_lock_serialisedwriters --- Red-black trees with serialised writers.
+                               Based on MCS multi-reader locks.
+ rb_lock_mutex             --- Red-black trees with concurrent writers, and
+                               no locking for read operations. Very fast!
+ rb_stm_fraser             --- Red-black trees using Fraser's STM.
+ rb_stm_herlihy            --- Red-black trees using Herlihy et al's STM.
+ rb_stm_lock               --- Red-black trees using 2-phase-locking STM.
+
+ skip_lock_perlist         --- Skip lists with a single global lock.
+                               No locking for read operations.
+ skip_lock_pernode         --- Skip lists with a lock per node.
+                               No locking for read operations.
+ skip_lock_perpointer      --- Skip lists with a lock per pointer.
+                               No locking for read operations.
+ skip_cas                  --- Skip lists built directly from CAS.
+ skip_mcas                 --- Skip lists based on MCAS.
+ skip_stm_fraser           --- Skip lists using Fraser's STM.
+ skip_stm_herlihy          --- Skip lists using Herlihy et al's STM.
+ skip_stm_lock             --- Skip lists using 2-phase-locking STM.
+
+Each executable is run as:
+
+  <executable> <num_threads> <read_proportion> <key_power>
+
+'executable' is one of the above implementations.
+
+'num_threads' indicates the degree of parallelism.
+
+'read_proportion' determines what proportion of the random workload is
+lookups as opposed to updates or removals.  The proportion is out of
+256; for example, a read_proportion of 192 makes 75% of the operations
+lookups.
+
+'key_power' indicates the key range.  Key range is 2 ^ 'key_power'.
+Since updates and removals are equally probable, the mean set size
+will be 2 ^ ('key_power' - 1).
+
+
+3. Verifying correctness
+------------------------
+To check that each implementation correctly behaves as a 'set' ought
+to, you can define DO_WRITE_LOG in 'set_harness.c'.  This will cause
+each implementation to produce a log describing each operation that
+was executed, and its result.
+
+This log can be run through 'replay', which will search for a
+linearisable schedule.
+
+
+4. Distribution license
+-----------------------
+The license is GPL.  See the file COPYING for details.
+
+
+ -- Keir Fraser, 25th September 2003
+
+
+****
+
+This software has been released by its original author, Keir Fraser,
+with permission from his advisors, under a BSD license.  For details,
+please see README_LICENSE.
+
+ -- Matt Benjamin, 07/24/2009
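As a concrete illustration of the interface the harness drives, a minimal
single-threaded caller might look like the sketch below. It assumes the
per-thread state and garbage-collection subsystems have already been
initialised (set_harness.c arranges this), and that setval_t is a
pointer-sized type, as the implementations in this patch treat it; the key
and value here are arbitrary.

    #include "set.h"

    static void example(void)
    {
        set_t *s;
        setval_t v;

        _init_set_subsystem();      /* once, before any set operations */
        s = set_alloc();

        set_update(s, (setkey_t)5, (setval_t)0xbeef, 1); /* insert     */
        v = set_lookup(s, (setkey_t)5);  /* -> (setval_t)0xbeef        */
        v = set_remove(s, (setkey_t)5);  /* returns the removed value  */
        (void)v;
    }
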
diff --git a/src/mcas/README_LICENSE b/src/mcas/README_LICENSE
new file mode 100644
index 000000000..8912f1e1e
--- /dev/null
+++ b/src/mcas/README_LICENSE
@@ -0,0 +1,270 @@
+A note on BSD licensing of the software contained herein.
+
+This software includes software previously released in 2003 under a
+GPL license, but released by the original copyright holder, Keir
+Fraser, under a BSD license, on 5/28/2008.
+
+The chain of electronic mails by which I, on behalf of the OpenAFS
+project, requested and secured the grant of license (BSD terms, as
+stated above) is included below.
+
+--Matt Benjamin
+5/31/2008
+
+--------------------GRANT OF LICENSE--------------------
+
+Return-Path:
+X-Original-To: matt@linuxbox.com
+Delivered-To: matt@linuxbox.com
+Received: by trosper.private.linuxbox.com (Postfix, from userid 65534)
+	id CA3F6D9CAB01; Wed, 28 May 2008 10:21:08 -0400 (EDT)
+X-Spam-Checker-Version: SpamAssassin 3.1.7 (2006-10-05) on
+	trosper.private.linuxbox.com
+X-Spam-Level:
+X-Spam-Status: No, score=-2.2 required=4.0 tests=BAYES_00,HTML_30_40,
+	HTML_MESSAGE autolearn=disabled version=3.1.7
+Received: from aa.linuxbox.com (linuxbox.com [10.1.1.1])
+	by trosper.private.linuxbox.com (Postfix) with ESMTP id BD4EEC0A1BE1
+	for ; Wed, 28 May 2008 10:20:59 -0400 (EDT)
+Received: from SMTP.EU.CITRIX.COM (smtp.eu.citrix.com [62.200.22.115])
+	by aa.linuxbox.com (8.13.1/8.13.1/SuSE Linux 0.7) with ESMTP id m4SEKIT2032434
+	for ; Wed, 28 May 2008 10:20:59 -0400
+X-IronPort-AV: E=Sophos;i="4.27,555,1204520400";
+	d="scan'208,217";a="355906"
+Received: from lonpexchmx01.citrite.net ([10.30.224.191])
+	by LONPIPO01.EU.CITRIX.COM with ESMTP; 28 May 2008 10:19:40 -0400
+Received: from [10.80.3.247] ([10.80.3.247]) by lonpexchmx01.citrite.net with Microsoft SMTPSVC(6.0.3790.3959);
+	Wed, 28 May 2008 15:19:40 +0100
+User-Agent: Microsoft-Entourage/11.4.0.080122
+Date: Wed, 28 May 2008 15:19:24 +0100
+Subject: Re: MCAS licensing
+From: Keir Fraser
+To: Tim Harris ,
+	Matt Benjamin
+Message-ID:
+Thread-Topic: MCAS licensing
+Thread-Index: AcjAzdQjEv1TZSzBEd2wxgAX8io7RQ==
+In-Reply-To:
+Mime-version: 1.0
+Content-type: multipart/alternative;
+	boundary="B_3294832767_9324180"
+X-OriginalArrivalTime: 28 May 2008 14:19:40.0162 (UTC) FILETIME=[DDC5DE20:01C8C0CD]
+X-Greylist: Sender is SPF-compliant, not delayed by milter-greylist-2.0.2 (aa.linuxbox.com [134.215.213.37]); Wed, 28 May 2008 10:20:59 -0400 (EDT)
+
+> This message is in MIME format. Since your mail reader does not understand
+this format, some or all of this message may not be legible.
+
+--B_3294832767_9324180
+Content-type: text/plain;
+	charset="ISO-8859-1"
+Content-transfer-encoding: quoted-printable
+
+On 28/5/08 15:07, "Tim Harris" wrote:
+
+> I'm personally happy making it available under a different license. However,
+> most of this work (and almost all the actual implementation) was by Keir
+> Fraser, so he'll need to OK it as well.
+>
+> Keir, if its OK with you then can we replace the copy of lock-free-lib at
+> http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar.gz
+> with one under a BSD-derived license?
+
+I'd be happy to relicense, however I'm unlikely to take the time to go
+through the tarball changing all references to GPL to refer to a BSD-alike
+license. Add to this the fact I do not have access to my CL account any
+more. However I'm happy for the OpenAFS project to take the tarball and do
+anything you like to it. The question then is simply what degree of
+assurance/provability do you require that we will not renege on this
+agreement later. Perhaps this email will suffice? :-)
+
+ -- Keir
+
+--B_3294832767_9324180--
+
+
+Return-Path:
+X-Original-To: matt@linuxbox.com
+Delivered-To: matt@linuxbox.com
+Received: by trosper.private.linuxbox.com (Postfix, from userid 65534)
+	id ACE2DD9CAAFA; Wed, 28 May 2008 10:08:33 -0400 (EDT)
+X-Spam-Checker-Version: SpamAssassin 3.1.7 (2006-10-05) on
+	trosper.private.linuxbox.com
+X-Spam-Level:
+X-Spam-Status: No, score=-2.2 required=4.0 tests=BAYES_00,HTML_30_40,
+	HTML_MESSAGE autolearn=disabled version=3.1.7
+Received: from aa.linuxbox.com (linuxbox.com [10.1.1.1])
+	by trosper.private.linuxbox.com (Postfix) with ESMTP id 4A423C0A1BE1
+	for ; Wed, 28 May 2008 10:08:19 -0400 (EDT)
+Received: from rv-out-0506.google.com (rv-out-0506.google.com [209.85.198.236])
+	by aa.linuxbox.com (8.13.1/8.13.1/SuSE Linux 0.7) with ESMTP id m4SE81ps000627
+	for ; Wed, 28 May 2008 10:08:18 -0400
+Received: by rv-out-0506.google.com with SMTP id f6so3345779rvb.53
+	for ; Wed, 28 May 2008 07:07:48 -0700 (PDT)
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
+	d=gmail.com; s=gamma;
+	h=domainkey-signature:received:received:message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:references;
+	bh=WbCNQe38GAVns49oEFgubKUCi6HmETUXNog1K8qsoNo=;
+	b=oQe0Aw3vaqCldpLw+jf3wyBz8Hi6U6JzIH+7ZT9FzFDbp43+NAZoGNWGJH3VR1greg6LrHXEFwpS37c2GuiHhs6y9l5EDYfoJ40eLZxQZmmHDio4NgX+lhSNFQ68CXIr+sbG4kmiLdFjPLrRzFIbUTvGpiXSNirTBNcWgdgmCyI=
+DomainKey-Signature: a=rsa-sha1; c=nofws;
+	d=gmail.com; s=gamma;
+	h=message-id:date:from:to:subject:cc:in-reply-to:mime-version:content-type:references;
+	b=nKi6rs9htpokBrXzI1sGNFdBJe1q8HhxHiBnR241rmGUFnRokBnulqktV3STx+pHJpS4xQj4fcCWyIUAWrbmHjziiz8j5k7E5gWTieDP8MHtZqM049INLp8IxtqT3Rgjp6YJASlWpwFOYwYO5I/CadqhcU3IsD7xCEtiPjIQ1ss=
+Received: by 10.141.20.7 with SMTP id x7mr1206096rvi.82.1211983668495;
+	Wed, 28 May 2008 07:07:48 -0700 (PDT)
+Received: by 10.140.158.5 with HTTP; Wed, 28 May 2008 07:07:48 -0700 (PDT)
+Message-ID:
+Date: Wed, 28 May 2008 15:07:48 +0100
+From: "Tim Harris"
+To: "Matt Benjamin"
+Subject: Re: MCAS licensing
+Cc: keir.fraser@cl.cam.ac.uk
+In-Reply-To: <483D640E.5090502@linuxbox.com>
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+	boundary="----=_Part_9134_21622973.1211983668503"
+References: <483D640E.5090502@linuxbox.com>
+X-Greylist: Sender is SPF-compliant, not delayed by milter-greylist-2.0.2 (aa.linuxbox.com [134.215.213.37]); Wed, 28 May 2008 10:08:19 -0400 (EDT)
+
+------=_Part_9134_21622973.1211983668503
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: 7bit
+Content-Disposition: inline
+
+Hi,
+
+I'm personally happy making it available under a different license.
+However,
+most of this work (and almost all the actual implementation) was by Keir
+Fraser, so he'll need to OK it as well.
+
+Keir, if its OK with you then can we replace the copy of lock-free-lib at
+http://www.cl.cam.ac.uk/research/srg/netos/lock-free/src/lockfree-lib.tar.gz
+with one under a BSD-derived license?
+
+Thanks,
+
+Tim
+
+
+
+
+On Wed, May 28, 2008 at 2:54 PM, Matt Benjamin wrote:
+
+> -----BEGIN PGP SIGNED MESSAGE-----
+> Hash: SHA256
+>
+> Hi Tim,
+>
+> Thank you (and colleagues) for your work.
+>
+> I work on a large open-source project (OpenAFS) whose license is,
+> unfortunately, not GPL, and not viral.  It seems like an outside chance,
+> but we're experimenting with lock-free data structures in some
+> subsystems, and it would be interesting to us if there were a possibility
+> of getting a product/project-specific license exemption allowing us to
+> use MCAS.  (It would be nice to be using a vetted, free library.)
+>
+> Thanks for your consideration,
+>
+> Matt
+>
+> - --
+>
+> Matt Benjamin
+>
+> The Linux Box
+> 206 South Fifth Ave. Suite 150
+> Ann Arbor, MI  48104
+>
+> http://linuxbox.com
+>
+> tel. 734-761-4689
+> fax. 734-769-8938
+> cel. 734-216-5309
+>
+> -----BEGIN PGP SIGNATURE-----
+> Version: GnuPG v1.4.7 (GNU/Linux)
+> Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
+>
+> iD8DBQFIPWQOJiSUUSaRdSURCAMDAKCH4PPrl5TXtQj20oIZtwYs8p49qwCbBsXp
+> Ha1vEHYNNIW5dxiet2bSFNo=
+> =0ArW
+> -----END PGP SIGNATURE-----
+>
+
+------=_Part_9134_21622973.1211983668503--
diff --git a/src/mcas/alpha_defns.h b/src/mcas/alpha_defns.h
new file mode 100644
index 000000000..f74558cf7
--- /dev/null
+++ b/src/mcas/alpha_defns.h
@@ -0,0 +1,90 @@
+#ifndef __ALPHA_DEFNS_H__
+#define __ALPHA_DEFNS_H__
+
+#include
+#include
+#include
+
+#ifndef ALPHA
+#define ALPHA
+#endif
+
+#define CACHE_LINE_SIZE 64
+
+
+/*
+ * I. Compare-and-swap, fetch-and-store.
+ */
+
+#define FAS32(_x,_n) asm ( \
+    "1: ldl_l %v0, 0(%a0);" \
+    "   bis %a1, 0, %t0;" \
+    "   stl_c %t0, 0(%a0);" \
+    "   beq %t0, 1b;", (_x), (_n))
+#define FAS64(_x,_n) asm ( \
+    "1: ldq_l %v0, 0(%a0);" \
+    "   bis %a1, 0, %t0;" \
+    "   stq_c %t0, 0(%a0);" \
+    "   beq %t0, 1b;", (_x), (_n))
+#define CAS32(_x,_o,_n) asm ( \
+    "1: ldl_l %v0, 0(%a0);" \
+    "   cmpeq %v0, %a1, %t0;" \
+    "   beq %t0, 3f;" \
+    "   bis %a2, 0, %t0;" \
+    "   stl_c %t0, 0(%a0);" \
+    "   beq %t0, 1b;" \
+    "3:", (_x), (_o), (_n))
+#define CAS64(_x,_o,_n) asm ( \
+    "1: ldq_l %v0, 0(%a0);" \
+    "   cmpeq %v0, %a1, %t0;" \
+    "   beq %t0, 3f;" \
+    "   bis %a2, 0, %t0;" \
+    "   stq_c %t0, 0(%a0);" \
+    "   beq %t0, 1b;" \
+    "3:", (_x), (_o), (_n))
+#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n))
+#define FAS(_x,_n)    ((sizeof (*_x) == 4)?FAS32(_x,_n)   :FAS64(_x,_n))
+/* Update Integer location, return Old value. */
+#define CASIO(_x,_o,_n) CAS(_x,_o,_n)
+#define FASIO(_x,_n)    FAS(_x,_n)
+/* Update Pointer location, return Old value. */
+#define CASPO(_x,_o,_n) (void*)CAS((_x),(void*)(_o),(void*)(_n))
+#define FASPO(_x,_n)    (void*)FAS((_x),(void*)(_n))
+#define CAS32O CAS32
+#define CAS64O CAS64
+
+/*
+ * II. Memory barriers.
+ *  WMB(): All preceding write operations must commit before any later writes.
+ *  RMB(): All preceding read operations must commit before any later reads.
+ *  MB():  All preceding memory accesses must commit before any later accesses.
+ *
+ * If the compiler does not observe these barriers (but any sane compiler
+ * will!), then VOLATILE should be defined as 'volatile'.
+ */
+
+#define MB()  asm("mb")
+#define WMB() asm("wmb")
+#define RMB() (MB())
+#define VOLATILE /*volatile*/
+
+
+/*
+ * III. Cycle counter access.
+ */
+
+#include
+typedef unsigned long tick_t;
+#define RDTICK() asm("rpcc %v0")
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long _u64;
+
+#endif /* __ALPHA_DEFNS_H__ */
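The macros above are the whole porting layer: a new architecture supplies
the CAS/FAS variants, the three memory barriers, and a cycle counter. As a
hedged illustration of how the rest of the suite consumes them, the
fragment below publishes a node with CASPO (compare-and-swap on a pointer
location, returning the old value) and orders the initialisation with
WMB(); the stack type is hypothetical and not part of the library.

    /* Sketch only: a lock-free stack push built on CASPO and WMB. */
    typedef struct stk_node { struct stk_node *next; void *datum; } stk_node_t;
    static stk_node_t *stk_top = NULL;

    static void stk_push(stk_node_t *n)
    {
        stk_node_t *old, *seen = stk_top;
        do {
            old     = seen;
            n->next = old;
            WMB();   /* n->next must be visible before n is published */
        }
        while ( (seen = CASPO(&stk_top, old, n)) != old );
    }
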
diff --git a/src/mcas/bst_lock_fraser.c b/src/mcas/bst_lock_fraser.c
new file mode 100644
index 000000000..b90dcb5c4
--- /dev/null
+++ b/src/mcas/bst_lock_fraser.c
@@ -0,0 +1,414 @@
+/******************************************************************************
+ * bst_lock_fraser.c
+ *
+ * Lock-based binary search trees (BSTs), based on per-node spinlocks.
+ * Uses threaded tree representation as described in my PhD dissertation:
+ *  "Practical Lock-Freedom", University of Cambridge, 2003.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+    * notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    * copyright notice, this list of conditions and the following
+    * disclaimer in the documentation and/or other materials provided
+    * with the distribution.  Neither the name of the Keir Fraser
+    * nor the names of its contributors may be used to endorse or
+    * promote products derived from this software without specific
+    * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define MARK_THREAD   1
+#define THREAD(_p)    ((node_t *)((int_addr_t)(_p)|(MARK_THREAD)))
+#define UNTHREAD(_p)  ((node_t *)((int_addr_t)(_p)&~MARK_THREAD))
+#define IS_THREAD(_p) ((int)((int_addr_t)(_p)&MARK_THREAD))
+
+#define IS_GARBAGE(_n) ((_n)->v == NULL)
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+    setkey_t k;
+    setval_t v;
+    node_t *l, *r;
+    mcs_lock_t lock;
+};
+
+struct set_st
+{
+    node_t root;
+    node_t sentinel;
+};
+
+static int gc_id;
+
+/* We use these flags to determine which nodes are currently locked. */
+#define P_LOCKED   0x01
+#define N_LOCKED   0x02
+#define PAL_LOCKED 0x04
+#define PAR_LOCKED 0x08
+#define AL_LOCKED  0x10
+#define AR_LOCKED  0x20
+
+#define LOCK(_n, _qn, _flag)                    \
+    do {                                        \
+        mcs_lock(&(_n)->lock, &(_qn));          \
+        if ( IS_GARBAGE(_n) ) {                 \
+            mcs_unlock(&(_n)->lock, &(_qn));    \
+            goto retry;                         \
+        }                                       \
+        lock_flags |= (_flag);                  \
+    } while ( 0 )
+
+#define UNLOCK(_n, _qn, _flag)                  \
+    do {                                        \
+        if ( (lock_flags & (_flag)) )           \
+            mcs_unlock(&(_n)->lock, &(_qn));    \
+    } while ( 0 )
+
+
+/*
+ * Search for node with key == k. Return NULL if none such, else ptr to node.
+ * @ppn is filled in with parent node, or closest leaf if no match.
+ * p and n will both be unmarked and adjacent on return.
+ */
+static node_t *search(set_t *s, setkey_t k, node_t **ppn)
+{
+    node_t *p, *n, *c;
+
+ retry:
+    p = &s->root;
+    n = p->r;
+
+    while ( !IS_THREAD(n) )
+    {
+        if ( k < n->k ) {
+            c = n->l;
+            assert(UNTHREAD(c)->k < n->k);
+        } else if ( k > n->k ) {
+            c = n->r;
+            assert(UNTHREAD(c)->k > n->k);
+        } else /* k == n->k */
+            goto found;
+
+        p = n; n = c;
+    }
+
+    /* Follow final thread, just in case.
*/ + c = UNTHREAD(n); + if ( k == c->k ) goto followed_thread; + + found: + if ( ppn ) *ppn = p; + return n; + + followed_thread: + if ( ppn ) { RMB(); goto retry; } + return c; +} + + +set_t *set_alloc(void) +{ + set_t *s; + + s = malloc(sizeof(*s)); + mcs_init(&s->root.lock); + s->root.k = SENTINEL_KEYMIN; + s->root.v = (setval_t)(~0UL); + s->root.l = THREAD(&s->root); + s->root.r = THREAD(&s->sentinel); + + mcs_init(&s->sentinel.lock); + s->sentinel.k = SENTINEL_KEYMAX; + + return s; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + setval_t ov; + node_t *p, *n, *new = NULL; + qnode_t qp, qn; + ptst_t *ptst; + int lock_flags, r = 0; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + ov = NULL; + lock_flags = 0; + + n = search(s, k, &p); + + if ( !IS_THREAD(n) ) + { + LOCK(n, qn, N_LOCKED); + ov = n->v; + if ( overwrite ) n->v = v; + } + else + { + if ( new == NULL ) + { + new = gc_alloc(ptst, gc_id); + mcs_init(&new->lock); + new->k = k; + new->v = v; + } + + LOCK(p, qp, P_LOCKED); + + if ( p->k < k ) + { + if ( (p->r != n) || (UNTHREAD(n)->k < k) ) goto retry; + new->l = THREAD(p); + new->r = n; + WMB(); + p->r = new; + } + else + { + if ( (p->l != n) || (UNTHREAD(n)->k > k) ) goto retry; + new->l = n; + new->r = THREAD(p); + WMB(); + p->l = new; + } + + new = NULL; /* node is now in tree */ + } + + r = 1; /* success */ + + retry: + UNLOCK(p, qp, P_LOCKED); + UNLOCK(n, qn, N_LOCKED); + } + while ( !r ); + + if ( new ) gc_free(ptst, new, gc_id); + critical_exit(ptst); + return ov; +} + + +#define FIND_HELPER(_d1, _d2, _n, _ap, _a) \ +{ \ + node_t *ac; \ + (_ap) = NULL; \ + (_a) = (_n); \ + ac = (_a)->_d1; \ + while ( !IS_THREAD(ac) ) \ + { \ + (_ap) = (_a); \ + (_a) = ac; \ + ac = (_a)->_d2; \ + } \ +} + + +/* + * Order of first two cases does matter! If @n is the left-link of @p, then + * we use DELETE_HELPER(l, r). What matters is what we do when @n is a leaf. + * In this case we end up choosing n->l to propagate to p->l -- this + * happens to be the correct choice :-) + * + * NB. Note symmetric deletion cases dependent on parameter @dir. We + * could simplify the algorithm by always following one direction. In fact, + * that is slightly worse, or much worse, depending on the chosen case + * (hint: works best with dir hardwired to zero :-).... + */ +#define dir 0 +#define DELETE_HELPER(_d1, _d2) \ + FIND_HELPER(_d1, _d2, n, pal, al); \ + FIND_HELPER(_d2, _d1, n, par, ar); \ + if ( IS_THREAD(n ## _d2) ) \ + { \ + if ( IS_THREAD(n ## _d1) ) \ + { \ + *p_pc = n ## _d1; \ + } \ + else \ + { \ + LOCK(al, qal, AL_LOCKED); \ + if ( al->_d2 != THREAD(n) ) goto retry; \ + *p_pc = n ## _d1; \ + al->_d2 = n ## _d2; \ + } \ + } \ + else if ( IS_THREAD(n ## _d1) ) \ + { \ + LOCK(ar, qar, AR_LOCKED); \ + if ( ar->_d1 != THREAD(n) ) goto retry; \ + *p_pc = n ## _d2; \ + ar->_d1 = n ## _d1; \ + } \ + else if ( dir ) \ + { \ + if ( par != n ) \ + { \ + LOCK(par, qpar, PAR_LOCKED); \ + if ( par->_d1 != ar ) goto retry; \ + } \ + LOCK(al, qal, AL_LOCKED); \ + LOCK(ar, qar, AR_LOCKED); \ + if ( (al->_d2 != THREAD(n)) || (ar->_d1 != THREAD(n)) ) goto retry; \ + al->_d2 = THREAD(ar); \ + ar->_d1 = n ## _d1; \ + if ( par != n ) \ + { \ + ac = ar->_d2; \ + ar->_d2 = n ## _d2; \ + par->_d1 = IS_THREAD(ac) ? THREAD(ar) : ac; \ + } \ + WMB(); /* New links in AR must appear before it is raised. 
*/ \
+        *p_pc = ar;                                                    \
+    }                                                                  \
+    else                                                               \
+    {                                                                  \
+        if ( pal != n )                                                \
+        {                                                              \
+            LOCK(pal, qpal, PAL_LOCKED);                               \
+            if ( pal->_d2 != al ) goto retry;                          \
+        }                                                              \
+        LOCK(al, qal, AL_LOCKED);                                      \
+        LOCK(ar, qar, AR_LOCKED);                                      \
+        if ( (al->_d2 != THREAD(n)) || (ar->_d1 != THREAD(n)) ) goto retry; \
+        al->_d2 = n ## _d2;                                            \
+        ar->_d1 = THREAD(al);                                          \
+        if ( pal != n )                                                \
+        {                                                              \
+            ac = al->_d1;                                              \
+            al->_d1 = n ## _d1;                                        \
+            pal->_d2 = IS_THREAD(ac) ? THREAD(al) : ac;                \
+        }                                                              \
+        WMB(); /* New links in AL must appear before it is raised. */  \
+        *p_pc = al;                                                    \
+    }
+
+
+/* @k: key of node to be deleted */
+setval_t set_remove(set_t *s, setkey_t k)
+{
+    node_t *p, *n, *nl, *nr, *al, *ar, *pal, *par, *ac, **p_pc;
+    qnode_t qp, qn, qal, qar, qpal, qpar;
+    int r = 0, lock_flags;
+    setval_t v;
+    ptst_t *ptst;
+
+    k = CALLER_TO_INTERNAL_KEY(k);
+
+    ptst = critical_enter();
+
+    do {
+        v = NULL;
+        lock_flags = 0;
+
+        n = search(s, k, &p);
+        if ( IS_THREAD(n) ) goto out;
+
+        LOCK(p, qp, P_LOCKED);
+        p_pc = (p->k > n->k) ? &p->l : &p->r;
+        if ( *p_pc != n ) goto retry;
+
+        LOCK(n, qn, N_LOCKED);
+
+        nl = n->l;
+        nr = n->r;
+
+        if ( p->k > n->k )
+        {
+            /* @n is leftwards link from @p. */
+            DELETE_HELPER(l, r);
+        }
+        else
+        {
+            /* @n is rightwards link from @p. */
+            DELETE_HELPER(r, l);
+        }
+
+        r = 1;
+        v = n->v;
+        n->v = NULL;
+
+    retry:
+        UNLOCK(p, qp, P_LOCKED);
+        UNLOCK(n, qn, N_LOCKED);
+        UNLOCK(pal, qpal, PAL_LOCKED);
+        UNLOCK(par, qpar, PAR_LOCKED);
+        UNLOCK(al, qal, AL_LOCKED);
+        UNLOCK(ar, qar, AR_LOCKED);
+    }
+    while ( !r );
+
+    gc_free(ptst, n, gc_id);
+
+ out:
+    critical_exit(ptst);
+    return v;
+}
+
+
+setval_t set_lookup(set_t *s, setkey_t k)
+{
+    node_t *n;
+    setval_t v;
+    ptst_t *ptst;
+
+    k = CALLER_TO_INTERNAL_KEY(k);
+
+    ptst = critical_enter();
+
+    n = search(s, k, NULL);
+    v = (!IS_THREAD(n)) ? n->v : NULL;
+
+    critical_exit(ptst);
+    return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+    gc_id = gc_add_allocator(sizeof(node_t));
+}
diff --git a/src/mcas/bst_lock_kung.c b/src/mcas/bst_lock_kung.c
new file mode 100644
index 000000000..dedc62916
--- /dev/null
+++ b/src/mcas/bst_lock_kung.c
@@ -0,0 +1,372 @@
+/******************************************************************************
+ * bst_lock_kung.c
+ *
+ * Lock-based binary search trees (BSTs), based on:
+ *  H. T. Kung and Philip L. Lehman.
+ *  "Concurrent manipulation of binary search trees".
+ *  ACM Transactions on Database Systems, Vol. 5, No. 3, September 1980.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+    * notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    * copyright notice, this list of conditions and the following
+    * disclaimer in the documentation and/or other materials provided
+    * with the distribution.  Neither the name of the Keir Fraser
+    * nor the names of its contributors may be used to endorse or
+    * promote products derived from this software without specific
+    * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "portable_defns.h" +#include "gc.h" +#include "set.h" + +#define IS_BLUE(_n) ((int)(_n)->v & 1) +#define MK_BLUE(_n) ((_n)->v = (setval_t)((unsigned long)(_n)->v | 1)) + +#define GET_VALUE(_n) ((setval_t)((unsigned long)(_n)->v & ~1UL)) + +#define LEFT 0 +#define RIGHT 1 +#define FOLLOW(_n, _d) ((_d) ? (_n)->r : (_n)->l) +#define UPDATE(_n, _d, _x) ((_d) ? ((_n)->r = (_x)) : ((_n)->l = (_x))) +#define FLIP(_d) ((_d)^1) + +typedef struct node_st node_t; +typedef struct set_st set_t; + +struct node_st +{ + setkey_t k; + setval_t v; + node_t *l, *r, *p; + mcs_lock_t lock; +}; + +struct set_st +{ + node_t root; +}; + +static int gc_id; + +#define LOCK(_n, _pqn) mcs_lock(&(_n)->lock, (_pqn)) +#define UNLOCK(_n, _pqn) mcs_unlock(&(_n)->lock, (_pqn)) + + +static node_t *weak_find(node_t *n, setkey_t k) +{ + while ( n != NULL ) + { + if ( n->k < k ) + n = n->r; + else if ( n->k > k ) + n = n->l; + else + break; + } + return n; +} + + +static node_t *find(node_t *n, setkey_t k, qnode_t *qn, int *pdir) +{ + int dir; + node_t *f, *s; + + s = n; + + do { + f = s; + retry: + if ( k < f->k ) + { + dir = LEFT; + s = f->l; + } + else + { + dir = RIGHT; + s = f->r; + } + } + while ( (s != NULL) && (s->k != k) ); + + LOCK(f, qn); + if ( IS_BLUE(f) ) + { + UNLOCK(f, qn); + f = f->p; + goto retry; + } + if ( s != FOLLOW(f, dir) ) + { + UNLOCK(f, qn); + goto retry; + } + + *pdir = dir; + return f; +} + + +static node_t *rotate(ptst_t *ptst, node_t *a, int dir1, + int dir2, node_t **pc, qnode_t *pqn[]) +{ + node_t *b = FOLLOW(a, dir1), *c = FOLLOW(b, dir2); + node_t *bp = gc_alloc(ptst, gc_id), *cp = gc_alloc(ptst, gc_id); + qnode_t c_qn; + + LOCK(c, &c_qn); + + memcpy(bp, b, sizeof(*b)); + memcpy(cp, c, sizeof(*c)); + + mcs_init(&bp->lock); + mcs_init(&cp->lock); + + LOCK(bp, pqn[3]); + LOCK(cp, pqn[2]); + + assert(!IS_BLUE(a)); + assert(!IS_BLUE(b)); + assert(!IS_BLUE(c)); + + UPDATE(cp, FLIP(dir2), bp); + UPDATE(bp, dir2, FOLLOW(c, FLIP(dir2))); + + UPDATE(a, dir1, cp); + b->p = a; + MK_BLUE(b); + c->p = cp; + MK_BLUE(c); + + gc_free(ptst, b, gc_id); + gc_free(ptst, c, gc_id); + + UNLOCK(a, pqn[0]); + UNLOCK(b, pqn[1]); + UNLOCK(c, &c_qn); + + *pc = bp; + return cp; +} + + +static void _remove(ptst_t *ptst, node_t *a, int dir1, int dir2, qnode_t **pqn) +{ + node_t *b = FOLLOW(a, dir1), *c = FOLLOW(b, dir2); + assert(FOLLOW(b, FLIP(dir2)) == NULL); + assert(!IS_BLUE(a)); + assert(!IS_BLUE(b)); + UPDATE(a, dir1, c); + UPDATE(b, FLIP(dir2), c); + b->p = a; + MK_BLUE(b); + gc_free(ptst, b, gc_id); + UNLOCK(a, pqn[0]); + UNLOCK(b, pqn[1]); +} + + +static void delete_by_rotation(ptst_t *ptst, node_t *f, int dir, + qnode_t *pqn[], int lock_idx) +{ + node_t *g, *h, *s = FOLLOW(f, dir); + + if ( s->v != NULL ) + { + UNLOCK(f, pqn[lock_idx+0]); + UNLOCK(s, pqn[lock_idx+1]); + return; + } + + if ( s->l == NULL ) + _remove(ptst, f, dir, 
RIGHT, pqn+lock_idx); + else if ( s->r == NULL ) + _remove(ptst, f, dir, LEFT, pqn+lock_idx); + else + { + g = rotate(ptst, f, dir, LEFT, &h, pqn+lock_idx); + lock_idx ^= 2; + if ( h->l == NULL ) + { + assert(h->v == NULL); + _remove(ptst, g, RIGHT, RIGHT, pqn+lock_idx); + } + else + { + delete_by_rotation(ptst, g, RIGHT, pqn, lock_idx); + LOCK(f, pqn[0]); + if ( (g != FOLLOW(f, dir)) || IS_BLUE(f) ) + { + UNLOCK(f, pqn[0]); + } + else + { + LOCK(g, pqn[1]); + /* + * XXX Check that there is a node H to be rotated up. + * This is missing from the original paper, and must surely + * be a bug (we lost all locks at previous delete_by_rotation, + * so we can't know the existence of G's children). + */ + if ( g->r != NULL ) + { + g = rotate(ptst, f, dir, RIGHT, &h, pqn); + UNLOCK(g, pqn[2]); + UNLOCK(h, pqn[3]); + } + else + { + UNLOCK(f, pqn[0]); + UNLOCK(g, pqn[1]); + } + } + } + } +} + + +set_t *set_alloc(void) +{ + set_t *s; + + s = malloc(sizeof(*s)); + mcs_init(&s->root.lock); + s->root.k = SENTINEL_KEYMIN; + s->root.v = (setval_t)(~1UL); /* dummy root node is white. */ + s->root.l = NULL; + s->root.r = NULL; + + return s; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + node_t *f, *w; + qnode_t f_qn, w_qn; + int dir; + setval_t ov = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + retry: + f = find(&s->root, k, &f_qn, &dir); + + if ( (w = FOLLOW(f, dir)) != NULL ) + { + /* Protected by parent lock. */ + assert(!IS_BLUE(w)); + ov = w->v; + if ( overwrite || (ov == NULL) ) w->v = v; + } + else + { + w = gc_alloc(ptst, gc_id); + w->l = NULL; + w->r = NULL; + w->v = v; + w->k = k; + mcs_init(&w->lock); + UPDATE(f, dir, w); + } + + UNLOCK(f, &f_qn); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + node_t *f, *w; + qnode_t qn[4], *pqn[] = { qn+0, qn+1, qn+2, qn+3, qn+0, qn+1 }; + int dir; + setval_t v = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + f = find(&s->root, k, pqn[0], &dir); + if ( (w = FOLLOW(f, dir)) != NULL ) + { + LOCK(w, pqn[1]); + v = w->v; + w->v = NULL; + assert(!IS_BLUE(w)); + delete_by_rotation(ptst, f, dir, pqn, 0); + } + else + { + UNLOCK(f, pqn[0]); + } + + critical_exit(ptst); + + return v; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + node_t *n; + setval_t v = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = weak_find(&s->root, k); + if ( n != NULL ) v = GET_VALUE(n); + + critical_exit(ptst); + return v; +} + + +void _init_set_subsystem(void) +{ + gc_id = gc_add_allocator(sizeof(node_t)); +} diff --git a/src/mcas/bst_lock_manber.c b/src/mcas/bst_lock_manber.c new file mode 100644 index 000000000..9b381b03e --- /dev/null +++ b/src/mcas/bst_lock_manber.c @@ -0,0 +1,411 @@ +/****************************************************************************** + * bst_lock_manber.c + * + * Lock-based binary search trees (BSTs), based on: + * Udi Manber and Richard E. Ladner. + * "Concurrency control in a dynamic search structure". + * ACM Transactions on Database Systems, Vol. 9, No. 3, September 1984. + * + * Copyright (c) 2002-2003, K A Fraser +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "portable_defns.h" +#include "gc.h" +#include "set.h" + +#define GARBAGE_FLAG 1 +#define REDUNDANT_FLAG 2 + +#define IS_GARBAGE(_n) ((int)(_n)->v & GARBAGE_FLAG) +#define MK_GARBAGE(_n) \ + ((_n)->v = (setval_t)((unsigned long)(_n)->v | GARBAGE_FLAG)) + +#define IS_REDUNDANT(_n) ((int)(_n)->v & REDUNDANT_FLAG) +#define MK_REDUNDANT(_n) \ + ((_n)->v = (setval_t)((unsigned long)(_n)->v | REDUNDANT_FLAG)) + +#define GET_VALUE(_n) ((setval_t)((unsigned long)(_n)->v & ~3UL)) + +#define FOLLOW(_n, _k) (((_n)->k < (_k)) ? 
(_n)->r : (_n)->l) + +typedef struct node_st node_t; +typedef struct set_st set_t; + +struct node_st +{ + setkey_t k; + setval_t v; + node_t *l, *r, *p; + int copy; + mcs_lock_t lock; +}; + +struct set_st +{ + node_t root; +}; + +static int gc_id, hook_id; + +#define LOCK(_n, _pqn) mcs_lock(&(_n)->lock, (_pqn)) +#define UNLOCK(_n, _pqn) mcs_unlock(&(_n)->lock, (_pqn)) + + +static node_t *weak_search(node_t *n, setkey_t k) +{ + while ( (n != NULL) && (n->k != k) ) n = FOLLOW(n, k); + return n; +} + + +static node_t *strong_search(node_t *n, setkey_t k, qnode_t *qn) +{ + node_t *b = n; + node_t *a = FOLLOW(b, k); + + retry: + while ( (a != NULL) && (a->k != k) ) + { + b = a; + a = FOLLOW(a, k); + } + + if ( a == NULL ) + { + LOCK(b, qn); + if ( IS_GARBAGE(b) ) + { + UNLOCK(b, qn); + a = b->p; + goto retry; + } + else if ( (a = FOLLOW(b, k)) != NULL ) + { + UNLOCK(b, qn); + goto retry; + } + + a = b; + } + else + { + LOCK(a, qn); + if ( IS_GARBAGE(a) ) + { + UNLOCK(a, qn); + a = a->p; + goto retry; + } + else if ( IS_REDUNDANT(a) ) + { + UNLOCK(a, qn); + a = a->r; + goto retry; + } + } + + return a; +} + + +static void redundancy_removal(ptst_t *ptst, void *x) +{ + node_t *d, *e, *r; + qnode_t d_qn, e_qn; + setkey_t k; + + if ( x == NULL ) return; + + e = x; + k = e->k; + + if ( e->copy ) + { + r = weak_search(e->l, k); + assert((r == NULL) || !IS_REDUNDANT(r) || (r->r == e)); + assert(r != e); + redundancy_removal(ptst, r); + } + + do { + if ( IS_GARBAGE(e) ) return; + d = e->p; + LOCK(d, &d_qn); + if ( IS_GARBAGE(d) ) UNLOCK(d, &d_qn); + } + while ( IS_GARBAGE(d) ); + + LOCK(e, &e_qn); + + if ( IS_GARBAGE(e) || !IS_REDUNDANT(e) ) goto out_de; + + if ( d->l == e ) + { + d->l = e->l; + } + else + { + assert(d->r == e); + d->r = e->l; + } + + assert(e->r != NULL); + assert(e->r->k == k); + assert(e->r->copy); + assert(!IS_GARBAGE(e->r)); + assert(!e->copy); + + MK_GARBAGE(e); + + if ( e->l != NULL ) e->l->p = d; + + e->r->copy = 0; + + gc_free(ptst, e, gc_id); + + out_de: + UNLOCK(d, &d_qn); + UNLOCK(e, &e_qn); +} + + +/* NB. Node X is not locked on entry. */ +static void predecessor_substitution(ptst_t *ptst, set_t *s, node_t *x) +{ + node_t *a, *b, *e, *f, **pac; + qnode_t a_qn, b_qn, e_qn, f_qn; + setkey_t k; + + b = x; + k = x->k; + + do { + if ( (b == NULL) || (b->v != NULL) ) return; + a = b->p; + LOCK(a, &a_qn); + if ( IS_GARBAGE(a) ) UNLOCK(a, &a_qn); + } + while ( IS_GARBAGE(a) ); + + regain_lock: + LOCK(b, &b_qn); + + /* + * We do nothing if: + * 1. The node is already deleted (and is thus garbage); or + * 2. The node is redundant (redundancy removal will do it); or + * 3. The node has been reused. + * These can all be checked by looking at the value field. + */ + if ( b->v != NULL ) goto out_ab; + + /* + * If this node is a copy, then we can do redundancy removal right now. + * This is an improvement over Manber and Ladner's work. + */ + if ( b->copy ) + { + e = weak_search(b->l, k); + UNLOCK(b, &b_qn); + assert((e == NULL) || !IS_REDUNDANT(e) || (e->r == b)); + assert(e != b); + redundancy_removal(ptst, e); + goto regain_lock; + } + + pac = (a->k < k) ? 
&a->r : &a->l; + assert(*pac == b); + assert(b->p == a); + + if ( (b->l == NULL) || (b->r == NULL) ) + { + if ( b->r == NULL ) *pac = b->l; else *pac = b->r; + MK_GARBAGE(b); + if ( *pac != NULL ) (*pac)->p = a; + gc_free(ptst, b, gc_id); + goto out_ab; + } + else + { + e = strong_search(b->l, b->k, &e_qn); + assert(!IS_REDUNDANT(e) && !IS_GARBAGE(e) && (b != e)); + assert(e->k < b->k); + f = gc_alloc(ptst, gc_id); + f->k = e->k; + f->v = GET_VALUE(e); + f->copy = 1; + f->r = b->r; + f->l = b->l; + mcs_init(&f->lock); + LOCK(f, &f_qn); + + e->r = f; + MK_REDUNDANT(e); + *pac = f; + f->p = a; + f->r->p = f; + f->l->p = f; + + MK_GARBAGE(b); + gc_free(ptst, b, gc_id); + gc_add_ptr_to_hook_list(ptst, e, hook_id); + UNLOCK(e, &e_qn); + UNLOCK(f, &f_qn); + } + + out_ab: + UNLOCK(a, &a_qn); + UNLOCK(b, &b_qn); +} + + +set_t *set_alloc(void) +{ + set_t *s; + + s = malloc(sizeof(*s)); + mcs_init(&s->root.lock); + s->root.k = SENTINEL_KEYMIN; + /* Dummy root isn't redundant, nor is it garbage. */ + s->root.v = (setval_t)(~3UL); + s->root.l = NULL; + s->root.r = NULL; + s->root.p = NULL; + s->root.copy = 0; + + return s; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + node_t *a, *new; + qnode_t qn; + setval_t ov = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + a = strong_search(&s->root, k, &qn); + if ( a->k != k ) + { + new = gc_alloc(ptst, gc_id); + mcs_init(&new->lock); + new->k = k; + new->v = v; + new->l = NULL; + new->r = NULL; + new->p = a; + new->copy = 0; + if ( a->k < k ) a->r = new; else a->l = new; + } + else + { + /* Direct A->V access is okay, as A isn't garbage or redundant. */ + ov = a->v; + if ( overwrite || (ov == NULL) ) a->v = v; + } + + UNLOCK(a, &qn); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + node_t *a; + qnode_t qn; + setval_t v = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + a = strong_search(&s->root, k, &qn); + /* Direct check of A->V is okay, as A isn't garbage or redundant. */ + if ( (a->k == k) && (a->v != NULL) ) + { + v = a->v; + a->v = NULL; + UNLOCK(a, &qn); + predecessor_substitution(ptst, s, a); + } + else + { + UNLOCK(a, &qn); + } + + critical_exit(ptst); + + return v; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + node_t *n; + setval_t v = NULL; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = weak_search(&s->root, k); + if ( n != NULL ) v = GET_VALUE(n); + + critical_exit(ptst); + return v; +} + + +void _init_set_subsystem(void) +{ + gc_id = gc_add_allocator(sizeof(node_t)); + hook_id = gc_add_hook(redundancy_removal); +} diff --git a/src/mcas/bst_mcas.c b/src/mcas/bst_mcas.c new file mode 100644 index 000000000..c698ccc8e --- /dev/null +++ b/src/mcas/bst_mcas.c @@ -0,0 +1,436 @@ +/****************************************************************************** + * bst_mcas.c + * + * Lock-free binary search trees (BSTs), based on MCAS. + * Uses a threaded representation to synchronise searches with deletions. + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include "portable_defns.h" +#include "gc.h" +#include "set.h" + +/* Allow MCAS marks to be detected using a single bitop (see IS_MCAS_OWNED). */ +#define MARK_IN_PROGRESS 2 +#define MARK_PTR_TO_CD 3 + +#define MARK_THREAD 1 +#define MARK_GARBAGE 4 + +#define THREAD(_p) ((node_t *)((int_addr_t)(_p)|(MARK_THREAD))) +#define GARBAGE(_p) ((node_t *)((int_addr_t)(_p)|(MARK_GARBAGE))) +#define UNTHREAD(_p) ((node_t *)((int_addr_t)(_p)&~MARK_THREAD)) +#define UNGARBAGE(_p) ((node_t *)((int_addr_t)(_p)&~MARK_GARBAGE)) +/* Following only matches 2 and 3 (mod 4). Those happen to be MCAS marks :) */ +#define IS_MCAS_OWNED(_p) ((int)((int_addr_t)(_p)&2)) +/* Matches 1 and 3 (mod 4). So only use if the ref is *not* owned by MCAS!! */ +#define IS_THREAD(_p) ((int)((int_addr_t)(_p)&MARK_THREAD)) +/* Only use if the ref is *not* owned by MCAS (which may use bit 2)!! */ +#define IS_GARBAGE(_p) ((int)((int_addr_t)(_p)&MARK_GARBAGE)) + +#include "mcas.c" + +typedef struct node_st node_t; +typedef struct set_st set_t; + +struct node_st +{ + setkey_t k; + setval_t v; + node_t *l, *r; +}; + +struct set_st +{ + node_t root; + node_t sentinel; +}; + +static int gc_id; + +#define READ_LINK(_var, _link) \ + do { \ + (_var) = (_link); \ + if ( !IS_MCAS_OWNED(_var) ) break; \ + mcas_fixup((void **)&(_link), (_var)); \ + } while ( 1 ) + +#define WEAK_READ_LINK(_var, _link) \ + do { \ + READ_LINK(_var, _link); \ + (_var) = UNGARBAGE(_var); \ + } while ( 0 ) + +#define STRONG_READ_LINK(_var, _link) \ + do { \ + READ_LINK(_var, _link); \ + if ( IS_GARBAGE(_var) ) goto retry; \ + } while ( 0 ) + +#define PROCESS_VAL(_v,_pv) \ + do { \ + while ( IS_MCAS_OWNED(_v) ) \ + { \ + mcas_fixup((void **)(_pv), (_v)); \ + (_v) = *(_pv); \ + } \ + } while ( 0 ) + + +/* + * Search for node with key == k. Return NULL if none such, else ptr to node. + * @ppn is filled in with parent node, or closest leaf if no match. + * p and n will both be unmarked and adjacent on return. 
+ */ +static node_t *search(set_t *s, setkey_t k, node_t **ppn) +{ + node_t *p, *n, *c; + + retry: + p = &s->root; + WEAK_READ_LINK(n, p->r); + + while ( !IS_THREAD(n) ) + { + if ( k < n->k ) { + WEAK_READ_LINK(c, n->l); + assert(UNTHREAD(c)->k < n->k); + } else if ( k > n->k ) { + WEAK_READ_LINK(c, n->r); + assert(UNTHREAD(c)->k > n->k); + } else /* k == n->k */ + goto found; + + p = n; n = c; + } + + /* Follow final thread, just in case. */ + c = UNTHREAD(n); + if ( k == c->k ) goto followed_thread; + + found: + if ( ppn ) *ppn = p; + return n; + + followed_thread: + if ( ppn ) { RMB(); goto retry; } + return c; +} + + +set_t *set_alloc(void) +{ + set_t *s; + + static int mcas_inited = 0; + if ( !CASIO(&mcas_inited, 0, 1) ) + { + if ( (sizeof(node_t) % 8) != 0 ) + { + fprintf(stderr, "FATAL: node_t must be multiple of 8 bytes\n"); + *((int*)0)=0; + } + mcas_init(); + } + + s = malloc(sizeof(*s)); + s->root.k = SENTINEL_KEYMIN; + s->root.v = NULL; + s->root.l = THREAD(&s->root); + s->root.r = THREAD(&s->sentinel); + + s->sentinel.k = SENTINEL_KEYMAX; + + return s; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + setval_t ov, nov; + node_t *p, *n, *new = NULL, **ppc; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + retry: + ov = NULL; + + n = search(s, k, &p); + if ( !IS_THREAD(n) ) + { + /* Already a @k node in the set: update its mapping. */ + nov = n->v; + do { + ov = nov; + PROCESS_VAL(ov, &n->v); + if ( ov == NULL ) goto retry; + } + while ( overwrite && ((nov = CASPO(&n->v, ov, v)) != ov) ); + + goto out; + } + + if ( new == NULL ) + { + new = gc_alloc(ptst, gc_id); + new->k = k; + new->v = v; + } + + if ( p->k < k ) + { + /* Ensure we insert in the correct interval. */ + if ( UNTHREAD(n)->k < k ) goto retry; + new->l = THREAD(p); + new->r = n; + ppc = &p->r; + } + else + { + if ( UNTHREAD(n)->k > k ) goto retry; + new->l = n; + new->r = THREAD(p); + ppc = &p->l; + } + + WMB_NEAR_CAS(); + } + while ( CASPO(ppc, n, new) != n ); + + new = NULL; + + out: + if ( new ) gc_free(ptst, new, gc_id); + critical_exit(ptst); + return ov; +} + + +#define FIND_HELPER(_d1, _d2, _n, _ap, _a) \ +{ \ + node_t *ac; \ + (_ap) = NULL; \ + (_a) = (_n); \ + WEAK_READ_LINK(ac, (_a)->_d1); \ + while ( !IS_THREAD(ac) ) \ + { \ + (_ap) = (_a); \ + (_a) = ac; \ + WEAK_READ_LINK(ac, (_a)->_d2); \ + } \ +} + + +/* + * Order of first two cases does matter! If @n is the left-link of @p, then + * we use DELETE_HELPER(l, r). What matters is what we do when @n is a leaf. + * In this case we end up choosing n->l to propagate to p->l -- this + * happens to be the correct choice :-) + * + * NB. Note symmetric deletion cases dependent on parameter @dir. We + * could simplify the algorithm by always following one direction. In fact, + * that is slightly worse, or much worse, depending on the chosen case + * (hint: works best with dir hardwired to zero :-).... 
+ */ +#define dir 0 +#define DELETE_HELPER(_d1, _d2) \ + FIND_HELPER(_d1, _d2, n, pal, al); \ + FIND_HELPER(_d2, _d1, n, par, ar); \ + if ( IS_THREAD(n ## _d2) ) \ + { \ + if ( IS_THREAD(n ## _d1) ) \ + { \ + r = mcas(4, \ + (void **)&n->v, v, NULL, \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, n ## _d1); \ + } \ + else \ + { \ + if ( al == n ) goto retry; \ + r = mcas(5, \ + (void **)&n->v, v, NULL, \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, n ## _d1, \ + (void **)&al->_d2, THREAD(n), n ## _d2); \ + } \ + } \ + else if ( IS_THREAD(n ## _d1) ) \ + { \ + if ( ar == n ) goto retry; \ + r = mcas(5, \ + (void **)&n->v, v, NULL, \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, n ## _d2, \ + (void **)&ar->_d1, THREAD(n), n ## _d1); \ + } \ + else if ( dir ) \ + { \ + if ( (al == n) || (ar == n) ) goto retry; \ + if ( par == n ) \ + { \ + r = mcas(6, \ + (void **)&n->v, v, NULL, \ + (void **)&ar->_d1, THREAD(n), n ## _d1, \ + (void **)&al->_d2, THREAD(n), THREAD(ar), \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, ar); \ + } \ + else \ + { \ + STRONG_READ_LINK(ac, ar->_d2); \ + r = mcas(8, \ + (void **)&n->v, v, NULL, \ + (void **)&par->_d1, ar, \ + (IS_THREAD(ac) ? THREAD(ar) : ac), \ + (void **)&ar->_d2, ac, n ## _d2, \ + (void **)&ar->_d1, THREAD(n), n ## _d1, \ + (void **)&al->_d2, THREAD(n), THREAD(ar), \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, ar); \ + } \ + } \ + else \ + { \ + if ( (al == n) || (ar == n) ) goto retry; \ + if ( pal == n ) \ + { \ + r = mcas(6, \ + (void **)&n->v, v, NULL, \ + (void **)&al->_d2, THREAD(n), n ## _d2, \ + (void **)&ar->_d1, THREAD(n), THREAD(al), \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, al); \ + } \ + else \ + { \ + STRONG_READ_LINK(ac, al->_d1); \ + r = mcas(8, \ + (void **)&n->v, v, NULL, \ + (void **)&pal->_d2, al, \ + (IS_THREAD(ac) ? THREAD(al) : ac), \ + (void **)&al->_d1, ac, n ## _d1, \ + (void **)&al->_d2, THREAD(n), n ## _d2, \ + (void **)&ar->_d1, THREAD(n), THREAD(al), \ + (void **)&n->l, nl, GARBAGE(nl), \ + (void **)&n->r, nr, GARBAGE(nr), \ + (void **)p_pc, n, al); \ + } \ + } + + +/* @k: key of node to be deleted */ +setval_t set_remove(set_t *s, setkey_t k) +{ + node_t *p, *n, *nl, *nr, *al, *ar, *pal, *par, *ac, **p_pc; + int r = 0; + setval_t v; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do + { + retry: + v = NULL; + + /* Node present? */ + n = search(s, k, &p); + if ( IS_THREAD(n) ) goto out; + + /* Already deleted? */ + v = n->v; + PROCESS_VAL(v, &n->v); + if ( v == NULL ) goto out; + + STRONG_READ_LINK(nl, n->l); + STRONG_READ_LINK(nr, n->r); + p_pc = (p->k > n->k) ? &p->l : &p->r; + + if ( p->k > n->k ) + { + /* @n is leftwards link from @p. */ + DELETE_HELPER(l, r); + } + else + { + /* @n is rightwards link from @p. */ + DELETE_HELPER(r, l); + } + } while ( !r ); + + gc_free(ptst, n, gc_id); + + out: + critical_exit(ptst); + return v; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + node_t *n; + setval_t v; + ptst_t *ptst; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = search(s, k, NULL); + v = (!IS_THREAD(n)) ? 
n->v : NULL;
+    PROCESS_VAL(v, &n->v);
+
+    critical_exit(ptst);
+    return v;
+}
+
+
+void _init_set_subsystem(void)
+{
+    gc_id = gc_add_allocator(sizeof(node_t));
+}
diff --git a/src/mcas/gc.c b/src/mcas/gc.c
new file mode 100644
index 000000000..f5445c4b3
--- /dev/null
+++ b/src/mcas/gc.c
@@ -0,0 +1,671 @@
+/******************************************************************************
+ * gc.c
+ *
+ * A fully recycling epoch-based garbage collector. Works by counting
+ * threads in and out of critical regions, to work out when
+ * garbage queues can be fully deleted.
+ *
+ * Copyright (c) 2001-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution. Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "portable_defns.h"
+#include "gc.h"
+
+/*#define MINIMAL_GC*/
+/*#define YIELD_TO_HELP_PROGRESS*/
+#define PROFILE_GC
+
+/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
+#define INVALID_BYTE 0
+#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));
+
+/* Number of unique block sizes we can deal with. */
+#define MAX_SIZES 20
+
+#define MAX_HOOKS 4
+
+/*
+ * The initial number of allocation chunks for each per-blocksize list.
+ * Popular allocation lists will steadily increase the allocation unit
+ * in line with demand.
+ */
+#define ALLOC_CHUNKS_PER_LIST 10
+
+/*
+ * How many times should a thread call gc_enter(), seeing the same epoch
+ * each time, before it makes a reclaim attempt?
+ */
+#define ENTRIES_PER_RECLAIM_ATTEMPT 100
+
+/*
+ *  0: current epoch -- threads are moving to this;
+ * -1: some threads may still throw garbage into this epoch;
+ * -2: no threads can see this epoch => we can zero garbage lists;
+ * -3: all threads see zeros in these garbage lists => move to alloc lists.
+ */
+#ifdef WEAK_MEM_ORDER
+#define NR_EPOCHS 4
+#else
+#define NR_EPOCHS 3
+#endif
+
+/*
+ * A chunk amortises the cost of allocation from shared lists.
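+ * (Each CAS on a shared list transfers a whole chunk of BLKS_PER_CHUNK
+ * == 100 block pointers rather than a single block.)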
It also + * helps when zeroing nodes, as it increases per-cacheline pointer density + * and means that node locations don't need to be brought into the cache + * (most architectures have a non-temporal store instruction). + */ +#define BLKS_PER_CHUNK 100 +typedef struct chunk_st chunk_t; +struct chunk_st +{ + chunk_t *next; /* chunk chaining */ + unsigned int i; /* the next entry in blk[] to use */ + void *blk[BLKS_PER_CHUNK]; +}; + +static struct gc_global_st +{ + CACHE_PAD(0); + + /* The current epoch. */ + VOLATILE unsigned int current; + CACHE_PAD(1); + + /* Exclusive access to gc_reclaim(). */ + VOLATILE unsigned int inreclaim; + CACHE_PAD(2); + + /* + * RUN-TIME CONSTANTS (to first approximation) + */ + + /* Memory page size, in bytes. */ + unsigned int page_size; + + /* Node sizes (run-time constants). */ + int nr_sizes; + int blk_sizes[MAX_SIZES]; + + /* Registered epoch hooks. */ + int nr_hooks; + hook_fn_t hook_fns[MAX_HOOKS]; + CACHE_PAD(3); + + /* + * DATA WE MAY HIT HARD + */ + + /* Chain of free, empty chunks. */ + chunk_t * VOLATILE free_chunks; + + /* Main allocation lists. */ + chunk_t * VOLATILE alloc[MAX_SIZES]; + VOLATILE unsigned int alloc_size[MAX_SIZES]; +#ifdef PROFILE_GC + VOLATILE unsigned int total_size; + VOLATILE unsigned int allocations; +#endif +} gc_global; + + +/* Per-thread state. */ +struct gc_st +{ + /* Epoch that this thread sees. */ + unsigned int epoch; + + /* Number of calls to gc_entry() since last gc_reclaim() attempt. */ + unsigned int entries_since_reclaim; + +#ifdef YIELD_TO_HELP_PROGRESS + /* Number of calls to gc_reclaim() since we last yielded. */ + unsigned int reclaim_attempts_since_yield; +#endif + + /* Used by gc_async_barrier(). */ + void *async_page; + int async_page_state; + + /* Garbage lists. */ + chunk_t *garbage[NR_EPOCHS][MAX_SIZES]; + chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES]; + chunk_t *chunk_cache; + + /* Local allocation lists. */ + chunk_t *alloc[MAX_SIZES]; + unsigned int alloc_chunks[MAX_SIZES]; + + /* Hook pointer lists. */ + chunk_t *hook[NR_EPOCHS][MAX_HOOKS]; +}; + + +#define MEM_FAIL(_s) \ +do { \ + fprintf(stderr, "OUT OF MEMORY: %d bytes at line %d\n", (_s), __LINE__); \ + exit(1); \ +} while ( 0 ) + + +/* Allocate more empty chunks from the heap. */ +#define CHUNKS_PER_ALLOC 1000 +static chunk_t *alloc_more_chunks(void) +{ + int i; + chunk_t *h, *p; + + h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h)); + if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h)); + + for ( i = 1; i < CHUNKS_PER_ALLOC; i++ ) + { + p->next = p + 1; + p++; + } + + p->next = h; + + return(h); +} + + +/* Put a chain of chunks onto a list. */ +static void add_chunks_to_list(chunk_t *ch, chunk_t *head) +{ + chunk_t *h_next, *new_h_next, *ch_next; + ch_next = ch->next; + new_h_next = head->next; + do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); } + while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next ); +} + + +/* Allocate a chain of @n empty chunks. Pointers may be garbage. */ +static chunk_t *get_empty_chunks(int n) +{ + int i; + chunk_t *new_rh, *rh, *rt, *head; + + retry: + head = gc_global.free_chunks; + new_rh = head->next; + do { + rh = new_rh; + rt = head; + WEAK_DEP_ORDER_RMB(); + for ( i = 0; i < n; i++ ) + { + if ( (rt = rt->next) == head ) + { + /* Allocate some more chunks. 
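+                 * Chain the fresh chunks onto the shared free list and
+                 * then restart the scan, since other threads may have
+                 * raced us to the newly added chunks.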
*/
+                add_chunks_to_list(alloc_more_chunks(), head);
+                goto retry;
+            }
+        }
+    }
+    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );
+
+    rt->next = rh;
+    return(rh);
+}
+
+
+/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
+static chunk_t *get_filled_chunks(int n, int sz)
+{
+    chunk_t *h, *p;
+    char *node;
+    int i;
+
+#ifdef PROFILE_GC
+    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
+    ADD_TO(gc_global.allocations, 1);
+#endif
+
+    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
+    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
+#ifdef WEAK_MEM_ORDER
+    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
+#endif
+
+    h = p = get_empty_chunks(n);
+    do {
+        p->i = BLKS_PER_CHUNK;
+        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
+        {
+            p->blk[i] = node;
+            node += sz;
+        }
+    }
+    while ( (p = p->next) != h );
+
+    return(h);
+}
+
+
+/*
+ * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
+ * this by causing a TLB shootdown to be propagated to all other processors.
+ * Each time such an action is required, this function calls:
+ *   mprotect(async_page, page_size, PROT_READ or PROT_NONE)
+ * Each thread's state contains a memory page dedicated for this purpose.
+ */
+#ifdef WEAK_MEM_ORDER
+static void gc_async_barrier(gc_t *gc)
+{
+    mprotect(gc->async_page, gc_global.page_size,
+             gc->async_page_state ? PROT_READ : PROT_NONE);
+    gc->async_page_state = !gc->async_page_state;
+}
+#else
+#define gc_async_barrier(_g) ((void)0)
+#endif
+
+
+/* Grab a level @i allocation chunk from main chain. */
+static chunk_t *get_alloc_chunk(gc_t *gc, int i)
+{
+    chunk_t *alloc, *p, *new_p, *nh;
+    unsigned int sz;
+
+    alloc = gc_global.alloc[i];
+    new_p = alloc->next;
+
+    do {
+        p = new_p;
+        while ( p == alloc )
+        {
+            sz = gc_global.alloc_size[i];
+            nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
+            ADD_TO(gc_global.alloc_size[i], sz >> 3);
+            gc_async_barrier(gc);
+            add_chunks_to_list(nh, alloc);
+            p = alloc->next;
+        }
+        WEAK_DEP_ORDER_RMB();
+    }
+    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );
+
+    p->next = p;
+    assert(p->i == BLKS_PER_CHUNK);
+    return(p);
+}
+
+
+#ifndef MINIMAL_GC
+/*
+ * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest
+ * maximum epoch number seen by a thread that's in the list code. If it's the
+ * current epoch, the "nearly-free" lists from the previous epoch are
+ * reclaimed, and the epoch is incremented.
+ */
+static void gc_reclaim(void)
+{
+    ptst_t *ptst, *first_ptst, *our_ptst = NULL;
+    gc_t *gc = NULL;
+    unsigned long curr_epoch;
+    chunk_t *ch, *t;
+    int two_ago, three_ago, i, j;
+
+    /* Barrier to entering the reclaim critical section. */
+    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;
+
+    /*
+     * Grab first ptst structure *before* barrier -- prevent bugs
+     * on weak-ordered architectures.
+     */
+    first_ptst = ptst_first();
+    MB();
+    curr_epoch = gc_global.current;
+
+    /* Have all threads seen the current epoch, or not in mutator code? */
+    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
+    {
+        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
+    }
+
+    /*
+     * Three-epoch-old garbage lists move to allocation lists.
+     * Two-epoch-old garbage lists are cleaned out.
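+     *
+     * Concretely, with NR_EPOCHS == 3 and current epoch e: index
+     * (e+1) % 3 is the "three ago" list, recycled onto the allocation
+     * lists below, while index (e+2) % 3 is the "two ago" list (zeroed
+     * only when WEAK_MEM_ORDER requires it).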
+ */ + two_ago = (curr_epoch+2) % NR_EPOCHS; + three_ago = (curr_epoch+1) % NR_EPOCHS; + if ( gc_global.nr_hooks != 0 ) + our_ptst = (ptst_t *)pthread_getspecific(ptst_key); + for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) ) + { + gc = ptst->gc; + + for ( i = 0; i < gc_global.nr_sizes; i++ ) + { +#ifdef WEAK_MEM_ORDER + int sz = gc_global.blk_sizes[i]; + if ( gc->garbage[two_ago][i] != NULL ) + { + chunk_t *head = gc->garbage[two_ago][i]; + ch = head; + do { + int j; + for ( j = 0; j < ch->i; j++ ) + INITIALISE_NODES(ch->blk[j], sz); + } + while ( (ch = ch->next) != head ); + } +#endif + + /* NB. Leave one chunk behind, as it is probably not yet full. */ + t = gc->garbage[three_ago][i]; + if ( (t == NULL) || ((ch = t->next) == t) ) continue; + gc->garbage_tail[three_ago][i]->next = ch; + gc->garbage_tail[three_ago][i] = t; + t->next = t; + add_chunks_to_list(ch, gc_global.alloc[i]); + } + + for ( i = 0; i < gc_global.nr_hooks; i++ ) + { + hook_fn_t fn = gc_global.hook_fns[i]; + ch = gc->hook[three_ago][i]; + if ( ch == NULL ) continue; + gc->hook[three_ago][i] = NULL; + + t = ch; + do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); } + while ( (t = t->next) != ch ); + + add_chunks_to_list(ch, gc_global.free_chunks); + } + } + + /* Update current epoch. */ + WMB(); + gc_global.current = (curr_epoch+1) % NR_EPOCHS; + + out: + gc_global.inreclaim = 0; +} +#endif /* MINIMAL_GC */ + + +void *gc_alloc(ptst_t *ptst, int alloc_id) +{ + gc_t *gc = ptst->gc; + chunk_t *ch; + + ch = gc->alloc[alloc_id]; + if ( ch->i == 0 ) + { + if ( gc->alloc_chunks[alloc_id]++ == 100 ) + { + gc->alloc_chunks[alloc_id] = 0; + add_chunks_to_list(ch, gc_global.free_chunks); + gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id); + } + else + { + chunk_t *och = ch; + ch = get_alloc_chunk(gc, alloc_id); + ch->next = och->next; + och->next = ch; + gc->alloc[alloc_id] = ch; + } + } + + return ch->blk[--ch->i]; +} + + +static chunk_t *chunk_from_cache(gc_t *gc) +{ + chunk_t *ch = gc->chunk_cache, *p = ch->next; + + if ( ch == p ) + { + gc->chunk_cache = get_empty_chunks(100); + } + else + { + ch->next = p->next; + p->next = p; + } + + p->i = 0; + return(p); +} + + +void gc_free(ptst_t *ptst, void *p, int alloc_id) +{ +#ifndef MINIMAL_GC + gc_t *gc = ptst->gc; + chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id]; + + if ( ch == NULL ) + { + gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc); + gc->garbage_tail[gc->epoch][alloc_id] = ch; + } + else if ( ch->i == BLKS_PER_CHUNK ) + { + prev = gc->garbage_tail[gc->epoch][alloc_id]; + new = chunk_from_cache(gc); + gc->garbage[gc->epoch][alloc_id] = new; + new->next = ch; + prev->next = new; + ch = new; + } + + ch->blk[ch->i++] = p; +#endif +} + + +void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id) +{ + gc_t *gc = ptst->gc; + chunk_t *och, *ch = gc->hook[gc->epoch][hook_id]; + + if ( ch == NULL ) + { + gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc); + } + else + { + ch = ch->next; + if ( ch->i == BLKS_PER_CHUNK ) + { + och = gc->hook[gc->epoch][hook_id]; + ch = chunk_from_cache(gc); + ch->next = och->next; + och->next = ch; + } + } + + ch->blk[ch->i++] = ptr; +} + + +void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id) +{ + gc_t *gc = ptst->gc; + chunk_t *ch; + + ch = gc->alloc[alloc_id]; + if ( ch->i < BLKS_PER_CHUNK ) + { + ch->blk[ch->i++] = p; + } + else + { + gc_free(ptst, p, alloc_id); + } +} + + +void gc_enter(ptst_t *ptst) +{ +#ifdef MINIMAL_GC + ptst->count++; + MB(); +#else + gc_t *gc = 
ptst->gc; + int new_epoch, cnt; + + retry: + cnt = ptst->count++; + MB(); + if ( cnt == 1 ) + { + new_epoch = gc_global.current; + if ( gc->epoch != new_epoch ) + { + gc->epoch = new_epoch; + gc->entries_since_reclaim = 0; +#ifdef YIELD_TO_HELP_PROGRESS + gc->reclaim_attempts_since_yield = 0; +#endif + } + else if ( gc->entries_since_reclaim++ == 100 ) + { + ptst->count--; +#ifdef YIELD_TO_HELP_PROGRESS + if ( gc->reclaim_attempts_since_yield++ == 10000 ) + { + gc->reclaim_attempts_since_yield = 0; + sched_yield(); + } +#endif + gc->entries_since_reclaim = 0; + gc_reclaim(); + goto retry; + } + } +#endif +} + + +void gc_exit(ptst_t *ptst) +{ + MB(); + ptst->count--; +} + + +gc_t *gc_init(void) +{ + gc_t *gc; + int i; + + gc = ALIGNED_ALLOC(sizeof(*gc)); + if ( gc == NULL ) MEM_FAIL(sizeof(*gc)); + memset(gc, 0, sizeof(*gc)); + +#ifdef WEAK_MEM_ORDER + /* Initialise shootdown state. */ + gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size); + gc->async_page_state = 1; +#endif + + gc->chunk_cache = get_empty_chunks(100); + + /* Get ourselves a set of allocation chunks. */ + for ( i = 0; i < gc_global.nr_sizes; i++ ) + { + gc->alloc[i] = get_alloc_chunk(gc, i); + } + for ( ; i < MAX_SIZES; i++ ) + { + gc->alloc[i] = chunk_from_cache(gc); + } + + return(gc); +} + + +int gc_add_allocator(int alloc_size) +{ + int ni, i = gc_global.nr_sizes; + while ( (ni = CASIO(&gc_global.nr_sizes, i, i+1)) != i ) i = ni; + gc_global.blk_sizes[i] = alloc_size; + gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST; + gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size); + return i; +} + + +void gc_remove_allocator(int alloc_id) +{ + /* This is a no-op for now. */ +} + + +int gc_add_hook(hook_fn_t fn) +{ + int ni, i = gc_global.nr_hooks; + while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni; + gc_global.hook_fns[i] = fn; + return i; +} + + +void gc_remove_hook(int hook_id) +{ + /* This is a no-op for now. */ +} + + +void _destroy_gc_subsystem(void) +{ +#ifdef PROFILE_GC + printf("Total heap: %u bytes (%.2fMB) in %u allocations\n", + gc_global.total_size, (double)gc_global.total_size / 1000000, + gc_global.allocations); +#endif +} + + +void _init_gc_subsystem(void) +{ + memset(&gc_global, 0, sizeof(gc_global)); + + gc_global.page_size = (unsigned int)sysconf(_SC_PAGESIZE); + gc_global.free_chunks = alloc_more_chunks(); + + gc_global.nr_hooks = 0; + gc_global.nr_sizes = 0; +} diff --git a/src/mcas/gc.h b/src/mcas/gc.h new file mode 100644 index 000000000..962e1aa30 --- /dev/null +++ b/src/mcas/gc.h @@ -0,0 +1,40 @@ +#ifndef __GC_H__ +#define __GC_H__ + +typedef struct gc_st gc_t; + +/* Most of these functions peek into a per-thread state struct. */ +#include "ptst.h" + +/* Initialise GC section of given per-thread state structure. */ +gc_t *gc_init(void); + +int gc_add_allocator(int alloc_size); +void gc_remove_allocator(int alloc_id); + +/* + * Memory allocate/free. An unsafe free can be used when an object was + * not made visible to other processes. + */ +void *gc_alloc(ptst_t *ptst, int alloc_id); +void gc_free(ptst_t *ptst, void *p, int alloc_id); +void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id); + +/* + * Hook registry. Allows users to hook in their own per-epoch delay + * lists. 
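+ *
+ * Usage sketch (free_fn is a hypothetical callback):
+ *   int hid = gc_add_hook(free_fn);
+ *   ...
+ *   gc_add_ptr_to_hook_list(ptst, obj, hid);
+ * free_fn(ptst, obj) is then invoked once the epoch machinery has
+ * proved that no thread can still hold a reference to obj.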
+ */ +typedef void (*hook_fn_t)(ptst_t *, void *); +int gc_add_hook(hook_fn_t fn); +void gc_remove_hook(int hook_id); +void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id); + +/* Per-thread entry/exit from critical regions */ +void gc_enter(ptst_t *ptst); +void gc_exit(ptst_t *ptst); + +/* Start-of-day initialisation of garbage collector. */ +void _init_gc_subsystem(void); +void _destroy_gc_subsystem(void); + +#endif /* __GC_H__ */ diff --git a/src/mcas/ia64_defns.h b/src/mcas/ia64_defns.h new file mode 100644 index 000000000..7413b1b14 --- /dev/null +++ b/src/mcas/ia64_defns.h @@ -0,0 +1,99 @@ +#ifndef __IA64_DEFNS_H__ +#define __IA64_DEFNS_H__ + +#include +#include + +#ifndef IA64 +#define IA64 +#endif + +#define CACHE_LINE_SIZE 64 + +/* + * I. Compare-and-swap. + */ + +#define CAS32(_a, _o, _n) \ +({ __typeof__(_o) __o = _o; \ + __asm__ __volatile__("mov ar.ccv=%0 ;;" :: "rO" (_o)); \ + __asm__ __volatile__("cmpxchg4.acq %0=%1,%2,ar.ccv ;; " \ + : "=r" (__o), "=m" (*(_a)) \ + : "r"(_n)); \ + __o; \ +}) + +#define CAS64(_a, _o, _n) \ +({ __typeof__(_o) __o = _o; \ + __asm__ __volatile__("mov ar.ccv=%0 ;;" :: "rO" (_o)); \ + __asm__ __volatile__("cmpxchg8.acq %0=%1,%2,ar.ccv ;; " \ + : "=r" (__o), "=m" (*(_a)) \ + : "r"(_n)); \ + __o; \ +}) + +#define FAS32(_a, _n) \ +({ __typeof__(_n) __o; \ + __asm__ __volatile__("xchg4 %0=%1,%2 ;; " \ + : "=r" (__o), "=m" (*(_a)) \ + : "r"(_n)); \ + __o; \ +}) + +#define FAS64(_a, _n) \ +({ __typeof__(_n) __o; \ + __asm__ __volatile__("xchg8 %0=%1,%2 ;; " \ + : "=r" (__o), "=m" (*(_a)) \ + : "r"(_n)); \ + __o; \ +}) + +#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n)) +#define FAS(_x,_n) ((sizeof (*_x) == 4)?FAS32(_x,_n) :FAS64(_x,_n)) + +/* Update Integer location, return Old value. */ +#define CASIO CAS +#define FASIO FAS +/* Update Pointer location, return Old value. */ +#define CASPO CAS64 +#define FASPO FAS64 +/* Update 32/64-bit location, return Old value. */ +#define CAS32O CAS32 +#define CAS64O CAS64 + + +/* + * II. Memory barriers. + * WMB(): All preceding write operations must commit before any later writes. + * RMB(): All preceding read operations must commit before any later reads. + * MB(): All preceding memory accesses must commit before any later accesses. + * + * If the compiler does not observe these barriers (but any sane compiler + * will!), then VOLATILE should be defined as 'volatile'. + */ + +#define MB() __asm__ __volatile__ (";; mf ;; " : : : "memory") +#define WMB() MB() +#define RMB() MB() +#define VOLATILE /*volatile*/ + +/* + * III. Cycle counter access. + */ + +typedef unsigned long long tick_t; +#define RDTICK() \ + ({ tick_t __t; __asm__ __volatile__ ("mov %0=ar.itc ;;" : "=rO" (__t)); __t; }) + + + +/* + * IV. Types. + */ + +typedef unsigned char _u8; +typedef unsigned short _u16; +typedef unsigned int _u32; +typedef unsigned long long _u64; + +#endif /* __IA64_DEFNS_H__ */ diff --git a/src/mcas/intel_defns.h b/src/mcas/intel_defns.h new file mode 100644 index 000000000..fcdb03d7d --- /dev/null +++ b/src/mcas/intel_defns.h @@ -0,0 +1,106 @@ +#ifndef __INTEL_DEFNS_H__ +#define __INTEL_DEFNS_H__ + +#include +#include + +#ifndef INTEL +#define INTEL +#endif + +#define CACHE_LINE_SIZE 64 + +#if 0 +#define pthread_mutex_init(_m,_i) \ +({ pthread_mutex_init(_m,_i); (_m)->__m_kind = PTHREAD_MUTEX_ADAPTIVE_NP; }) +#endif + + +/* + * I. Compare-and-swap. + */ + +/* + * This is a strong barrier! Reads cannot be delayed beyond a later store. + * Reads cannot be hoisted beyond a LOCK prefix. 
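+ * (Hence MB() below is implemented as a LOCK-prefixed add to the top
+ * of the stack.)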
Stores always in-order. + */ +#define CAS(_a, _o, _n) \ +({ __typeof__(_o) __o = _o; \ + __asm__ __volatile__( \ + "lock cmpxchg %3,%1" \ + : "=a" (__o), "=m" (*(volatile unsigned int *)(_a)) \ + : "0" (__o), "r" (_n) ); \ + __o; \ +}) + +#define FAS(_a, _n) \ +({ __typeof__(_n) __o; \ + __asm__ __volatile__( \ + "lock xchg %0,%1" \ + : "=r" (__o), "=m" (*(volatile unsigned int *)(_a)) \ + : "0" (_n) ); \ + __o; \ +}) + +#define CAS64(_a, _o, _n) \ +({ __typeof__(_o) __o = _o; \ + __asm__ __volatile__( \ + "movl %3, %%ecx;" \ + "movl %4, %%ebx;" \ + "lock cmpxchg8b %1" \ + : "=A" (__o), "=m" (*(volatile unsigned long long *)(_a)) \ + : "0" (__o), "m" (_n >> 32), "m" (_n) \ + : "ebx", "ecx" ); \ + __o; \ +}) + +/* Update Integer location, return Old value. */ +#define CASIO CAS +#define FASIO FAS +/* Update Pointer location, return Old value. */ +#define CASPO CAS +#define FASPO FAS +/* Update 32/64-bit location, return Old value. */ +#define CAS32O CAS +#define CAS64O CAS64 + +/* + * II. Memory barriers. + * WMB(): All preceding write operations must commit before any later writes. + * RMB(): All preceding read operations must commit before any later reads. + * MB(): All preceding memory accesses must commit before any later accesses. + * + * If the compiler does not observe these barriers (but any sane compiler + * will!), then VOLATILE should be defined as 'volatile'. + */ + +#define MB() __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory") +#define WMB() __asm__ __volatile__ ("" : : : "memory") +#define RMB() MB() +#define VOLATILE /*volatile*/ + +/* On Intel, CAS is a strong barrier, but not a compile barrier. */ +#define RMB_NEAR_CAS() WMB() +#define WMB_NEAR_CAS() WMB() +#define MB_NEAR_CAS() WMB() + + +/* + * III. Cycle counter access. + */ + +typedef unsigned long long tick_t; +#define RDTICK() \ + ({ tick_t __t; __asm__ __volatile__ ("rdtsc" : "=A" (__t)); __t; }) + + +/* + * IV. Types. + */ + +typedef unsigned char _u8; +typedef unsigned short _u16; +typedef unsigned int _u32; +typedef unsigned long long _u64; + +#endif /* __INTEL_DEFNS_H__ */ diff --git a/src/mcas/mcas.c b/src/mcas/mcas.c new file mode 100644 index 000000000..2f8c97ee4 --- /dev/null +++ b/src/mcas/mcas.c @@ -0,0 +1,574 @@ +/****************************************************************************** + * mcas.c + * + * MCAS implemented as described in: + * A Practical Multi-Word Compare-and-Swap Operation + * Timothy Harris, Keir Fraser and Ian Pratt + * Proceedings of the IEEE Symposium on Distributed Computing, Oct 2002 + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+typedef struct CasDescriptor CasDescriptor_t;
+typedef struct CasEntry CasEntry_t;
+typedef struct per_thread_state_t per_thread_state_t;
+
+extern int num_threads;
+
+#define ARENA_SIZE 40960
+
+struct per_thread_state_t
+{
+    int id;
+    CasDescriptor_t *next_descriptor;
+    void *arena;
+    void *arena_lim;
+};
+
+
+static pthread_key_t mcas_ptst_key;
+
+typedef struct pad128 { char pad[128]; } pad128_t;
+
+
+/* CAS descriptors. */
+
+#define STATUS_IN_PROGRESS 0
+#define STATUS_SUCCEEDED 1
+#define STATUS_FAILED 2
+#define STATUS_ABORTED 3
+
+struct CasEntry {
+    void **ptr;
+    void *old;
+    void *new;
+};
+
+struct CasDescriptor {
+    int status;
+    int length;
+    CasDescriptor_t *pt[MAX_THREADS];
+    int rc;
+    CasDescriptor_t *fc; /* free chain */
+    CasEntry_t entries[1];
+};
+
+/* Marked pointers. */
+typedef unsigned long ptr_int;
+#ifndef MARK_IN_PROGRESS
+#define MARK_IN_PROGRESS 1
+#endif
+#ifndef MARK_PTR_TO_CD
+#define MARK_PTR_TO_CD 2
+#endif
+
+#define get_markedness(p) (((ptr_int) (p)) & 3)
+#define get_unmarked_reference(p) ((void *) (((ptr_int) (p)) & (~3)))
+#define get_marked_reference(p,m) ((void *) (((ptr_int) (p)) | m))
+
+static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd);
+static per_thread_state_t *get_ptst (void);
+
+pad128_t p0; /* I'm worried these important RO vars might be false shared */
+static int cas_sz;
+static int num_ptrs = 1024;
+static int ptr_mult = 1;
+pad128_t p1;
+
+static void *ALLOC(int size)
+{
+    void *a = calloc(1, size);
+    if ( a == NULL ) abort();
+    return a;
+}
+
+static void *ALLOC_ALONE (int size)
+{
+    int ps = sysconf(_SC_PAGESIZE);
+    int req = ps + size + ps;
+    char *res = ALLOC(req);
+    return (void *)(res + ps);
+}
+
+static int next_thread_id = 0;
+static per_thread_state_t *ptsts = NULL;
+
+static void new_arena (per_thread_state_t *ptst, int size)
+{
+    ptst->arena = ALLOC(size);
+    if ( !ptst->arena ) abort();
+    ptst->arena_lim = (((char *) ptst->arena) + size);
+}
+
+static per_thread_state_t *get_ptst (void)
+{
+    per_thread_state_t *result;
+    int r;
+
+    result = pthread_getspecific(mcas_ptst_key);
+
+    if ( result == NULL )
+    {
+        int my_id;
+        int largest = sysconf(_SC_PAGESIZE);
+
+        if ( largest < sizeof (per_thread_state_t) )
+            largest = sizeof (per_thread_state_t);
+
+        ALLOC (largest);
+        result = ALLOC (largest);
+        ALLOC (largest);
+
+        do { my_id = next_thread_id; }
+        while ( CASIO (&next_thread_id, my_id, my_id + 1) != my_id );
+
+        result->id = my_id;
+        ptsts = result;
+
+        new_arena(result, ARENA_SIZE);
+
+        r = pthread_setspecific(mcas_ptst_key, result);
+        assert(r == 0);
+    }
+
+    return result;
+}
+
+static void release_descriptor (CasDescriptor_t *cd)
+{
+    per_thread_state_t *ptst = get_ptst ();
+    cd->fc = ptst->next_descriptor;
+    ptst->next_descriptor = cd;
+}
+
+static int rc_delta_descriptor (CasDescriptor_t *cd,
+                                int delta)
+{
+    int rc, new_rc = cd->rc;
+
+    do { rc = new_rc; }
+    while ( (new_rc = CASIO (&(cd->rc), rc, rc + delta)) != rc );
+
+    return
rc; +} + +static void rc_up_descriptor (CasDescriptor_t *cd) +{ + rc_delta_descriptor(cd, 2); + MB(); +} + +static void rc_down_descriptor (CasDescriptor_t *cd) +{ + int old_rc, new_rc, cur_rc = cd->rc; + + do { + old_rc = cur_rc; + new_rc = old_rc - 2; + if ( new_rc == 0 ) new_rc = 1; else MB(); + } + while ( (cur_rc = CASIO(&(cd->rc), old_rc, new_rc)) != old_rc ); + + if ( old_rc == 2 ) + release_descriptor(cd); +} + +static CasDescriptor_t *new_descriptor (per_thread_state_t *ptst, int length) +{ + CasDescriptor_t *result; + int i; + + CasDescriptor_t **ptr = &(ptst->next_descriptor); + result = *ptr; + while ( (result != NULL) && (result->length != length) ) + { + ptr = &(result->fc); + result = *ptr; + } + + if ( result == NULL ) + { + int alloc_size; + + alloc_size = sizeof (CasDescriptor_t) + + ((length - 1) * sizeof (CasEntry_t)); + + result = (CasDescriptor_t *) ptst->arena; + ptst->arena = ((char *) (ptst->arena)) + alloc_size; + + if ( ptst->arena >= ptst->arena_lim ) + { + new_arena(ptst, ARENA_SIZE); + result = (CasDescriptor_t *) ptst->arena; + ptst->arena = ((char *) (ptst->arena)) + alloc_size; + } + + for ( i = 0; i < num_threads; i++ ) + result->pt[i] = result; + + result->length = length; + result->rc = 2; + } + else + { + *ptr = result->fc; + assert((result->rc & 1) == 1); + rc_delta_descriptor(result, 1); /* clears lowest bit */ + } + + assert(result->length == length); + + return result; +} + +static void *read_from_cd (void **ptr, CasDescriptor_t *cd, bool_t get_old) +{ + CasEntry_t *ce; + int i; + int n; + + n = cd->length; + for ( i = 0; i < n; i++ ) + { + ce = &(cd->entries[i]); + if ( ce->ptr == ptr ) + return get_old ? ce->old : ce->new; + } + + assert(0); + return NULL; +} + +static void *read_barrier_lite (void **ptr) +{ + CasDescriptor_t *cd; + void *v; + int m; + + retry_read_barrier: + v = *ptr; + m = get_markedness(v); + + if ( m == MARK_PTR_TO_CD ) + { + WEAK_DEP_ORDER_RMB(); + cd = get_unmarked_reference(v); + + rc_up_descriptor(cd); + if ( *ptr != v ) + { + rc_down_descriptor(cd); + goto retry_read_barrier; + } + + v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED)); + + rc_down_descriptor(cd); + } + else if ( m == MARK_IN_PROGRESS ) + { + WEAK_DEP_ORDER_RMB(); + cd = *(CasDescriptor_t **)get_unmarked_reference(v); + + rc_up_descriptor(cd); + if ( *ptr != v ) + { + rc_down_descriptor(cd); + goto retry_read_barrier; + } + + v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED)); + + rc_down_descriptor(cd); + } + + return v; +} + +static void clean_descriptor (CasDescriptor_t *cd) +{ + int i; + void *mcd; + int status; + + status = cd->status; + assert(status == STATUS_SUCCEEDED || status == STATUS_FAILED); + + mcd = get_marked_reference(cd, MARK_PTR_TO_CD); + + if (status == STATUS_SUCCEEDED) + for ( i = 0; i < cd->length; i++ ) + CASPO (cd->entries[i].ptr, mcd, cd->entries[i].new); + else + for ( i = 0; i < cd->length; i++ ) + CASPO(cd->entries[i].ptr, mcd, cd->entries[i].old); +} + +static bool_t mcas_fixup (void **ptr, + void *value_read) +{ + int m; + + retry_mcas_fixup: + m = get_markedness(value_read); + if ( m == MARK_PTR_TO_CD ) + { + CasDescriptor_t *helpee; + helpee = get_unmarked_reference(value_read); + + rc_up_descriptor(helpee); + if ( *ptr != value_read ) + { + rc_down_descriptor(helpee); + value_read = *ptr; + goto retry_mcas_fixup; + } + + mcas0(NULL, helpee); + + rc_down_descriptor(helpee); + + return TRUE; + } + else if ( m == MARK_IN_PROGRESS ) + { + CasDescriptor_t *other_cd; + + WEAK_DEP_ORDER_RMB(); + other_cd = 
*(CasDescriptor_t **)get_unmarked_reference(value_read); + + rc_up_descriptor(other_cd); + if ( *ptr != value_read ) + { + rc_down_descriptor(other_cd); + value_read = *ptr; + goto retry_mcas_fixup; + } + + if ( other_cd->status == STATUS_IN_PROGRESS ) + CASPO(ptr, + value_read, + get_marked_reference(other_cd, MARK_PTR_TO_CD)); + else + CASPO(ptr, + value_read, + read_from_cd(ptr, other_cd, TRUE)); + + rc_down_descriptor (other_cd); + return TRUE; + } + + return FALSE; +} + +static void *read_barrier (void **ptr) +{ + void *v; + + do { v = *ptr; } + while ( mcas_fixup(ptr, v) ); + + return v; +} + +static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd) +{ + int i; + int n; + int desired_status; + bool_t final_success; + void *mcd; + void *dmcd; + int old_status; + + if ( ptst == NULL ) + ptst = get_ptst(); + + MB(); /* required for sequential consistency */ + + if ( cd->status == STATUS_SUCCEEDED ) + { + clean_descriptor(cd); + final_success = TRUE; + goto out; + } + else if ( cd->status == STATUS_FAILED ) + { + clean_descriptor(cd); + final_success = FALSE; + goto out; + } + + /* Attempt to link in all entries in the descriptor. */ + mcd = get_marked_reference(cd, MARK_PTR_TO_CD); + dmcd = get_marked_reference(&(cd->pt[ptst->id]), MARK_IN_PROGRESS); + + desired_status = STATUS_SUCCEEDED; + + retry: + n = cd->length; + for (i = 0; i < n; i ++) + { + CasEntry_t *ce = &(cd->entries[i]); + void *value_read = CASPO(ce->ptr, ce->old, dmcd); + + if ( (value_read != ce->old) && + (value_read != dmcd) && + (value_read != mcd) ) + { + if ( mcas_fixup(ce->ptr, value_read) ) + goto retry; + desired_status = STATUS_FAILED; + break; + } + + RMB_NEAR_CAS(); /* ensure check of status occurs after CASPO. */ + if ( cd->status != STATUS_IN_PROGRESS ) + { + CASPO(ce->ptr, dmcd, ce->old); + break; + } + + if ( value_read != mcd ) + { + value_read = CASPO(ce->ptr, dmcd, mcd); + assert((value_read == dmcd) || + (value_read == mcd) || + (cd->status != STATUS_IN_PROGRESS)); + } + } + + /* + * All your ptrs are belong to us (or we've been helped and + * already known to have succeeded or failed). Try to + * propagate our desired result into the status field. + */ + + /* + * When changing to success, we must have all pointer ownerships + * globally visible. But we get this without a memory barrier, as + * 'desired_status' is dependent on the outcome of each CASPO + * to MARK_IN_PROGRESS. + * + * Architectures providing CAS natively all specify that the operation + * is _indivisible_. That is, the write will be done when the CAS + * completes. + * + * Architectures providing LL/SC are even better: any following + * instruction in program order is control-dependent on the CAS, because + * CAS may be retried if SC fails. All we need is that SC gets to point + * of coherency before producing its result: even Alpha provides this! + */ + WEAK_DEP_ORDER_WMB(); + old_status = CASIO((int *)&cd->status, + STATUS_IN_PROGRESS, + desired_status); + /* + * This ensures final sequential consistency. + * Also ensures that the status update is visible before cleanup. + */ + WMB_NEAR_CAS(); + + clean_descriptor(cd); + final_success = (cd->status == STATUS_SUCCEEDED); + + out: + return final_success; +} + + +void mcas_init (void) +{ + int r = pthread_key_create(&mcas_ptst_key, NULL); + if ( r != 0 ) abort(); +} + +/***********************************************************************/ + +bool_t mcas (int n, + void **ptr, void *old, void *new, + ...) 
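+/*
+ * Calling convention sketch: each target is a (ptr, old, new) triple;
+ * the first triple is passed explicitly and the remaining n-1 triples
+ * as varargs. E.g., for two hypothetical shared words a and b:
+ *   ok = mcas(2, (void **)&a, oa, na, (void **)&b, ob, nb);
+ */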
+{ + va_list ap; + int i; + CasDescriptor_t *cd; + CasEntry_t *ce; + int result = 0; + per_thread_state_t *ptst = get_ptst(); + + cd = new_descriptor(ptst, n); + + cd->status = STATUS_IN_PROGRESS; + cd->length = n; + + ce = cd->entries; + ce->ptr = ptr; + ce->old = old; + ce->new = new; + + va_start(ap, new); + for ( i = 1; i < n; i++ ) + { + ce ++; + ce->ptr = va_arg(ap, void **); + ce->old = va_arg(ap, void *); + ce->new = va_arg(ap, void *); + } + va_end (ap); + + /* Insertion sort. Fail on non-unique pointers. */ + for ( i = 1, ce = &cd->entries[1]; i < n; i++, ce++ ) + { + int j; + CasEntry_t *cei, tmp; + for ( j = i-1, cei = ce-1; j >= 0; j--, cei-- ) + if ( cei->ptr <= ce->ptr ) break; + if ( cei->ptr == ce->ptr ) goto out; + if ( ++cei != ce ) + { + tmp = *ce; + memmove(cei+1, cei, (ce-cei)*sizeof(CasEntry_t)); + *cei = tmp; + } + } + + result = mcas0(ptst, cd); + assert(cd->status != STATUS_IN_PROGRESS); + + out: + rc_down_descriptor (cd); + return result; +} + diff --git a/src/mcas/mips_defns.h b/src/mcas/mips_defns.h new file mode 100644 index 000000000..7fc120682 --- /dev/null +++ b/src/mcas/mips_defns.h @@ -0,0 +1,118 @@ +#ifndef __MIPS_DEFNS_H__ +#define __MIPS_DEFNS_H__ + +#include +#include + +#ifndef MIPS +#define MIPS +#endif + +#define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN + +#define CACHE_LINE_SIZE 64 + + +/* + * I. Compare-and-swap. + */ + +#define FAS32(_a, _n) \ +({ __typeof__(_n) __r; \ + __asm__ __volatile__( \ + "1: ll %0,%1 ;" \ + " move $3,%2 ;" \ + " sc $3,%1 ;" \ + " beqz $3,1b ;" \ + : "=&r" (__r), "=m" (*(_a)) \ + : "r" (_n) : "$3" ); \ + __r; \ +}) + +#define FAS64(_a, _n) \ +({ __typeof__(_n) __r; \ + __asm__ __volatile__( \ + "1: lld %0,%1 ;" \ + " move $3,%2 ;" \ + " scd $3,%1 ;" \ + " beqz $3,1b ;" \ + : "=&r" (__r), "=m" (*(_a)) \ + : "r" (_n) : "$3" ); \ + __r; \ +}) + +#define CAS32(_a, _o, _n) \ +({ __typeof__(_o) __r; \ + __asm__ __volatile__( \ + "1: ll %0,%1 ;" \ + " bne %0,%2,2f ;" \ + " move $3,%3 ;" \ + " sc $3,%1 ;" \ + " beqz $3,1b ;" \ + "2: " \ + : "=&r" (__r), "=m" (*(_a)) \ + : "r" (_o), "r" (_n) : "$3" ); \ + __r; \ +}) + +#define CAS64(_a, _o, _n) \ +({ __typeof__(_o) __r; \ + __asm__ __volatile__( \ + "1: lld %0,%1 ;" \ + " bne %0,%2,2f ;" \ + " move $3,%3 ;" \ + " scd $3,%1 ;" \ + " beqz $3,1b ;" \ + "2: " \ + : "=&r" (__r), "=m" (*(_a)) \ + : "r" (_o), "r" (_n) : "$3" ); \ + __r; \ +}) + +#define CAS(_x,_o,_n) ((sizeof (*_x) == 4)?CAS32(_x,_o,_n):CAS64(_x,_o,_n)) +#define FAS(_x,_n) ((sizeof (*_x) == 4)?FAS32(_x,_n) :FAS64(_x,_n)) +/* Update Integer location, return Old value. */ +#define CASIO(_x,_o,_n) CAS(_x,_o,_n) +#define FASIO(_x,_n) FAS(_x,_n) +/* Update Pointer location, return Old value. */ +#define CASPO(_x,_o,_n) (void*)CAS((_x),(void*)(_o),(void*)(_n)) +#define FASPO(_x,_n) (void*)FAS((_x),(void*)(_n)) +/* Update 32/64-bit location, return Old value. */ +#define CAS32O CAS32 +#define CAS64O CAS64 + +/* + * II. Memory barriers. + * WMB(): All preceding write operations must commit before any later writes. + * RMB(): All preceding read operations must commit before any later reads. + * MB(): All preceding memory accesses must commit before any later accesses. + * + * If the compiler does not observe these barriers (but any sane compiler + * will!), then VOLATILE should be defined as 'volatile'. + */ + +#define MB() __asm__ __volatile__ ("sync" : : : "memory") +#define WMB() MB() +#define RMB() MB() +#define VOLATILE /*volatile*/ + + +/* + * III. Cycle counter access. 
+ */
+
+typedef unsigned long long tick_t;
+#define RDTICK() \
+    ({ tick_t __t; __asm__ __volatile__ ("dmfc0 %0,$9" : "=r" (__t)); __t; })
+
+
+/*
+ * IV. Types.
+ */
+
+typedef unsigned char _u8;
+typedef unsigned short _u16;
+typedef unsigned int _u32;
+typedef unsigned long long _u64;
+
+#endif /* __MIPS_DEFNS_H__ */
diff --git a/src/mcas/portable_defns.h b/src/mcas/portable_defns.h
new file mode 100644
index 000000000..fb1c246e0
--- /dev/null
+++ b/src/mcas/portable_defns.h
@@ -0,0 +1,406 @@
+#ifndef __PORTABLE_DEFNS_H__
+#define __PORTABLE_DEFNS_H__
+
+#define MAX_THREADS 128 /* Nobody will ever have more! */
+
+#if defined(SPARC)
+#include "sparc_defns.h"
+#elif defined(INTEL)
+#include "intel_defns.h"
+#elif defined(PPC)
+#include "ppc_defns.h"
+#elif defined(IA64)
+#include "ia64_defns.h"
+#elif defined(MIPS)
+#include "mips_defns.h"
+#elif defined(ALPHA)
+#include "alpha_defns.h"
+#else
+#error "A valid architecture has not been defined"
+#endif
+
+#include <string.h>
+
+#ifndef MB_NEAR_CAS
+#define RMB_NEAR_CAS() RMB()
+#define WMB_NEAR_CAS() WMB()
+#define MB_NEAR_CAS() MB()
+#endif
+
+typedef unsigned long int_addr_t;
+
+typedef int bool_t;
+#define FALSE 0
+#define TRUE 1
+
+#define ADD_TO(_v,_x) \
+do { \
+    int __val = (_v), __newval; \
+    while ( (__newval = CASIO(&(_v),__val,__val+(_x))) != __val ) \
+        __val = __newval; \
+} while ( 0 )
+
+/*
+ * Allow us to efficiently align and pad structures so that shared fields
+ * don't cause contention on thread-local or read-only fields.
+ */
+#define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
+#define ALIGNED_ALLOC(_s) \
+    ((void *)(((unsigned long)malloc((_s)+CACHE_LINE_SIZE*2) + \
+        CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE-1)))
+
+/*
+ * Interval counting
+ */
+
+typedef unsigned int interval_t;
+#define get_interval(_i) \
+do { \
+    interval_t _ni = interval; \
+    do { _i = _ni; } while ( (_ni = CASIO(&interval, _i, _i+1)) != _i ); \
+} while ( 0 )
+
+/*
+ * POINTER MARKING
+ */
+
+#define get_marked_ref(_p) ((void *)(((unsigned long)(_p)) | 1))
+#define get_unmarked_ref(_p) ((void *)(((unsigned long)(_p)) & ~1))
+#define is_marked_ref(_p) (((unsigned long)(_p)) & 1)
+
+
+/*
+ * SUPPORT FOR WEAK ORDERING OF MEMORY ACCESSES
+ */
+
+#ifdef WEAK_MEM_ORDER
+
+#define MAYBE_GARBAGE (0)
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) \
+do { \
+    (_x) = (_f); \
+    if ( (_x) == MAYBE_GARBAGE ) { RMB(); (_x) = (_f); } \
+} while ( 0 )
+
+#define WEAK_DEP_ORDER_RMB() RMB()
+#define WEAK_DEP_ORDER_WMB() WMB()
+#define WEAK_DEP_ORDER_MB() MB()
+
+#else
+
+/* Read field @_f into variable @_x.
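+ * Under strong memory ordering this is just the plain assignment
+ * below, e.g. READ_FIELD(n, p->r) compiles to n = p->r.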
*/ +#define READ_FIELD(_x,_f) ((_x) = (_f)) + +#define WEAK_DEP_ORDER_RMB() ((void)0) +#define WEAK_DEP_ORDER_WMB() ((void)0) +#define WEAK_DEP_ORDER_MB() ((void)0) + +#endif + +/* + * Strong LL/SC operations + */ + +static _u32 strong_ll(_u64 *ptr, int p) +{ + _u64 val_read; + _u64 new_val; + _u64 flag; + + flag = (1LL << p); + + new_val = *ptr; + do { + val_read = new_val; + new_val = val_read | flag; + } while ( ((val_read & flag) == 0) && + ((new_val = CAS64O(ptr, val_read, new_val)) != val_read) ); + + return (_u32) (val_read >> 32); +} + +static int strong_vl(_u64 *ptr, int p) +{ + _u64 val_read; + _u64 flag; + + flag = (1LL << p); + val_read = *ptr; + + return (val_read & flag); +} + +static int strong_sc(_u64 *ptr, int p, _u32 n) +{ + _u64 val_read; + _u64 new_val; + _u64 flag; + + flag = (1LL << p); + val_read = *ptr; + + while ( (val_read & flag) != 0 ) + { + new_val = (((_u64)n) << 32); + + if ( (new_val = CAS64O(ptr, val_read, new_val)) == val_read ) + { + return 1; + } + + val_read = new_val; + } + + return 0; +} + +static void s_store(_u64 *ptr, _u32 n) +{ + _u64 new_val; + + new_val = (((_u64)n) << 32); + *ptr = new_val; +} + +static _u32 s_load(_u64 *ptr) +{ + _u64 val_read; + + val_read = *ptr; + return (val_read >> 32); +} + + +/* + * MCS lock + */ + +typedef struct qnode_t qnode_t; + +struct qnode_t { + qnode_t *next; + int locked; +}; + +typedef struct { + qnode_t *tail; +} mcs_lock_t; + +static void mcs_init(mcs_lock_t *lock) +{ + lock->tail = NULL; +} + +static void mcs_lock(mcs_lock_t *lock, qnode_t *qn) +{ + qnode_t *pred; + + qn->next = NULL; + qn->locked = 1; + WMB_NEAR_CAS(); + + pred = FASPO(&lock->tail, qn); + if ( pred != NULL ) + { + pred->next = qn; + while ( qn->locked ) RMB(); + } + + MB(); +} + +static void mcs_unlock(mcs_lock_t *lock, qnode_t *qn) +{ + qnode_t *t = qn->next; + + MB(); + + if ( t == NULL ) + { + if ( CASPO(&lock->tail, qn, NULL) == qn ) return; + while ( (t = qn->next) == NULL ) RMB(); + WEAK_DEP_ORDER_MB(); + } + + t->locked = 0; +} + + +/* + * MCS fair MRSW lock. 
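+ *
+ * Usage sketch: each acquisition supplies a private queue node, and
+ * the same node must be passed to the matching unlock. E.g., given
+ * some mrsw_lock_t l:
+ *   mrsw_qnode_t qn;
+ *   rd_lock(&l, &qn);
+ *   ... read-side critical region ...
+ *   rd_unlock(&l, &qn);
+ * (wr_lock/wr_unlock follow the same pattern on the writer side.)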
+ */ + +typedef struct mrsw_qnode_st mrsw_qnode_t; + +struct mrsw_qnode_st { +#define CLS_RD 0 +#define CLS_WR 1 + int class; +#define ST_NOSUCC 0 +#define ST_RDSUCC 1 +#define ST_WRSUCC 2 +#define ST_SUCCMASK 3 +#define ST_BLOCKED 4 + int state; + mrsw_qnode_t *next; +}; + +typedef struct { + mrsw_qnode_t *tail; + mrsw_qnode_t *next_writer; + int reader_count; +} mrsw_lock_t; + + +#define CLEAR_BLOCKED(_qn) ADD_TO((_qn)->state, -ST_BLOCKED) + +static void mrsw_init(mrsw_lock_t *lock) +{ + memset(lock, 0, sizeof(*lock)); +} + +static void rd_lock(mrsw_lock_t *lock, mrsw_qnode_t *qn) +{ + mrsw_qnode_t *pred, *next; + + qn->class = CLS_RD; + qn->next = NULL; + qn->state = ST_NOSUCC | ST_BLOCKED; + + WMB_NEAR_CAS(); + + pred = FASPO(&lock->tail, qn); + + if ( pred == NULL ) + { + ADD_TO(lock->reader_count, 1); + CLEAR_BLOCKED(qn); + } + else + { + if ( (pred->class == CLS_WR) || + (CASIO(&pred->state, ST_BLOCKED|ST_NOSUCC, ST_BLOCKED|ST_RDSUCC) + == (ST_BLOCKED|ST_NOSUCC)) ) + { + WEAK_DEP_ORDER_WMB(); + pred->next = qn; + while ( (qn->state & ST_BLOCKED) ) RMB(); + } + else + { + ADD_TO(lock->reader_count, 1); + pred->next = qn; + WEAK_DEP_ORDER_WMB(); + CLEAR_BLOCKED(qn); + } + } + + if ( qn->state == ST_RDSUCC ) + { + while ( (next = qn->next) == NULL ) RMB(); + ADD_TO(lock->reader_count, 1); + WEAK_DEP_ORDER_WMB(); + CLEAR_BLOCKED(next); + } + + RMB(); +} + +static void rd_unlock(mrsw_lock_t *lock, mrsw_qnode_t *qn) +{ + mrsw_qnode_t *next = qn->next; + int c, oc; + + RMB(); + + if ( (next != NULL) || (CASPO(&lock->tail, qn, NULL) != qn) ) + { + while ( (next = qn->next) == NULL ) RMB(); + if ( (qn->state & ST_SUCCMASK) == ST_WRSUCC ) + { + lock->next_writer = next; + WMB_NEAR_CAS(); /* set next_writer before dec'ing refcnt */ + } + } + + /* Bounded to maximum # readers if no native atomic_decrement */ + c = lock->reader_count; + while ( (oc = CASIO(&lock->reader_count, c, c-1)) != c ) c = oc; + + if ( c == 1 ) + { + WEAK_DEP_ORDER_MB(); + if ( (next = lock->next_writer) != NULL ) + { + RMB(); + if ( (lock->reader_count == 0) && + (CASPO(&lock->next_writer, next, NULL) == next) ) + { + WEAK_DEP_ORDER_WMB(); + CLEAR_BLOCKED(next); + } + } + } +} + +static void wr_lock(mrsw_lock_t *lock, mrsw_qnode_t *qn) +{ + mrsw_qnode_t *pred; + int os, s; + + qn->class = CLS_WR; + qn->next = NULL; + qn->state = ST_NOSUCC | ST_BLOCKED; + + WMB_NEAR_CAS(); + + pred = FASPO(&lock->tail, qn); + + if ( pred == NULL ) + { + WEAK_DEP_ORDER_WMB(); + lock->next_writer = qn; + MB(); /* check reader_count after setting next_writer. */ + if ( (lock->reader_count == 0) && + (CASPO(&lock->next_writer, qn, NULL) == qn) ) + { + CLEAR_BLOCKED(qn); + } + } + else + { + s = pred->state; + /* Bounded while loop: only one other remote update may occur. 
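+         * (The only competing write to pred->state is the one that
+         * clears pred's ST_BLOCKED flag, and it happens at most once,
+         * so the CAS below retries at most once.)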
*/ + while ( (os = CASIO(&pred->state, s, s | ST_WRSUCC)) != s ) s = os; + WMB(); + pred->next = qn; + } + + while ( (qn->state & ST_BLOCKED) ) RMB(); + + MB(); +} + +static void wr_unlock(mrsw_lock_t *lock, mrsw_qnode_t *qn) +{ + mrsw_qnode_t *next = qn->next; + + MB(); + + if ( (next != NULL) || (CASPO(&lock->tail, qn, NULL) != qn) ) + { + while ( (next = qn->next) == NULL ) RMB(); + WEAK_DEP_ORDER_MB(); + if ( next->class == CLS_RD ) + { + ADD_TO(lock->reader_count, 1); + WMB(); + } + CLEAR_BLOCKED(next); + } +} + + +#endif /* __PORTABLE_DEFNS_H__ */ diff --git a/src/mcas/ppc_defns.h b/src/mcas/ppc_defns.h new file mode 100644 index 000000000..c52def995 --- /dev/null +++ b/src/mcas/ppc_defns.h @@ -0,0 +1,105 @@ +#ifndef __PPC_DEFNS_H__ +#define __PPC_DEFNS_H__ + +#ifndef PPC +#define PPC +#endif + +#include +#include +#include + +#define CACHE_LINE_SIZE 64 + +#include + + +/* + * I. Compare-and-swap. + */ + +static int FAS32(void *, int); +static long FAS64(void *, long); +static int CAS32(void *, int, int); +static long CAS64(void *, long, long); + +#pragma mc_func FAS32 { \ + "7c001828" /* 1: lwarx r0,0,r3 */ \ + "7c80192d" /* stwcx r4,0,r3 */ \ + "4082fff8" /* bne 1 */ \ + "60030000" /* ori r3,0,r0 */ \ +} + +#pragma mc_func FAS64 { \ + "7c0018a8" /* 1: ldarx r0,0,r3 */ \ + "7c8019ad" /* stdcx r4,0,r3 */ \ + "4082fff8" /* bne 1 */ \ + "60030000" /* ori r3,0,r0 */ \ +} + +#pragma mc_func CAS32 { \ + "7c001828" /* 1: lwarx r0,0,r3 */ \ + "7c002000" /* cmpw r0,r4 */ \ + "4082000c" /* bne 2 */ \ + "7ca0192d" /* stwcx r5,0,r3 */ \ + "4082fff0" /* bne 1 */ \ + "60030000" /* 2: ori r3,0,r0 */ \ +} + +#pragma mc_func CAS64 { \ + "7c0018a8" /* 1: ldarx r0,0,r3 */ \ + "7c202000" /* cmpd r0,r4 */ \ + "4082000c" /* bne 2 */ \ + "7ca019ad" /* stdcx r5,0,r3 */ \ + "4082fff0" /* bne 1 */ \ + "60030000" /* 2: ori r3,0,r0 */ \ +} + +#define CASIO(_a,_o,_n) ((int)CAS32((int*)(_a),(int)(_o),(int)(_n))) +#define FASIO(_a,_n) ((int)FAS32((int*)(_a),(int)(_n))) +#define CASPO(_a,_o,_n) ((void *)(CAS64((long*)(_a),(long)(_o),(long)(_n)))) +#define FASPO(_a,_n) ((void *)(FAS64((long*)(_a),(long)(_n)))) +#define CAS32O(_a,_o,_n) ((_u32)(CAS32((_u32*)(_a),(_u32)(_o),(_u32)(_n)))) +#define CAS64O(_a,_o,_n) ((_u64)(CAS64((long*)(_a),(long)(_o),(long)(_n)))) + + +/* + * II. Memory barriers. + * WMB(): All preceding write operations must commit before any later writes. + * RMB(): All preceding read operations must commit before any later reads. + * MB(): All preceding memory accesses must commit before any later accesses. + * + * If the compiler does not observe these barriers (but any sane compiler + * will!), then VOLATILE should be defined as 'volatile'. + */ + +static void WMB(void); +static void RMB(void); +static void MB(void); + +#pragma mc_func WMB { "7c0004ac" } /* msync (orders memory transactions) */ +#pragma mc_func RMB { "4c00012c" } /* isync (orders instruction issue) */ +#pragma mc_func MB { "7c0004ac" } /* msync (orders memory transactions) */ + +#define VOLATILE /*volatile*/ + + +/* + * III. Cycle counter access. + */ + +typedef unsigned long tick_t; +static tick_t RDTICK(void); +#pragma mc_func RDTICK { "7c6c42e6" } /* mftb r3 */ + + +/* + * IV. Types. 
+ */ + +typedef unsigned char _u8; +typedef unsigned short _u16; +typedef unsigned int _u32; +typedef unsigned long _u64; + +#endif /* __PPC_DEFNS_H__ */ diff --git a/src/mcas/ptst.c b/src/mcas/ptst.c new file mode 100644 index 000000000..5c23d53a2 --- /dev/null +++ b/src/mcas/ptst.c @@ -0,0 +1,107 @@ +/****************************************************************************** + * ptst.c + * + * Per-thread state management. Essentially the state management parts + * of MB's garbage-collection code have been pulled out and placed here, + * for the use of other utility routines. + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include "portable_defns.h" +#include "ptst.h" + + +pthread_key_t ptst_key; +ptst_t *ptst_list; + +static unsigned int next_id; + +ptst_t *critical_enter(void) +{ + ptst_t *ptst, *next, *new_next; + unsigned int id, oid; + + ptst = (ptst_t *)pthread_getspecific(ptst_key); + if ( ptst == NULL ) + { + for ( ptst = ptst_first(); ptst != NULL; ptst = ptst_next(ptst) ) + { + if ( (ptst->count == 0) && (CASIO(&ptst->count, 0, 1) == 0) ) + { + break; + } + } + + if ( ptst == NULL ) + { + ptst = ALIGNED_ALLOC(sizeof(*ptst)); + if ( ptst == NULL ) exit(1); + memset(ptst, 0, sizeof(*ptst)); + ptst->gc = gc_init(); + rand_init(ptst); + ptst->count = 1; + id = next_id; + while ( (oid = CASIO(&next_id, id, id+1)) != id ) id = oid; + ptst->id = id; + new_next = ptst_list; + do { + ptst->next = next = new_next; + WMB_NEAR_CAS(); + } + while ( (new_next = CASPO(&ptst_list, next, ptst)) != next ); + } + + pthread_setspecific(ptst_key, ptst); + } + + gc_enter(ptst); + return(ptst); +} + + +static void ptst_destructor(ptst_t *ptst) +{ + ptst->count = 0; +} + + +void _init_ptst_subsystem(void) +{ + ptst_list = NULL; + next_id = 0; + WMB(); + if ( pthread_key_create(&ptst_key, (void (*)(void *))ptst_destructor) ) + { + exit(1); + } +} diff --git a/src/mcas/ptst.h b/src/mcas/ptst.h new file mode 100644 index 000000000..8e6e30874 --- /dev/null +++ b/src/mcas/ptst.h @@ -0,0 +1,47 @@ +/****************************************************************************** + * ptst.h + * + * Per-thread state management. + * + * Copyright (c) 2002-2003, K A Fraser + */ + +#ifndef __PTST_H__ +#define __PTST_H__ + +typedef struct ptst_st ptst_t; + +#include "gc.h" +#include "random.h" + +struct ptst_st +{ + /* Thread id */ + unsigned int id; + + /* State management */ + ptst_t *next; + unsigned int count; + /* Utility structures */ + gc_t *gc; + rand_t rand; +}; + +extern pthread_key_t ptst_key; + +/* + * Enter/leave a critical region. A thread gets a state handle for + * use during critical regions. + */ +ptst_t *critical_enter(void); +#define critical_exit(_p) gc_exit(_p) + +/* Iterators */ +extern ptst_t *ptst_list; +#define ptst_first() (ptst_list) +#define ptst_next(_p) ((_p)->next) + +/* Called once at start-of-day for entire application. */ +void _init_ptst_subsystem(void); + +#endif /* __PTST_H__ */ diff --git a/src/mcas/random.h b/src/mcas/random.h new file mode 100644 index 000000000..9a4826ff1 --- /dev/null +++ b/src/mcas/random.h @@ -0,0 +1,19 @@ +/****************************************************************************** + * random.h + * + * A really simple random-number generator. Crappy linear congruential + * taken from glibc, but has at least a 2^32 period. + */ + +#ifndef __RANDOM_H__ +#define __RANDOM_H__ + +typedef unsigned long rand_t; + +#define rand_init(_ptst) \ + ((_ptst)->rand = RDTICK()) + +#define rand_next(_ptst) \ + ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345) + +#endif /* __RANDOM_H__ */ diff --git a/src/mcas/rb_lock_concurrentwriters.c b/src/mcas/rb_lock_concurrentwriters.c new file mode 100644 index 000000000..39867f553 --- /dev/null +++ b/src/mcas/rb_lock_concurrentwriters.c @@ -0,0 +1,763 @@ +/****************************************************************************** + * rb_lock_concurrentwriters.c + * + * Lock-based red-black trees, based on Hanke's relaxed balancing operations. + * + * For more details on the local tree restructuring operations used here: + * S. Hanke, T. Ottmann, and E. Soisalon-Soininen. 
+ * "Relaxed balanced red-black trees". + * 3rd Italian Conference on Algorithms and Complexity, pages 193-204. + * + * Rather than issuing up-in and up-out requests to a balancing process, + * each operation is directly responsible for local rebalancing. However, + * this process can be split into a number of individual restructuring + * operations, and locks can be released between each operation. Between + * operations, we mark the node concerned as UNBALANCED -- contending + * updates will then wait for this mark to be removed before continuing. + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include +#include "portable_defns.h" +#include "gc.h" +#include "set.h" + +#define BLACK_MARK 0 +#define RED_MARK 1 +#define UNBALANCED_MARK 2 + +#define SET_VALUE(_v,_n) \ + ((_v) = ((setval_t)(((unsigned long)(_v)&3)|((unsigned long)(_n))))) +#define GET_VALUE(_v) ((setval_t)((int_addr_t)(_v) & ~3UL)) +#define GET_COLOUR(_v) ((int_addr_t)(_v) & 1) +#define SET_COLOUR(_v,_c) \ + ((setval_t)(((unsigned long)(_v)&~1UL)|(unsigned long)(_c))) + +#define IS_BLACK(_v) (GET_COLOUR(_v) == 0) +#define IS_RED(_v) (GET_COLOUR(_v) == 1) +#define IS_UNBALANCED(_v) (((int_addr_t)(_v) & 2) == 2) + +#define MK_BLACK(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 0)) +#define MK_RED(_v) ((setval_t)(((int_addr_t)(_v)&~1UL) | 1)) +#define MK_BALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 0)) +#define MK_UNBALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 2)) + +#define GARBAGE_VALUE ((setval_t)4) +#define IS_GARBAGE(_n) (GET_VALUE((_n)->v) == GARBAGE_VALUE) +#define MK_GARBAGE(_n) (SET_VALUE((_n)->v, GARBAGE_VALUE)) + +#define INTERNAL_VALUE ((void *)0xdeadbee0) + +#define IS_ROOT(_n) ((_n)->p->k == 0) +#define IS_LEAF(_n) ((_n)->l == NULL) + +/* TRUE if node X is a child of P. 
*/ +#define ADJACENT(_p,_x) (((_p)->l==(_x))||((_p)->r==(_x))) + +typedef struct node_st node_t; +typedef struct set_st set_t; + +struct node_st +{ + setkey_t k; + setval_t v; + node_t *l, *r, *p; + mrsw_lock_t lock; +}; + +struct set_st +{ + node_t root; + node_t null; + node_t dummy_g, dummy_gg; +}; + +static int gc_id; + +/* Nodes p, x, y must be locked for writing. */ +static void left_rotate(node_t *x) +{ + node_t *y = x->r, *p = x->p; + x->r = y->l; + x->r->p = x; + x->p = y; + y->l = x; + y->p = p; + if ( x == p->l ) p->l = y; else p->r = y; +} + + +/* Nodes p, x, y must be locked for writing. */ +static void right_rotate(node_t *x) +{ + node_t *y = x->l, *p = x->p; + x->l = y->r; + x->l->p = x; + x->p = y; + y->r = x; + y->p = p; + if ( x == p->l ) p->l = y; else p->r = y; +} + + +static void fix_unbalance_up(node_t *x) +{ + mrsw_qnode_t x_qn, g_qn, p_qn, w_qn, gg_qn; + node_t *g, *p, *w, *gg; + int done = 0; + + do { + assert(IS_UNBALANCED(x->v)); + if ( IS_GARBAGE(x) ) return; + + p = x->p; + g = p->p; + gg = g->p; + + wr_lock(&gg->lock, &gg_qn); + if ( !ADJACENT(gg, g) || IS_UNBALANCED(gg->v) || IS_GARBAGE(gg) ) + goto unlock_gg; + + wr_lock(&g->lock, &g_qn); + if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) ) goto unlock_ggg; + + wr_lock(&p->lock, &p_qn); + if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pggg; + + wr_lock(&x->lock, &x_qn); + + assert(IS_RED(x->v)); + assert(IS_UNBALANCED(x->v)); + + if ( IS_BLACK(p->v) ) + { + /* Case 1. Nothing to do. */ + x->v = MK_BALANCED(x->v); + done = 1; + goto unlock_xpggg; + } + + if ( IS_ROOT(x) ) + { + /* Case 2. */ + x->v = MK_BLACK(MK_BALANCED(x->v)); + done = 1; + goto unlock_xpggg; + } + + if ( IS_ROOT(p) ) + { + /* Case 2. */ + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + done = 1; + goto unlock_xpggg; + } + + if ( g->l == p ) w = g->r; else w = g->l; + wr_lock(&w->lock, &w_qn); + + if ( IS_RED(w->v) ) + { + /* Case 5. */ + /* In all other cases, doesn't change colour or subtrees. */ + if ( IS_UNBALANCED(w->v) ) goto unlock_wxpggg; + g->v = MK_UNBALANCED(MK_RED(g->v)); + p->v = MK_BLACK(p->v); + w->v = MK_BLACK(w->v); + x->v = MK_BALANCED(x->v); + done = 2; + goto unlock_wxpggg; + } + + /* Cases 3 & 4. Both of these need the great-grandfather locked. */ + if ( p == g->l ) + { + if ( x == p->l ) + { + /* Case 3. Single rotation. */ + x->v = MK_BALANCED(x->v); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + right_rotate(g); + } + else + { + /* Case 4. Double rotation. */ + x->v = MK_BALANCED(MK_BLACK(x->v)); + g->v = MK_RED(g->v); + left_rotate(p); + right_rotate(g); + } + } + else /* SYMMETRIC CASE */ + { + if ( x == p->r ) + { + /* Case 3. Single rotation. */ + x->v = MK_BALANCED(x->v); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + left_rotate(g); + } + else + { + /* Case 4. Double rotation. */ + x->v = MK_BALANCED(MK_BLACK(x->v)); + g->v = MK_RED(g->v); + right_rotate(p); + left_rotate(g); + } + } + + done = 1; + + unlock_wxpggg: + wr_unlock(&w->lock, &w_qn); + unlock_xpggg: + wr_unlock(&x->lock, &x_qn); + unlock_pggg: + wr_unlock(&p->lock, &p_qn); + unlock_ggg: + wr_unlock(&g->lock, &g_qn); + unlock_gg: + wr_unlock(&gg->lock, &gg_qn); + + if ( done == 2 ) + { + x = g; + done = 0; + } + } + while ( !done ); +} + + +static void fix_unbalance_down(node_t *x) +{ + /* WN == W_NEAR, WF == W_FAR (W_FAR is further, in key space, from X). 
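+     * WN is the child of sibling W nearer X in key order, WF the
+     * farther one; the colours of W, WN and WF select which relaxed
+     * rebalancing case is applied below.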
*/ + mrsw_qnode_t x_qn, w_qn, p_qn, g_qn, wn_qn, wf_qn; + node_t *w, *p, *g, *wn, *wf; + int done = 0; + + do { + if ( !IS_UNBALANCED(x->v) || IS_GARBAGE(x) ) return; + + p = x->p; + g = p->p; + + wr_lock(&g->lock, &g_qn); + if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) ) + goto unlock_g; + + wr_lock(&p->lock, &p_qn); + if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pg; + + wr_lock(&x->lock, &x_qn); + + if ( !IS_BLACK(x->v) || !IS_UNBALANCED(x->v) ) + { + done = 1; + goto unlock_xpg; + } + + if ( IS_ROOT(x) ) + { + x->v = MK_BALANCED(x->v); + done = 1; + goto unlock_xpg; + } + + w = (x == p->l) ? p->r : p->l; + wr_lock(&w->lock, &w_qn); + if ( IS_UNBALANCED(w->v) ) + { + if ( IS_BLACK(w->v) ) + { + /* Funky relaxed rules to the rescue. */ + x->v = MK_BALANCED(x->v); + w->v = MK_BALANCED(w->v); + if ( IS_BLACK(p->v) ) + { + p->v = MK_UNBALANCED(p->v); + done = 2; + } + else + { + p->v = MK_BLACK(p->v); + done = 1; + } + } + goto unlock_wxpg; + } + + assert(!IS_LEAF(w)); + + if ( x == p->l ) + { + wn = w->l; + wf = w->r; + } + else + { + wn = w->r; + wf = w->l; + } + + wr_lock(&wn->lock, &wn_qn); + /* Hanke has an extra relaxed transform here. It's not needed. */ + if ( IS_UNBALANCED(wn->v) ) goto unlock_wnwxpg; + + wr_lock(&wf->lock, &wf_qn); + if ( IS_UNBALANCED(wf->v) ) goto unlock_wfwnwxpg; + + if ( IS_RED(w->v) ) + { + /* Case 1. Rotate at parent. */ + assert(IS_BLACK(p->v) && IS_BLACK(wn->v) && IS_BLACK(wf->v)); + w->v = MK_BLACK(w->v); + p->v = MK_RED(p->v); + if ( x == p->l ) left_rotate(p); else right_rotate(p); + goto unlock_wfwnwxpg; + } + + if ( IS_BLACK(wn->v) && IS_BLACK(wf->v) ) + { + if ( IS_RED(p->v) ) + { + /* Case 2. Simple recolouring. */ + p->v = MK_BLACK(p->v); + done = 1; + } + else + { + /* Case 5. Simple recolouring. */ + p->v = MK_UNBALANCED(p->v); + done = 2; + } + w->v = MK_RED(w->v); + x->v = MK_BALANCED(x->v); + goto unlock_wfwnwxpg; + } + + if ( x == p->l ) + { + if ( IS_RED(wf->v) ) + { + /* Case 3. Single rotation. */ + wf->v = MK_BLACK(wf->v); + w->v = SET_COLOUR(w->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + left_rotate(p); + } + else + { + /* Case 4. Double rotation. */ + assert(IS_RED(wn->v)); + wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + right_rotate(w); + left_rotate(p); + } + } + else /* SYMMETRIC CASE: X == P->R */ + { + if ( IS_RED(wf->v) ) + { + /* Case 3. Single rotation. */ + wf->v = MK_BLACK(wf->v); + w->v = SET_COLOUR(w->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + right_rotate(p); + } + else + { + /* Case 4. Double rotation. 
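+                 * WN takes P's old colour and is rotated into P's
+                 * position, absorbing X's surplus black; this case
+                 * terminates the fix-up (done is set to 1 below).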
*/ + assert(IS_RED(wn->v)); + wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + left_rotate(w); + right_rotate(p); + } + } + + done = 1; + + unlock_wfwnwxpg: + wr_unlock(&wf->lock, &wf_qn); + unlock_wnwxpg: + wr_unlock(&wn->lock, &wn_qn); + unlock_wxpg: + wr_unlock(&w->lock, &w_qn); + unlock_xpg: + wr_unlock(&x->lock, &x_qn); + unlock_pg: + wr_unlock(&p->lock, &p_qn); + unlock_g: + wr_unlock(&g->lock, &g_qn); + + if ( done == 2 ) + { + x = p; + done = 0; + } + } + while ( !done ); +} + + +static void delete_finish(ptst_t *ptst, node_t *x) +{ + mrsw_qnode_t g_qn, p_qn, w_qn, x_qn; + node_t *g, *p, *w; + int done = 0; + + do { + if ( IS_GARBAGE(x) ) return; + + p = x->p; + g = p->p; + + wr_lock(&g->lock, &g_qn); + if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) ) + goto unlock_g; + + wr_lock(&p->lock, &p_qn); + /* Removing unbalanced red nodes is okay. */ + if ( !ADJACENT(p, x) || (IS_UNBALANCED(p->v) && IS_BLACK(p->v)) ) + goto unlock_pg; + + wr_lock(&x->lock, &x_qn); + if ( IS_UNBALANCED(x->v) ) goto unlock_xpg; + if ( GET_VALUE(x->v) != NULL ) + { + done = 1; + goto unlock_xpg; + } + + if ( p->l == x ) w = p->r; else w = p->l; + assert(w != x); + wr_lock(&w->lock, &w_qn); + if ( IS_UNBALANCED(w->v) ) goto unlock_wxpg; + + if ( g->l == p ) g->l = w; else g->r = w; + MK_GARBAGE(p); gc_free(ptst, p, gc_id); + MK_GARBAGE(x); gc_free(ptst, x, gc_id); + w->p = g; + if ( IS_BLACK(p->v) && IS_BLACK(w->v) ) + { + w->v = MK_UNBALANCED(w->v); + done = 2; + } + else + { + w->v = MK_BLACK(w->v); + done = 1; + } + + unlock_wxpg: + wr_unlock(&w->lock, &w_qn); + unlock_xpg: + wr_unlock(&x->lock, &x_qn); + unlock_pg: + wr_unlock(&p->lock, &p_qn); + unlock_g: + wr_unlock(&g->lock, &g_qn); + } + while ( !done ); + + if ( done == 2 ) fix_unbalance_down(w); +} + + +set_t *set_alloc(void) +{ + ptst_t *ptst; + set_t *set; + node_t *root, *null; + + ptst = critical_enter(); + + set = (set_t *)malloc(sizeof(*set)); + memset(set, 0, sizeof(*set)); + + root = &set->root; + null = &set->null; + + root->k = 0; + root->v = MK_RED(INTERNAL_VALUE); + root->l = NULL; + root->r = null; + root->p = NULL; + mrsw_init(&root->lock); + + null->k = SENTINEL_KEYMIN; + null->v = MK_BLACK(INTERNAL_VALUE); + null->l = NULL; + null->r = NULL; + null->p = root; + mrsw_init(&null->lock); + + set->dummy_gg.l = &set->dummy_g; + set->dummy_g.p = &set->dummy_gg; + set->dummy_g.l = &set->root; + set->root.p = &set->dummy_g; + + critical_exit(ptst); + + return set; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + ptst_t *ptst; + node_t *x, *y, *z, *new_internal, *new_leaf; + mrsw_qnode_t qn[2], *y_pqn=qn+0, *z_pqn=qn+1, *t_pqn, x_qn; + int fix_up = 0; + setval_t ov = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + /* + * We start our search by read-lock-coupling from the root. + * There is a special case, when there is only one node in the tree. + * In this case, we take a write lock on the root. + */ + retry_from_root: + z = &s->root; + rd_lock(&z->lock, z_pqn); + + /* + * We read-couple down the tree until we get within two nodes of the + * required leaf. We then speculatively take write locks. + */ + carry_on: + while ( (y = (k <= z->k) ? z->l : z->r) != NULL ) + { + if ( IS_LEAF(y) ) + { + y = z; + rd_unlock(&z->lock, z_pqn); + wr_lock(&y->lock, y_pqn); + x = (k <= z->k) ? 
z->l : z->r; + if ( IS_GARBAGE(y) || !IS_LEAF(x) ) + { + wr_unlock(&y->lock, y_pqn); + goto retry_from_root; + } + wr_lock(&x->lock, &x_qn); + assert(!IS_GARBAGE(x)); + goto found_and_locked; + } + + x = (k <= y->k) ? y->l : y->r; + if ( IS_LEAF(x) ) goto found; + rd_lock(&y->lock, y_pqn); + rd_unlock(&z->lock, z_pqn); + z = y; + t_pqn = y_pqn; + y_pqn = z_pqn; + z_pqn = t_pqn; + } + + /* + * At this point Z is read locked, and next two nodes on search path + * are probably the last. Certainly there is more than one on the path. + */ + found: + wr_lock(&y->lock, y_pqn); + x = (k <= y->k) ? y->l : y->r; + if ( !IS_LEAF(x) ) + { + wr_unlock(&y->lock, y_pqn); + goto carry_on; + } + wr_lock(&x->lock, &x_qn); + rd_unlock(&z->lock, z_pqn); + + found_and_locked: + /* + * At this point, node X is write locked and may be correct node. + * Y is X's parent, and is also write locked. No other node is locked. + */ + assert(!IS_GARBAGE(x)); + if ( x->k == k ) + { + ov = GET_VALUE(x->v); + if ( overwrite || (ov == NULL) ) + { + SET_VALUE(x->v, v); + } + } + else + { + new_leaf = gc_alloc(ptst, gc_id); + new_internal = gc_alloc(ptst, gc_id); + new_leaf->k = k; + new_leaf->v = MK_BLACK(v); + new_leaf->l = NULL; + new_leaf->r = NULL; + new_leaf->p = new_internal; + mrsw_init(&new_leaf->lock); + if ( x->k < k ) + { + new_internal->k = x->k; + new_internal->l = x; + new_internal->r = new_leaf; + } + else + { + new_internal->k = k; + new_internal->l = new_leaf; + new_internal->r = x; + } + new_internal->p = y; + mrsw_init(&new_internal->lock); + x->p = new_internal; + if ( y->l == x ) y->l = new_internal; else y->r = new_internal; + if ( IS_UNBALANCED(x->v) ) + { + x->v = MK_BALANCED(x->v); + new_internal->v = MK_BLACK(INTERNAL_VALUE); + } + else if ( IS_RED(y->v) ) + { + new_internal->v = MK_UNBALANCED(MK_RED(INTERNAL_VALUE)); + fix_up = 1; + } + else + { + new_internal->v = MK_RED(INTERNAL_VALUE); + } + } + + wr_unlock(&y->lock, y_pqn); + wr_unlock(&x->lock, &x_qn); + + if ( fix_up ) fix_unbalance_up(new_internal); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *y, *z; + mrsw_qnode_t qn[2], *y_pqn=qn+0, *z_pqn=qn+1, *t_pqn; + setval_t ov = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + z = &s->root; + rd_lock(&z->lock, z_pqn); + + while ( (y = (k <= z->k) ? z->l : z->r) != NULL ) + { + if ( IS_LEAF(y) ) + wr_lock(&y->lock, y_pqn); + else + rd_lock(&y->lock, y_pqn); + rd_unlock(&z->lock, z_pqn); + z = y; + t_pqn = y_pqn; + y_pqn = z_pqn; + z_pqn = t_pqn; + } + + if ( z->k == k ) + { + ov = GET_VALUE(z->v); + SET_VALUE(z->v, NULL); + } + + wr_unlock(&z->lock, z_pqn); + + if ( ov != NULL ) delete_finish(ptst, z); + + critical_exit(ptst); + return ov; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *m, *n; + mrsw_qnode_t qn[2], *m_pqn=&qn[0], *n_pqn=&qn[1], *t_pqn; + setval_t v = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = &s->root; + rd_lock(&n->lock, n_pqn); + + while ( (m = (k <= n->k) ? 
n->l : n->r) != NULL ) + { + rd_lock(&m->lock, m_pqn); + rd_unlock(&n->lock, n_pqn); + n = m; + t_pqn = m_pqn; + m_pqn = n_pqn; + n_pqn = t_pqn; + } + + if ( k == n->k ) v = GET_VALUE(n->v); + + rd_unlock(&n->lock, n_pqn); + + critical_exit(ptst); + + return v; +} + + +void _init_set_subsystem(void) +{ + gc_id = gc_add_allocator(sizeof(node_t)); +} diff --git a/src/mcas/rb_lock_mutex.c b/src/mcas/rb_lock_mutex.c new file mode 100644 index 000000000..ba7878996 --- /dev/null +++ b/src/mcas/rb_lock_mutex.c @@ -0,0 +1,772 @@ +/****************************************************************************** + * rb_lock_mutex.c + * + * Lock-based red-black trees, based on Hanke's relaxed balancing operations. + * + * For more details on the local tree restructuring operations used here: + * S. Hanke, T. Ottmann, and E. Soisalon-Soininen. + * "Relaxed balanced red-black trees". + * 3rd Italian Conference on Algorithms and Complexity, pages 193-204. + * + * Rather than issuing up-in and up-out requests to a balancing process, + * each operation is directly responsible for local rebalancing. However, + * this process can be split into a number of individual restructuring + * operations, and locks can be released between each operation. Between + * operations, we mark the node concerned as UNBALANCED -- contending + * updates will then wait for this mark to be removed before continuing. + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define BLACK_MARK      0
+#define RED_MARK        1
+#define UNBALANCED_MARK 2
+
+#define SET_VALUE(_v,_n) \
+    ((_v) = ((setval_t)(((unsigned long)(_v)&3)|((unsigned long)(_n)))))
+#define GET_VALUE(_v)   ((setval_t)((int_addr_t)(_v) & ~3UL))
+#define GET_COLOUR(_v)  ((int_addr_t)(_v) & 1)
+#define SET_COLOUR(_v,_c) \
+    ((setval_t)(((unsigned long)(_v)&~1UL)|(unsigned long)(_c)))
+
+#define IS_BLACK(_v)      (GET_COLOUR(_v) == 0)
+#define IS_RED(_v)        (GET_COLOUR(_v) == 1)
+#define IS_UNBALANCED(_v) (((int_addr_t)(_v) & 2) == 2)
+
+#define MK_BLACK(_v)      ((setval_t)(((int_addr_t)(_v)&~1UL) | 0))
+#define MK_RED(_v)        ((setval_t)(((int_addr_t)(_v)&~1UL) | 1))
+#define MK_BALANCED(_v)   ((setval_t)(((int_addr_t)(_v)&~2UL) | 0))
+#define MK_UNBALANCED(_v) ((setval_t)(((int_addr_t)(_v)&~2UL) | 2))
+
+#define GARBAGE_VALUE     ((setval_t)4)
+#define IS_GARBAGE(_n)    (GET_VALUE((_n)->v) == GARBAGE_VALUE)
+#define MK_GARBAGE(_n)    (SET_VALUE((_n)->v, GARBAGE_VALUE))
+
+#define INTERNAL_VALUE ((void *)0xdeadbee0)
+
+#define IS_ROOT(_n) ((_n)->p->k == 0)
+#define IS_LEAF(_n) ((_n)->l == NULL)
+
+/* TRUE if node X is a child of P. */
+#define ADJACENT(_p,_x) (((_p)->l==(_x))||((_p)->r==(_x)))
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+    setkey_t k;
+    setval_t v;
+    node_t *l, *r, *p;
+    mcs_lock_t lock;
+};
+
+struct set_st
+{
+    node_t root;
+    node_t null;
+    node_t dummy_g, dummy_gg;
+};
+
+static int gc_id;
+
+/* Nodes p, x, y must be locked. */
+static void left_rotate(ptst_t *ptst, node_t *x)
+{
+    node_t *y = x->r, *p = x->p, *nx;
+
+    nx    = gc_alloc(ptst, gc_id);
+    nx->p = y;
+    nx->l = x->l;
+    nx->r = y->l;
+    nx->k = x->k;
+    nx->v = x->v;
+    mcs_init(&nx->lock);
+
+    WMB();
+
+    y->p    = p;
+    x->l->p = nx;
+    y->l->p = nx;
+    y->l    = nx;
+    if ( x == p->l ) p->l = y; else p->r = y;
+
+    MK_GARBAGE(x);
+    gc_free(ptst, x, gc_id);
+}
+
+
+/* Nodes p, x, y must be locked. */
+static void right_rotate(ptst_t *ptst, node_t *x)
+{
+    node_t *y = x->l, *p = x->p, *nx;
+
+    nx    = gc_alloc(ptst, gc_id);
+    nx->p = y;
+    nx->l = y->r;
+    nx->r = x->r;
+    nx->k = x->k;
+    nx->v = x->v;
+    mcs_init(&nx->lock);
+
+    WMB();
+
+    y->p    = p;
+    x->r->p = nx;
+    y->r->p = nx;
+    y->r    = nx;
+    if ( x == p->l ) p->l = y; else p->r = y;
+
+    MK_GARBAGE(x);
+    gc_free(ptst, x, gc_id);
+}
+
+
+static void fix_unbalance_up(ptst_t *ptst, node_t *x)
+{
+    qnode_t x_qn, g_qn, p_qn, w_qn, gg_qn;
+    node_t *g, *p, *w, *gg;
+    int done = 0;
+
+    do {
+        assert(IS_UNBALANCED(x->v));
+        if ( IS_GARBAGE(x) ) return;
+
+        p  = x->p;
+        g  = p->p;
+        gg = g->p;
+
+        mcs_lock(&gg->lock, &gg_qn);
+        if ( !ADJACENT(gg, g) || IS_UNBALANCED(gg->v) || IS_GARBAGE(gg) )
+            goto unlock_gg;
+
+        mcs_lock(&g->lock, &g_qn);
+        if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) ) goto unlock_ggg;
+
+        mcs_lock(&p->lock, &p_qn);
+        if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pggg;
+
+        mcs_lock(&x->lock, &x_qn);
+
+        assert(IS_RED(x->v));
+        assert(IS_UNBALANCED(x->v));
+
+        if ( IS_BLACK(p->v) )
+        {
+            /* Case 1. Nothing to do. */
+            x->v = MK_BALANCED(x->v);
+            done = 1;
+            goto unlock_xpggg;
+        }
+
+        if ( IS_ROOT(x) )
+        {
+            /* Case 2. */
+            x->v = MK_BLACK(MK_BALANCED(x->v));
+            done = 1;
+            goto unlock_xpggg;
+        }
+
+        if ( IS_ROOT(p) )
+        {
+            /* Case 2.
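+             * P is the tree root: recolouring it black lengthens every
+             * path by the same amount, so the red-red violation at X
+             * is resolved without unbalancing anything else.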
*/ + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + done = 1; + goto unlock_xpggg; + } + + if ( g->l == p ) w = g->r; else w = g->l; + mcs_lock(&w->lock, &w_qn); + + if ( IS_RED(w->v) ) + { + /* Case 5. */ + /* In all other cases, doesn't change colour or subtrees. */ + if ( IS_UNBALANCED(w->v) ) goto unlock_wxpggg; + g->v = MK_UNBALANCED(MK_RED(g->v)); + p->v = MK_BLACK(p->v); + w->v = MK_BLACK(w->v); + x->v = MK_BALANCED(x->v); + done = 2; + goto unlock_wxpggg; + } + + /* Cases 3 & 4. Both of these need the great-grandfather locked. */ + if ( p == g->l ) + { + if ( x == p->l ) + { + /* Case 3. Single rotation. */ + x->v = MK_BALANCED(x->v); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + right_rotate(ptst, g); + } + else + { + /* Case 4. Double rotation. */ + x->v = MK_BALANCED(MK_BLACK(x->v)); + g->v = MK_RED(g->v); + left_rotate(ptst, p); + right_rotate(ptst, g); + } + } + else /* SYMMETRIC CASE */ + { + if ( x == p->r ) + { + /* Case 3. Single rotation. */ + x->v = MK_BALANCED(x->v); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + left_rotate(ptst, g); + } + else + { + /* Case 4. Double rotation. */ + x->v = MK_BALANCED(MK_BLACK(x->v)); + g->v = MK_RED(g->v); + right_rotate(ptst, p); + left_rotate(ptst, g); + } + } + + done = 1; + + unlock_wxpggg: + mcs_unlock(&w->lock, &w_qn); + unlock_xpggg: + mcs_unlock(&x->lock, &x_qn); + unlock_pggg: + mcs_unlock(&p->lock, &p_qn); + unlock_ggg: + mcs_unlock(&g->lock, &g_qn); + unlock_gg: + mcs_unlock(&gg->lock, &gg_qn); + + if ( done == 2 ) + { + x = g; + done = 0; + } + } + while ( !done ); +} + + +static void fix_unbalance_down(ptst_t *ptst, node_t *x) +{ + /* WN == W_NEAR, WF == W_FAR (W_FAR is further, in key space, from X). */ + qnode_t x_qn, w_qn, p_qn, g_qn, wn_qn, wf_qn; + node_t *w, *p, *g, *wn, *wf; + int done = 0; + + do { + if ( !IS_UNBALANCED(x->v) || IS_GARBAGE(x) ) return; + + p = x->p; + g = p->p; + + mcs_lock(&g->lock, &g_qn); + if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) ) + goto unlock_g; + + mcs_lock(&p->lock, &p_qn); + if ( !ADJACENT(p, x) || IS_UNBALANCED(p->v) ) goto unlock_pg; + + mcs_lock(&x->lock, &x_qn); + + if ( !IS_BLACK(x->v) || !IS_UNBALANCED(x->v) ) + { + done = 1; + goto unlock_xpg; + } + + if ( IS_ROOT(x) ) + { + x->v = MK_BALANCED(x->v); + done = 1; + goto unlock_xpg; + } + + w = (x == p->l) ? p->r : p->l; + mcs_lock(&w->lock, &w_qn); + if ( IS_UNBALANCED(w->v) ) + { + if ( IS_BLACK(w->v) ) + { + /* Funky relaxed rules to the rescue. */ + x->v = MK_BALANCED(x->v); + w->v = MK_BALANCED(w->v); + if ( IS_BLACK(p->v) ) + { + p->v = MK_UNBALANCED(p->v); + done = 2; + } + else + { + p->v = MK_BLACK(p->v); + done = 1; + } + } + goto unlock_wxpg; + } + + assert(!IS_LEAF(w)); + + if ( x == p->l ) + { + wn = w->l; + wf = w->r; + } + else + { + wn = w->r; + wf = w->l; + } + + mcs_lock(&wn->lock, &wn_qn); + /* Hanke has an extra relaxed transform here. It's not needed. */ + if ( IS_UNBALANCED(wn->v) ) goto unlock_wnwxpg; + + mcs_lock(&wf->lock, &wf_qn); + if ( IS_UNBALANCED(wf->v) ) goto unlock_wfwnwxpg; + + if ( IS_RED(w->v) ) + { + /* Case 1. Rotate at parent. */ + assert(IS_BLACK(p->v) && IS_BLACK(wn->v) && IS_BLACK(wf->v)); + w->v = MK_BLACK(w->v); + p->v = MK_RED(p->v); + if ( x == p->l ) left_rotate(ptst, p); else right_rotate(ptst, p); + goto unlock_wfwnwxpg; + } + + if ( IS_BLACK(wn->v) && IS_BLACK(wf->v) ) + { + if ( IS_RED(p->v) ) + { + /* Case 2. Simple recolouring. */ + p->v = MK_BLACK(p->v); + done = 1; + } + else + { + /* Case 5. Simple recolouring. 
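+                 * P is black, so it inherits the double black: setting
+                 * done to 2 restarts the fix-up one level higher with
+                 * X = P once the locks are dropped.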
*/ + p->v = MK_UNBALANCED(p->v); + done = 2; + } + w->v = MK_RED(w->v); + x->v = MK_BALANCED(x->v); + goto unlock_wfwnwxpg; + } + + if ( x == p->l ) + { + if ( IS_RED(wf->v) ) + { + /* Case 3. Single rotation. */ + wf->v = MK_BLACK(wf->v); + w->v = SET_COLOUR(w->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + left_rotate(ptst, p); + } + else + { + /* Case 4. Double rotation. */ + assert(IS_RED(wn->v)); + wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + right_rotate(ptst, w); + left_rotate(ptst, p); + } + } + else /* SYMMETRIC CASE: X == P->R */ + { + if ( IS_RED(wf->v) ) + { + /* Case 3. Single rotation. */ + wf->v = MK_BLACK(wf->v); + w->v = SET_COLOUR(w->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + right_rotate(ptst, p); + } + else + { + /* Case 4. Double rotation. */ + assert(IS_RED(wn->v)); + wn->v = SET_COLOUR(wn->v, GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + x->v = MK_BALANCED(x->v); + left_rotate(ptst, w); + right_rotate(ptst, p); + } + } + + done = 1; + + unlock_wfwnwxpg: + mcs_unlock(&wf->lock, &wf_qn); + unlock_wnwxpg: + mcs_unlock(&wn->lock, &wn_qn); + unlock_wxpg: + mcs_unlock(&w->lock, &w_qn); + unlock_xpg: + mcs_unlock(&x->lock, &x_qn); + unlock_pg: + mcs_unlock(&p->lock, &p_qn); + unlock_g: + mcs_unlock(&g->lock, &g_qn); + + if ( done == 2 ) + { + x = p; + done = 0; + } + } + while ( !done ); +} + + +static void delete_finish(ptst_t *ptst, node_t *x) +{ + qnode_t g_qn, p_qn, w_qn, x_qn; + node_t *g, *p, *w; + int done = 0; + + do { + if ( IS_GARBAGE(x) ) return; + + p = x->p; + g = p->p; + + mcs_lock(&g->lock, &g_qn); + if ( !ADJACENT(g, p) || IS_UNBALANCED(g->v) || IS_GARBAGE(g) ) + goto unlock_g; + + mcs_lock(&p->lock, &p_qn); + /* Removing unbalanced red nodes is okay. 
*/ + if ( !ADJACENT(p, x) || (IS_UNBALANCED(p->v) && IS_BLACK(p->v)) ) + goto unlock_pg; + + mcs_lock(&x->lock, &x_qn); + if ( IS_UNBALANCED(x->v) ) goto unlock_xpg; + if ( GET_VALUE(x->v) != NULL ) + { + done = 1; + goto unlock_xpg; + } + + if ( p->l == x ) w = p->r; else w = p->l; + assert(w != x); + mcs_lock(&w->lock, &w_qn); + if ( IS_UNBALANCED(w->v) ) goto unlock_wxpg; + + if ( g->l == p ) g->l = w; else g->r = w; + MK_GARBAGE(p); gc_free(ptst, p, gc_id); + MK_GARBAGE(x); gc_free(ptst, x, gc_id); + w->p = g; + if ( IS_BLACK(p->v) && IS_BLACK(w->v) ) + { + w->v = MK_UNBALANCED(w->v); + done = 2; + } + else + { + w->v = MK_BLACK(w->v); + done = 1; + } + + unlock_wxpg: + mcs_unlock(&w->lock, &w_qn); + unlock_xpg: + mcs_unlock(&x->lock, &x_qn); + unlock_pg: + mcs_unlock(&p->lock, &p_qn); + unlock_g: + mcs_unlock(&g->lock, &g_qn); + } + while ( !done ); + + if ( done == 2 ) fix_unbalance_down(ptst, w); +} + + +set_t *set_alloc(void) +{ + ptst_t *ptst; + set_t *set; + node_t *root, *null; + + ptst = critical_enter(); + + set = (set_t *)malloc(sizeof(*set)); + memset(set, 0, sizeof(*set)); + + root = &set->root; + null = &set->null; + + root->k = 0; + root->v = MK_RED(INTERNAL_VALUE); + root->l = NULL; + root->r = null; + root->p = NULL; + mcs_init(&root->lock); + + null->k = SENTINEL_KEYMIN; + null->v = MK_BLACK(INTERNAL_VALUE); + null->l = NULL; + null->r = NULL; + null->p = root; + mcs_init(&null->lock); + + set->dummy_gg.l = &set->dummy_g; + set->dummy_g.p = &set->dummy_gg; + set->dummy_g.l = &set->root; + set->root.p = &set->dummy_g; + + critical_exit(ptst); + + return set; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + ptst_t *ptst; + qnode_t y_qn, z_qn; + node_t *y, *z, *new_internal, *new_leaf; + int fix_up = 0; + setval_t ov = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + retry: + z = &s->root; + while ( (y = (k <= z->k) ? z->l : z->r) != NULL ) + z = y; + + y = z->p; + mcs_lock(&y->lock, &y_qn); + if ( (((k <= y->k) ? 
y->l : y->r) != z) || IS_GARBAGE(y) ) + { + mcs_unlock(&y->lock, &y_qn); + goto retry; + } + + mcs_lock(&z->lock, &z_qn); + assert(!IS_GARBAGE(z) && IS_LEAF(z)); + + if ( z->k == k ) + { + ov = GET_VALUE(z->v); + if ( overwrite || (ov == NULL) ) + SET_VALUE(z->v, v); + } + else + { + new_leaf = gc_alloc(ptst, gc_id); + new_internal = gc_alloc(ptst, gc_id); + new_leaf->k = k; + new_leaf->v = MK_BLACK(v); + new_leaf->l = NULL; + new_leaf->r = NULL; + + new_leaf->p = new_internal; + mcs_init(&new_leaf->lock); + if ( z->k < k ) + { + new_internal->k = z->k; + new_internal->l = z; + new_internal->r = new_leaf; + } + else + { + new_internal->k = k; + new_internal->l = new_leaf; + new_internal->r = z; + } + new_internal->p = y; + mcs_init(&new_internal->lock); + + if ( IS_UNBALANCED(z->v) ) + { + z->v = MK_BALANCED(z->v); + new_internal->v = MK_BLACK(INTERNAL_VALUE); + } + else if ( IS_RED(y->v) ) + { + new_internal->v = MK_UNBALANCED(MK_RED(INTERNAL_VALUE)); + fix_up = 1; + } + else + { + new_internal->v = MK_RED(INTERNAL_VALUE); + } + + WMB(); + + z->p = new_internal; + if ( y->l == z ) y->l = new_internal; else y->r = new_internal; + } + + mcs_unlock(&y->lock, &y_qn); + mcs_unlock(&z->lock, &z_qn); + + if ( fix_up ) + fix_unbalance_up(ptst, new_internal); + + out: + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *y, *z; + qnode_t z_qn; + setval_t ov = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + z = &s->root; + while ( (y = (k <= z->k) ? z->l : z->r) != NULL ) + z = y; + + if ( z->k == k ) + { + mcs_lock(&z->lock, &z_qn); + if ( !IS_GARBAGE(z) ) + { + ov = GET_VALUE(z->v); + + SET_VALUE(z->v, NULL); + } + mcs_unlock(&z->lock, &z_qn); + } + + if ( ov != NULL ) + delete_finish(ptst, z); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *m, *n; + setval_t v; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = &s->root; + while ( (m = (k <= n->k) ? n->l : n->r) != NULL ) + n = m; + + v = (k == n->k) ? GET_VALUE(n->v) : NULL; + if ( v == GARBAGE_VALUE ) v = NULL; + + critical_exit(ptst); + + return v; +} + + +void _init_set_subsystem(void) +{ + gc_id = gc_add_allocator(sizeof(node_t)); +} + +#if 0 +static int valll=0, bug=0, nrb=-1; +static void __traverse(node_t *n, int d, int _nrb) +{ + int i; + if ( n == NULL ) + { + if ( nrb == -1 ) nrb = _nrb; + if ( nrb != _nrb ) + printf("Imbalance at depth %d (%d,%d)\n", d, nrb, _nrb); + return; + } + if ( IS_LEAF(n) && (n->k != 0) ) + { + assert(n->l == NULL); + assert(n->r == NULL); + assert(IS_BLACK(n->v)); + } + if ( !IS_LEAF(n) && IS_RED(n->v) ) + { + assert(IS_BLACK(n->l->v)); + assert(IS_BLACK(n->r->v)); + } + if ( IS_BLACK(n->v) ) _nrb++; + __traverse(n->l, d+1, _nrb); + if ( valll > n->k ) bug=1; +#if 0 + for ( i = 0; i < d; i++ ) printf(" "); + printf("%c%p K: %5d V: %p P: %p L: %p R: %p depth: %d\n", + IS_BLACK(n->v) ? 
'B' : 'R', n, n->k, n->v, n->p, n->l, n->r, d);
+#endif
+    valll = n->k;
+    __traverse(n->r, d+1, _nrb);
+}
+void check_tree(set_t *s)
+{
+    __traverse(s->root.r, 0, 0);
+    if ( bug )
+        printf("***********************************************************************************************\n");
+}
+#endif
diff --git a/src/mcas/rb_lock_serialisedwriters.c b/src/mcas/rb_lock_serialisedwriters.c
new file mode 100644
index 000000000..0b7e37504
--- /dev/null
+++ b/src/mcas/rb_lock_serialisedwriters.c
@@ -0,0 +1,498 @@
+/******************************************************************************
+ * rb_lock_serialisedwriters.c
+ *
+ * Lock-based red-black trees, using multi-reader locks.
+ *
+ * Updates are serialised on a global mutual-exclusion spinlock.
+ *
+ * Updates never need to read-lock, as updates are serialised. Must write-lock
+ * for all node changes except colour changes and parent-link updates.
+ *
+ * Searches must read-lock down the tree, as they do not serialise.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+    * notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    * copyright notice, this list of conditions and the following
+    * disclaimer in the documentation and/or other materials provided
+    * with the distribution. Neither the name of the Keir Fraser
+    * nor the names of its contributors may be used to endorse or
+    * promote products derived from this software without specific
+    * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "set.h"
+
+#define IS_BLACK(_v)   ((int_addr_t)(_v)&1)
+#define IS_RED(_v)     (!IS_BLACK(_v))
+#define MK_BLACK(_v)   ((setval_t)((int_addr_t)(_v)|1))
+#define MK_RED(_v)     ((setval_t)((int_addr_t)(_v)&~1))
+#define GET_VALUE(_v)  (MK_RED(_v))
+#define GET_COLOUR(_v) (IS_BLACK(_v))
+#define SET_COLOUR(_v,_c) ((setval_t)((unsigned long)(_v)|(unsigned long)(_c)))
+
+typedef struct node_st node_t;
+typedef struct set_st set_t;
+
+struct node_st
+{
+    setkey_t k;
+    setval_t v;
+    node_t *l, *r, *p;
+    mrsw_lock_t lock;
+};
+
+struct set_st
+{
+    node_t root;
+    CACHE_PAD(0);
+    mcs_lock_t writer_lock;
+};
+
+static node_t null;
+static int gc_id;
+
+static void left_rotate(node_t *x)
+{
+    mrsw_qnode_t p_qn, x_qn, y_qn;
+    node_t *y = x->r, *p = x->p;
+
+    wr_lock(&p->lock, &p_qn);
+    wr_lock(&x->lock, &x_qn);
+    wr_lock(&y->lock, &y_qn);
+
+    /* No need to write-lock to update parent link.
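+     * Searches follow child pointers only, and all writers are
+     * serialised on the global writer lock, so parent links can be
+     * updated without taking write locks.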
*/ + if ( (x->r = y->l) != &null ) x->r->p = x; + + x->p = y; + y->l = x; + y->p = p; + if ( x == p->l ) p->l = y; else p->r = y; + + wr_unlock(&y->lock, &y_qn); + wr_unlock(&x->lock, &x_qn); + wr_unlock(&p->lock, &p_qn); +} + + +static void right_rotate(node_t *x) +{ + mrsw_qnode_t p_qn, x_qn, y_qn; + node_t *y = x->l, *p = x->p; + + wr_lock(&p->lock, &p_qn); + wr_lock(&x->lock, &x_qn); + wr_lock(&y->lock, &y_qn); + + /* No need to write-lock to update parent link. */ + if ( (x->l = y->r) != &null ) x->l->p = x; + + x->p = y; + y->r = x; + y->p = p; + if ( x == p->l ) p->l = y; else p->r = y; + + wr_unlock(&y->lock, &y_qn); + wr_unlock(&x->lock, &x_qn); + wr_unlock(&p->lock, &p_qn); +} + + +/* No locks held on entry/exit. Colour changes safe. Rotations lock for us. */ +static void delete_fixup(ptst_t *ptst, set_t *s, node_t *x) +{ + node_t *p, *w; + + while ( (x->p != &s->root) && IS_BLACK(x->v) ) + { + p = x->p; + + if ( x == p->l ) + { + w = p->r; + if ( IS_RED(w->v) ) + { + w->v = MK_BLACK(w->v); + p->v = MK_RED(p->v); + /* Node W will be new parent of P. */ + left_rotate(p); + /* Get new sibling W. */ + w = p->r; + } + + if ( IS_BLACK(w->l->v) && IS_BLACK(w->r->v) ) + { + w->v = MK_RED(w->v); + x = p; + } + else + { + if ( IS_BLACK(w->r->v) ) + { + /* w->l is red => it cannot be null node. */ + w->l->v = MK_BLACK(w->l->v); + w->v = MK_RED(w->v); + right_rotate(w); + /* Old w is new w->r. Old w->l is new w.*/ + w = p->r; + } + + w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + w->r->v = MK_BLACK(w->r->v); + left_rotate(p); + break; + } + } + else /* SYMMETRIC CASE */ + { + w = p->l; + if ( IS_RED(w->v) ) + { + w->v = MK_BLACK(w->v); + p->v = MK_RED(p->v); + /* Node W will be new parent of P. */ + right_rotate(p); + /* Get new sibling W. */ + w = p->l; + } + + if ( IS_BLACK(w->l->v) && IS_BLACK(w->r->v) ) + { + w->v = MK_RED(w->v); + x = p; + } + else + { + if ( IS_BLACK(w->l->v) ) + { + /* w->r is red => it cannot be the null node. */ + w->r->v = MK_BLACK(w->r->v); + w->v = MK_RED(w->v); + left_rotate(w); + /* Old w is new w->l. Old w->r is new w.*/ + w = p->l; + } + + w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v)); + p->v = MK_BLACK(p->v); + w->l->v = MK_BLACK(w->l->v); + right_rotate(p); + break; + } + } + } + + x->v = MK_BLACK(x->v); +} + + +set_t *set_alloc(void) +{ + ptst_t *ptst; + set_t *set; + node_t *root; + + ptst = critical_enter(); + + set = (set_t *)malloc(sizeof(*set)); + + root = &set->root; + root->k = SENTINEL_KEYMIN; + root->v = MK_RED(NULL); + root->l = &null; + root->r = &null; + root->p = NULL; + mrsw_init(&root->lock); + + mcs_init(&set->writer_lock); + + critical_exit(ptst); + + return set; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + ptst_t *ptst; + node_t *x, *p, *g, *y, *new; + mrsw_qnode_t x_qn; + qnode_t writer_qn; + setval_t ov; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + mcs_lock(&s->writer_lock, &writer_qn); + + x = &s->root; + while ( (y = (k < x->k) ? x->l : x->r) != &null ) + { + x = y; + if ( k == x->k ) break; + } + + if ( k == x->k ) + { + ov = x->v; + /* Lock X to change mapping. */ + wr_lock(&x->lock, &x_qn); + if ( overwrite ) x->v = SET_COLOUR(v, GET_COLOUR(ov)); + wr_unlock(&x->lock, &x_qn); + ov = GET_VALUE(ov); + } + else + { + ov = NULL; + + new = (node_t *)gc_alloc(ptst, gc_id); + new->k = k; + new->v = MK_RED(v); + new->l = &null; + new->r = &null; + new->p = x; + mrsw_init(&new->lock); + + /* Lock X to change a child. 
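+         * Readers may be descending through X, so the child-pointer
+         * update must be serialised with their read locks.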
*/ + wr_lock(&x->lock, &x_qn); + if ( k < x->k ) x->l = new; else x->r = new; + wr_unlock(&x->lock, &x_qn); + + x = new; + + /* No locks held here. Colour changes safe. Rotations lock for us. */ + for ( ; ; ) + { + if ( (p = x->p) == &s->root ) + { + x->v = MK_BLACK(x->v); + break; + } + + if ( IS_BLACK(p->v) ) break; + + g = p->p; + if ( p == g->l ) + { + y = g->r; + if ( IS_RED(y->v) ) + { + p->v = MK_BLACK(p->v); + y->v = MK_BLACK(y->v); + g->v = MK_RED(g->v); + x = g; + } + else + { + if ( x == p->r ) + { + x = p; + left_rotate(x); + /* X and P switched round. */ + p = x->p; + } + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + right_rotate(g); + /* G no longer on the path. */ + } + } + else /* SYMMETRIC CASE */ + { + y = g->l; + if ( IS_RED(y->v) ) + { + p->v = MK_BLACK(p->v); + y->v = MK_BLACK(y->v); + g->v = MK_RED(g->v); + x = g; + } + else + { + if ( x == p->l ) + { + x = p; + right_rotate(x); + /* X and P switched round. */ + p = x->p; + } + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + left_rotate(g); + /* G no longer on the path. */ + } + } + } + } + + mcs_unlock(&s->writer_lock, &writer_qn); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *x, *y, *z; + mrsw_qnode_t qn[2], *y_pqn=qn+0, *yp_pqn=qn+1, *t_pqn; + mrsw_qnode_t z_qn, zp_qn; + qnode_t writer_qn; + setval_t ov = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + mcs_lock(&s->writer_lock, &writer_qn); + + z = &s->root; + while ( (z = (k < z->k) ? z->l : z->r) != &null ) + { + if ( k == z->k ) break; + } + + if ( k == z->k ) + { + ov = GET_VALUE(z->v); + + if ( (z->l != &null) && (z->r != &null) ) + { + /* Lock Z. It will get new key copied in. */ + wr_lock(&z->lock, &z_qn); + y = z->r; + /* + * Write-lock from Z to Y. We end up with (YP,Y) locked. + * Write-coupling is needed so we don't overtake searches for Y. + */ + wr_lock(&y->lock, y_pqn); + while ( y->l != &null ) + { + if ( y->p != z ) wr_unlock(&y->p->lock, yp_pqn); + y = y->l; + t_pqn = yp_pqn; + yp_pqn = y_pqn; + y_pqn = t_pqn; + wr_lock(&y->lock, y_pqn); + } + } + else + { + y = z; + /* Lock ZP. It will get new child. */ + wr_lock(&z->p->lock, &zp_qn); + /* Lock Z. It will be deleted. */ + wr_lock(&z->lock, &z_qn); + } + + /* No need to lock X. Only parent link is modified. */ + x = (y->l != &null) ? y->l : y->r; + x->p = y->p; + + if ( y == y->p->l ) y->p->l = x; else y->p->r = x; + + if ( y != z ) + { + z->k = y->k; + z->v = SET_COLOUR(GET_VALUE(y->v), GET_COLOUR(z->v)); + if ( y->p != z ) wr_unlock(&y->p->lock, yp_pqn); + wr_unlock(&y->lock, y_pqn); + } + else + { + wr_unlock(&z->p->lock, &zp_qn); + } + + wr_unlock(&z->lock, &z_qn); + + gc_free(ptst, y, gc_id); + + if ( IS_BLACK(y->v) ) delete_fixup(ptst, s, x); + } + + mcs_unlock(&s->writer_lock, &writer_qn); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + ptst_t *ptst; + node_t *m, *n; + mrsw_qnode_t qn[2], *m_pqn=&qn[0], *n_pqn=&qn[1], *t_pqn; + setval_t v = NULL; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + n = &s->root; + rd_lock(&n->lock, n_pqn); + + while ( (m = (k < n->k) ? 
n->l : n->r) != &null ) + { + rd_lock(&m->lock, m_pqn); + rd_unlock(&n->lock, n_pqn); + n = m; + t_pqn = m_pqn; + m_pqn = n_pqn; + n_pqn = t_pqn; + if ( k == n->k ) + { + v = GET_VALUE(n->v); + break; + } + } + + rd_unlock(&n->lock, n_pqn); + + critical_exit(ptst); + + return v; +} + + +void _init_set_subsystem(void) +{ + gc_id = gc_add_allocator(sizeof(node_t)); + + null.k = 0; + null.v = MK_BLACK(NULL); + null.l = NULL; + null.r = NULL; + null.p = NULL; + mrsw_init(&null.lock); +} diff --git a/src/mcas/rb_stm.c b/src/mcas/rb_stm.c new file mode 100644 index 000000000..26ad6a8d4 --- /dev/null +++ b/src/mcas/rb_stm.c @@ -0,0 +1,535 @@ +/****************************************************************************** + * rb_stm.c + * + * Lock-free red-black trees, based on STM. + * + * Copyright (c) 2002-2003, K A Fraser + * +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#define __SET_IMPLEMENTATION__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "portable_defns.h"
+#include "gc.h"
+#include "stm.h"
+#include "set.h"
+
+#define IS_BLACK(_v)   ((int_addr_t)(_v)&1)
+#define IS_RED(_v)     (!IS_BLACK(_v))
+#define MK_BLACK(_v)   ((setval_t)((int_addr_t)(_v)|1))
+#define MK_RED(_v)     ((setval_t)((int_addr_t)(_v)&~1))
+#define GET_VALUE(_v)  (MK_RED(_v))
+#define GET_COLOUR(_v) (IS_BLACK(_v))
+#define SET_COLOUR(_v,_c) ((setval_t)((unsigned long)(_v)|(unsigned long)(_c)))
+
+typedef struct node_st node_t;
+typedef stm_blk set_t;
+
+struct node_st
+{
+    setkey_t k;
+    setval_t v;
+    stm_blk *l, *r, *p;
+};
+
+static struct {
+    CACHE_PAD(0);
+    stm *memory;    /* read-only */
+    stm_blk *nullb; /* read-only */
+    CACHE_PAD(2);
+} shared;
+
+#define MEMORY (shared.memory)
+#define NULLB  (shared.nullb)
+
+static void left_rotate(ptst_t *ptst, stm_tx *tx, stm_blk *xb, node_t *x)
+{
+    stm_blk *yb, *pb;
+    node_t *y, *p;
+
+    yb = x->r;
+    pb = x->p;
+
+    y = write_stm_blk(ptst, tx, yb);
+    p = write_stm_blk(ptst, tx, pb);
+
+    if ( (x->r = y->l) != NULLB )
+    {
+        node_t *xr = write_stm_blk(ptst, tx, x->r);
+        xr->p = xb;
+    }
+
+    x->p = yb;
+    y->l = xb;
+    y->p = pb;
+    if ( xb == p->l ) p->l = yb; else p->r = yb;
+}
+
+
+static void right_rotate(ptst_t *ptst, stm_tx *tx, stm_blk *xb, node_t *x)
+{
+    stm_blk *yb, *pb;
+    node_t *y, *p;
+
+    yb = x->l;
+    pb = x->p;
+
+    y = write_stm_blk(ptst, tx, yb);
+    p = write_stm_blk(ptst, tx, pb);
+
+    if ( (x->l = y->r) != NULLB )
+    {
+        node_t *xl = write_stm_blk(ptst, tx, x->l);
+        xl->p = xb;
+    }
+
+    x->p = yb;
+    y->r = xb;
+    y->p = pb;
+    if ( xb == p->l ) p->l = yb; else p->r = yb;
+}
+
+
+static void delete_fixup(ptst_t *ptst, stm_tx *tx, set_t *s,
+                         stm_blk *xb, node_t *x)
+{
+    stm_blk *pb, *wb, *wlb, *wrb;
+    node_t *p, *w, *wl, *wr;
+
+    while ( (x->p != s) && IS_BLACK(x->v) )
+    {
+        pb = x->p;
+        p  = write_stm_blk(ptst, tx, pb);
+
+        if ( xb == p->l )
+        {
+            wb = p->r;
+            w  = write_stm_blk(ptst, tx, wb);
+            if ( IS_RED(w->v) )
+            {
+                w->v = MK_BLACK(w->v);
+                p->v = MK_RED(p->v);
+                left_rotate(ptst, tx, pb, p);
+                wb = p->r;
+                w  = write_stm_blk(ptst, tx, wb);
+            }
+
+            wlb = w->l;
+            wl  = read_stm_blk(ptst, tx, wlb);
+            wrb = w->r;
+            wr  = read_stm_blk(ptst, tx, wrb);
+            if ( IS_BLACK(wl->v) && IS_BLACK(wr->v) )
+            {
+                w->v = MK_RED(w->v);
+                xb = pb;
+                x  = p;
+            }
+            else
+            {
+                if ( IS_BLACK(wr->v) )
+                {
+                    wl = write_stm_blk(ptst, tx, wlb);
+                    wl->v = MK_BLACK(wl->v);
+                    w->v  = MK_RED(w->v);
+                    right_rotate(ptst, tx, wb, w);
+                    wb = p->r;
+                    w  = write_stm_blk(ptst, tx, wb);
+                }
+
+                wrb = w->r;
+                wr  = write_stm_blk(ptst, tx, wrb);
+                w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+                p->v = MK_BLACK(p->v);
+                wr->v = MK_BLACK(wr->v);
+                left_rotate(ptst, tx, pb, p);
+                break;
+            }
+        }
+        else /* SYMMETRIC CASE */
+        {
+            wb = p->l;
+            w  = write_stm_blk(ptst, tx, wb);
+            if ( IS_RED(w->v) )
+            {
+                w->v = MK_BLACK(w->v);
+                p->v = MK_RED(p->v);
+                right_rotate(ptst, tx, pb, p);
+                wb = p->l;
+                w  = write_stm_blk(ptst, tx, wb);
+            }
+
+            wlb = w->l;
+            wl  = read_stm_blk(ptst, tx, wlb);
+            wrb = w->r;
+            wr  = read_stm_blk(ptst, tx, wrb);
+            if ( IS_BLACK(wl->v) && IS_BLACK(wr->v) )
+            {
+                w->v = MK_RED(w->v);
+                xb = pb;
+                x  = p;
+            }
+            else
+            {
+                if ( IS_BLACK(wl->v) )
+                {
+                    wr = write_stm_blk(ptst, tx, wrb);
+                    wr->v = MK_BLACK(wr->v);
+                    w->v  = MK_RED(w->v);
+                    left_rotate(ptst, tx, wb, w);
+                    wb = p->l;
+                    w  = write_stm_blk(ptst, tx, wb);
+                }
+
+                wlb = w->l;
+                wl  = write_stm_blk(ptst, tx, wlb);
+                w->v = SET_COLOUR(GET_VALUE(w->v), GET_COLOUR(p->v));
+                p->v = MK_BLACK(p->v);
+                wl->v =
MK_BLACK(wl->v); + right_rotate(ptst, tx, pb, p); + break; + } + } + } + + x->v = MK_BLACK(x->v); +} + + +set_t *set_alloc(void) +{ + ptst_t *ptst; + set_t *set; + node_t *root; + + ptst = critical_enter(); + + set = new_stm_blk(ptst, MEMORY); + + root = init_stm_blk(ptst, MEMORY, set); + root->k = SENTINEL_KEYMIN; + root->v = MK_RED(NULL); + root->l = NULLB; + root->r = NULLB; + root->p = NULL; + + critical_exit(ptst); + + return set; +} + + +setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite) +{ + ptst_t *ptst; + stm_tx *tx; + stm_blk *xb, *b, *pb, *gb, *yb, *newb; + node_t *x, *p, *g, *y, *new; + setval_t ov; + + k = CALLER_TO_INTERNAL_KEY(k); + + newb = NULL; + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + + b = s; + while ( b != NULLB ) + { + xb = b; + x = read_stm_blk(ptst, tx, xb); + if ( k == x->k ) break; + b = (k < x->k) ? x->l : x->r; + } + + x = write_stm_blk(ptst, tx, xb); + + if ( k == x->k ) + { + ov = x->v; + if ( overwrite ) x->v = SET_COLOUR(v, GET_COLOUR(ov)); + ov = GET_VALUE(ov); + } + else + { + ov = NULL; + if ( newb == NULL ) + { + newb = new_stm_blk(ptst, MEMORY); + new = init_stm_blk(ptst, MEMORY, newb); + new->k = k; + } + + new->v = MK_RED(v); + new->l = NULLB; + new->r = NULLB; + new->p = xb; + + if ( k < x->k ) x->l = newb; else x->r = newb; + + xb = newb; + x = new; + + for ( ; ; ) + { + if ( (pb = x->p) == s ) + { + x->v = MK_BLACK(x->v); + break; + } + + p = read_stm_blk(ptst, tx, pb); + if ( IS_BLACK(p->v) ) break; + + gb = p->p; + g = read_stm_blk(ptst, tx, gb); + if ( pb == g->l ) + { + yb = g->r; + y = read_stm_blk(ptst, tx, yb); + if ( IS_RED(y->v) ) + { + p = write_stm_blk(ptst, tx, pb); + y = write_stm_blk(ptst, tx, yb); + g = write_stm_blk(ptst, tx, gb); + p->v = MK_BLACK(p->v); + y->v = MK_BLACK(y->v); + g->v = MK_RED(g->v); + xb = gb; + x = g; + } + else + { + if ( xb == p->r ) + { + xb = pb; + x = write_stm_blk(ptst, tx, pb); + left_rotate(ptst, tx, xb, x); + } + pb = x->p; + p = write_stm_blk(ptst, tx, pb); + gb = p->p; + g = write_stm_blk(ptst, tx, gb); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + right_rotate(ptst, tx, gb, g); + } + } + else /* SYMMETRIC CASE */ + { + yb = g->l; + y = read_stm_blk(ptst, tx, yb); + if ( IS_RED(y->v) ) + { + p = write_stm_blk(ptst, tx, pb); + y = write_stm_blk(ptst, tx, yb); + g = write_stm_blk(ptst, tx, gb); + p->v = MK_BLACK(p->v); + y->v = MK_BLACK(y->v); + g->v = MK_RED(g->v); + xb = gb; + x = g; + } + else + { + if ( xb == p->l ) + { + xb = pb; + x = write_stm_blk(ptst, tx, pb); + right_rotate(ptst, tx, xb, x); + } + pb = x->p; + p = write_stm_blk(ptst, tx, pb); + gb = p->p; + g = write_stm_blk(ptst, tx, gb); + p->v = MK_BLACK(p->v); + g->v = MK_RED(g->v); + left_rotate(ptst, tx, gb, g); + } + } + } + } + + remove_from_tx(ptst, tx, NULLB); + } + while ( !commit_stm_tx(ptst, tx) ); + + /* Free unused new block. */ + if ( (ov != NULL) && (newb != NULL) ) free_stm_blk(ptst, MEMORY, newb); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *s, setkey_t k) +{ + ptst_t *ptst; + stm_tx *tx; + stm_blk *zb, *b, *xb, *yb; + node_t *z, *x, *y, *yp; + setval_t ov; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + ov = NULL; + b = s; + + while ( b != NULLB ) + { + zb = b; + z = read_stm_blk(ptst, tx, zb); + if ( k == z->k ) + { + ov = GET_VALUE(z->v); + break; + } + b = (k < z->k) ? 
z->l : z->r; + } + + if ( ov != NULL ) + { + z = write_stm_blk(ptst, tx, zb); + + if ( (z->l != NULLB) && (z->r != NULLB) ) + { + /* Find successor of node z, and place in (yb,y). */ + yb = z->r; + y = read_stm_blk(ptst, tx, yb); + + while ( y->l != NULLB ) + { + yb = y->l; + y = read_stm_blk(ptst, tx, yb); + } + + y = write_stm_blk(ptst, tx, yb); + } + else + { + yb = zb; + y = z; + } + + xb = (y->l != NULLB) ? y->l : y->r; + x = write_stm_blk(ptst, tx, xb); + x->p = y->p; + + yp = write_stm_blk(ptst, tx, y->p); + if ( yb == yp->l ) yp->l = xb; else yp->r = xb; + + if ( y != z ) + { + z->k = y->k; + z->v = SET_COLOUR(GET_VALUE(y->v), GET_COLOUR(z->v)); + } + + if ( IS_BLACK(y->v) ) delete_fixup(ptst, tx, s, xb, x); + } + + remove_from_tx(ptst, tx, NULLB); + } + while ( !commit_stm_tx(ptst, tx) ); + + /* Free a deleted block. */ + if ( ov != NULL ) free_stm_blk(ptst, MEMORY, yb); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_lookup(set_t *s, setkey_t k) +{ + ptst_t *ptst; + stm_tx *tx; + stm_blk *nb; + node_t *n; + setval_t v; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + v = NULL; + nb = s; + + while ( nb != NULLB ) + { + n = read_stm_blk(ptst, tx, nb); + if ( k == n->k ) + { + v = GET_VALUE(n->v); + break; + } + nb = (k < n->k) ? n->l : n->r; + } + } + while ( !commit_stm_tx(ptst, tx) ); + + critical_exit(ptst); + + return v; +} + + +void _init_set_subsystem(void) +{ + node_t *null; + ptst_t *ptst; + + ptst = critical_enter(); + + _init_stm_subsystem(0); + + MEMORY = new_stm(ptst, sizeof(node_t)); + + NULLB = new_stm_blk(ptst, MEMORY); + null = init_stm_blk(ptst, MEMORY, NULLB); + null->k = 0; + null->v = MK_BLACK(NULL); + null->l = NULL; + null->r = NULL; + null->p = NULL; + + critical_exit(ptst); +} diff --git a/src/mcas/replay.c b/src/mcas/replay.c new file mode 100644 index 000000000..bc7000720 --- /dev/null +++ b/src/mcas/replay.c @@ -0,0 +1,474 @@ +/****************************************************************************** + * replay.c + * + * Replay the log output of search-structure runs. + * Must build set_harness.c with DO_WRITE_LOG defined. + * + * Copyright (c) 2002-2003, K A Fraser + * + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "portable_defns.h"
+
+#define RMAX_THREADS 256
+#define VERIFY_ORDERINGS
+
+#define LOG_REPLAYED (1<<26)
+#define LOG_KEY_MASK 0xffffff
+
+typedef struct log_st
+{
+    interval_t   start, end;
+    unsigned int data;          /* key, and replay flag */
+    void        *val, *old_val; /* op changed mapping from old_val to val */
+} log_t;
+
+#define REPLAYED(_l) ((_l)->data & LOG_REPLAYED)
+
+static log_t *global_log;
+static int nr_threads, nr_updates, nr_keys;
+static int *key_offsets;
+static int *success;
+static unsigned int next_key = 0;
+static pthread_mutex_t key_lock;
+
+
+/*
+ * GLOBAL LOGS SORTED ON:
+ *  1. Key value
+ *  2. Start time
+ *
+ * Replayer deals with each key value in turn.
+ */
+static int compare(const void *t1, const void *t2)
+{
+    const log_t *l1 = t1;
+    const log_t *l2 = t2;
+    const int k1 = l1->data & LOG_KEY_MASK;
+    const int k2 = l2->data & LOG_KEY_MASK;
+
+    if ( k1 < k2 ) return(-1);
+    if ( k1 > k2 ) return(+1);
+
+    if ( l1->start < l2->start ) return(-1);
+
+    return(+1);
+}
+
+
+static int do_op(log_t *log, void **key_state)
+{
+    if ( REPLAYED(log) || (log->old_val != *key_state) ) return(0);
+    *key_state = log->val;
+    log->data |= LOG_REPLAYED;
+    return(1);
+}
+
+
+static void undo_op(log_t *log, void **key_state)
+{
+    assert(REPLAYED(log));
+    log->data &= ~LOG_REPLAYED;
+    *key_state = log->old_val;
+}
+
+
+/* Sink down element @pos of @heap. */
+static void down_heap(log_t **heap, int *heap_offsets, log_t *log, int pos)
+{
+    int sz = (int)heap[0], nxt;
+    log_t *tmp;
+    while ( (nxt = (pos << 1)) <= sz )
+    {
+        if ( ((nxt+1) <= sz) && (heap[nxt+1]->end < heap[nxt]->end) ) nxt++;
+        if ( heap[nxt]->end > heap[pos]->end ) break;
+        heap_offsets[heap[pos] - log] = nxt;
+        heap_offsets[heap[nxt] - log] = pos;
+        tmp = heap[pos];
+        heap[pos] = heap[nxt];
+        heap[nxt] = tmp;
+        pos = nxt;
+    }
+}
+
+/* Float element @pos up @heap. */
+static void up_heap(log_t **heap, int *heap_offsets, log_t *log, int pos)
+{
+    log_t *tmp;
+    while ( pos > 1 )
+    {
+        if ( heap[pos]->end > heap[pos>>1]->end ) break;
+        heap_offsets[heap[pos] - log]    = pos >> 1;
+        heap_offsets[heap[pos>>1] - log] = pos;
+        tmp = heap[pos];
+        heap[pos] = heap[pos>>1];
+        heap[pos>>1] = tmp;
+        pos >>= 1;
+    }
+}
+
+
+/* Delete @entry from @heap. */
+static void remove_entry(log_t **heap, int *heap_offsets,
+                         log_t *log, log_t *entry)
+{
+    int sz = (int)heap[0];
+    int pos = heap_offsets[entry - log];
+    heap_offsets[heap[sz] - log] = pos;
+    heap[pos] = heap[sz];
+    heap[0] = (void *)(--sz);
+    if ( (pos > 1) && (heap[pos]->end < heap[pos>>1]->end) )
+    {
+        up_heap(heap, heap_offsets, log, pos);
+    }
+    else
+    {
+        down_heap(heap, heap_offsets, log, pos);
+    }
+}
+
+
+/* Add new entry @new to @heap.
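+ * heap[0] stores the current size cast to a pointer, live entries
+ * occupy heap[1..size], and heap_offsets[] maps a log entry's index
+ * to its heap slot so remove_entry() can find it in O(1): after
+ * add_entry(heap, offs, log, e), heap[offs[e - log]] == e holds.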
*/ +static void add_entry(log_t **heap, int *heap_offsets, log_t *log, log_t *new) +{ + int sz = (int)heap[0]; + heap[0] = (void *)(++sz); + heap_offsets[new - log] = sz; + heap[sz] = new; + up_heap(heap, heap_offsets, log, sz); +} + + +/* + * This linearisation algorithm is a depth-first search of all feasible + * orderings. At each step, the next available operation is selected. + * The set of "available" operations is those which: + * (1) have not already been selected on this search path + * (2) are operations whose results are correct given current state + * (eg. a failed delete couldn't be selected if the key is in the set!) + * (3) have start times <= the earliest end time in the set. + * (1) ensures that each operation happens only once. (2) ensures that + * abstract state is consistent between operations. (3) ensures that time + * ordering is conserved. + */ +static int linearise_ops_for_key( + log_t *log, int nr_items, log_t **stack, + log_t **cutoff_heap, int *heap_offsets, void **key_state) +{ + int i; + log_t **sp = stack; + interval_t cutoff; + + /* Construct cutoff heap. */ + cutoff_heap[0] = (void *)nr_items; + for ( i = 0; i < nr_items; i++ ) + { + cutoff_heap[i+1] = log + i; + heap_offsets[i] = i+1; + } + for ( i = nr_items>>1; i > 0; i-- ) + { + down_heap(cutoff_heap, heap_offsets, log, i); + } + + cutoff = cutoff_heap[1]->end; + + for ( i = 0; ; ) + { + while ( (i < nr_items) && (log[i].start <= cutoff) ) + { + if ( !do_op(&log[i], key_state) ) { i++; continue; } + + *sp++ = &log[i]; + + /* Done? */ + if ( (sp - stack) == nr_items ) goto success; + + remove_entry(cutoff_heap, heap_offsets, log, &log[i]); + cutoff = cutoff_heap[1]->end; + i = 0; + } + + /* Failure? */ + if ( (sp - stack) == 0 ) + { + for ( i = -3; i < nr_items + 3; i++ ) + { +#if 1 + printf("%08x -> %08x -- %d: %08x -> %08x\n", + (unsigned int)log[i].start, + (unsigned int)log[i].end, + log[i].data & LOG_KEY_MASK, + (unsigned int)log[i].old_val, + (unsigned int)log[i].val); +#endif + } + return(0); + } + + i = *--sp - log; + undo_op(&log[i], key_state); + add_entry(cutoff_heap, heap_offsets, log, &log[i]); + cutoff = cutoff_heap[1]->end; + i++; + } + + success: + return(1); +} + + +static void *thread_start(void *arg) +{ + unsigned long tid = (unsigned long)arg; + unsigned int our_key; + int ch_start, ch_end, start, end, nr_items, *heap_offsets; + log_t **stack; + log_t **cutoff_heap; + interval_t cutoff; + void *key_state; +#ifdef VERIFY_ORDERINGS + int i; +#endif + + stack = malloc((nr_threads*nr_updates+1)*sizeof(log_t*)); + cutoff_heap = malloc((nr_threads*nr_updates+1)*sizeof(*cutoff_heap)); + heap_offsets = malloc((nr_threads*nr_updates+1)*sizeof(*heap_offsets)); + if ( !stack || !cutoff_heap || !heap_offsets ) + { + fprintf(stderr, "Error allocating space for stacks\n"); + return(NULL); + } + + again: + pthread_mutex_lock(&key_lock); + our_key = next_key++; + pthread_mutex_unlock(&key_lock); + if ( our_key >= nr_keys ) goto out; + + start = key_offsets[our_key]; + end = key_offsets[our_key+1]; + nr_items = end - start; + + printf("[Thread %lu] ++ Linearising key %d (%d events)\n", + tid, our_key, nr_items); + +#if 0 + { + int i; + for ( i = start; i < end; i++ ) + { + printf("%04d/%04d -- %08x -> %08x -- %d: %08x -> %08x\n", + our_key, i - start, + (unsigned int)global_log[i].start, + (unsigned int)global_log[i].end, + global_log[i].data & LOG_KEY_MASK, + (unsigned int)global_log[i].old_val, + (unsigned int)global_log[i].val); + } + } +#endif + + /* + * We divide operations into independent 
chunks. A chunk is a maximal + * sequence of operations, ordered on start time, that does not + * overlap with any operation in any other chunk. Clearly, finding + * a linearisation for each chunk produces a total schedule. + */ + success[our_key] = 1; + key_state = 0; + for ( ch_start = start; ch_start < end; ch_start = ch_end ) + { + cutoff = global_log[ch_start].end; + for ( ch_end = ch_start; ch_end < end; ch_end++ ) + { + if ( global_log[ch_end].start > cutoff ) break; + if ( global_log[ch_end].end > cutoff ) + cutoff = global_log[ch_end].end; + } + + /* Linearise chunk ch_start -> ch_end. */ + success[our_key] = linearise_ops_for_key( + &global_log[ch_start], + ch_end - ch_start, + &stack[ch_start - start], + cutoff_heap, + heap_offsets, + &key_state); + + if ( !success[our_key] ) + { + printf("[Thread %lu] -- Linearisation FAILED for key %d\n", + tid, our_key); + goto again; + } + } + + printf("[Thread %lu] -- Linearisation %s for key %d\n", + tid, (success[our_key] ? "found" : "FAILED"), our_key); + +#ifdef VERIFY_ORDERINGS + printf("[Thread %lu] ++ Verifying key %d\n", tid, our_key); + cutoff = 0; + key_state = 0; + for ( i = 0; i < nr_items; i++ ) + { + stack[i]->data &= ~LOG_REPLAYED; /* stop valid_op() from choking */ + if ( !do_op(stack[i], &key_state) || (stack[i]->end < cutoff) ) + { + int j; + fprintf(stderr, "\t*** INTERNAL ERROR: " + "Assigned ordering is invalid!\n"); + for ( j = (i < 2) ? 0 : (i-2); j < i+6; j++ ) + { + printf("%08x -> %08x -- %d: %08x -> %08x\n", + (unsigned int)stack[j]->start, + (unsigned int)stack[j]->end, + stack[j]->data & LOG_KEY_MASK, + (unsigned int)stack[j]->old_val, + (unsigned int)stack[j]->val); + } + exit(-1); + } + if ( stack[i]->start > cutoff ) cutoff = stack[i]->start; + } + printf("[Thread %lu] -- Verified key %d\n", tid, our_key); +#endif + + goto again; + + out: + return(NULL); +} + + +int main(int argc, char **argv) +{ + pthread_t thread[RMAX_THREADS]; + int fd, i, j, failed = 0, nr_cpus; + unsigned long log_header[3]; + + if ( argc != 2 ) + { + fprintf(stderr, "%s \n", argv[0]); + exit(1); + } + + nr_cpus = (int)sysconf(_SC_NPROCESSORS_ONLN); + if ( nr_cpus > RMAX_THREADS ) nr_cpus = RMAX_THREADS; + + if ( (fd = open(argv[1], O_RDONLY, 0)) == -1 ) + { + fprintf(stderr, "Error opening log\n"); + exit(-1); + } + + /* Grok the log header. */ + read(fd, log_header, sizeof(log_header)); + nr_threads = log_header[0]; + nr_updates = log_header[1]; + nr_keys = log_header[2]; + printf("Read log header: nr_updates=%d, nr_threads=%d, nr_keys=%d\n", + nr_updates, nr_threads, nr_keys); + + /* Allocate state for processing log entries. */ + global_log = malloc((nr_threads*nr_updates+1)*sizeof(log_t)); + key_offsets = malloc((nr_keys+1)*sizeof(*key_offsets)); + success = malloc(nr_keys*sizeof(*success)); + if ( !global_log || !key_offsets || !success ) + { + fprintf(stderr, "Error allocating space for log\n"); + exit(-1); + } + + /* Read log entries, and sort into key and timestamp order. */ + read(fd, global_log, nr_threads*nr_updates*sizeof(log_t)); + global_log[nr_threads*nr_updates].data = LOG_KEY_MASK; /* sentinel */ + + printf("Sorting logs..."); fflush(stdout); + qsort(global_log, nr_threads*nr_updates, sizeof(log_t), compare); + printf(" done\n"); + + /* Find offsets of key regions in global table. 
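+ *
+ * After this pass, entries for the k-th distinct key occupy the range
+ * key_offsets[k] .. key_offsets[k+1]-1.  For illustration, if the sorted
+ * log held entries for keys {0,0,0,2,2,5} we would finish with nr_keys=3
+ * and key_offsets = {0,3,5,6}; worker threads then claim whole keys via
+ * next_key and linearise each range independently.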
+ */
+    key_offsets[0] = 0;
+    nr_keys = 0;
+    for ( i = 0; i < (nr_threads * nr_updates); i = j )
+    {
+        j = i+1;
+        while ( (global_log[j].data & LOG_KEY_MASK) ==
+                (global_log[i].data & LOG_KEY_MASK) ) j++;
+        key_offsets[++nr_keys] = j;
+    }
+
+    /* Set up a bunch of worker threads.... */
+    pthread_mutex_init(&key_lock, NULL);
+    for ( i = 0; i < nr_cpus; i++ )
+    {
+        if ( pthread_create(&thread[i], NULL, thread_start, (void *)i) )
+        {
+            fprintf(stderr, "Error creating thread %d (%d)\n", i, errno);
+            exit(1);
+        }
+    }
+
+    /* ...and wait for them all to complete. */
+    for ( i = 0; i < nr_cpus; i++ )
+    {
+        pthread_join(thread[i], NULL);
+    }
+
+    /* Summarise results from worker threads. */
+    for ( i = 0; i < nr_keys; i++ )
+    {
+        if ( success[i] ) continue;
+        printf("FAILED on key %d\n", i);
+        failed++;
+    }
+
+    if ( failed )
+    {
+        printf("Failed on %d keys\n", failed);
+        return(1);
+    }
+
+    printf("All assigned orderings are valid\n");
+    return(0);
+}
diff --git a/src/mcas/set.h b/src/mcas/set.h
new file mode 100644
index 000000000..8e521f2a5
--- /dev/null
+++ b/src/mcas/set.h
@@ -0,0 +1,102 @@
+#ifndef __SET_H__
+#define __SET_H__
+
+
+typedef unsigned long setkey_t;
+typedef void         *setval_t;
+
+
+#ifdef __SET_IMPLEMENTATION__
+
+/*************************************
+ * INTERNAL DEFINITIONS
+ */
+
+/* Fine for 2^NUM_LEVELS nodes. */
+#define NUM_LEVELS 20
+
+
+/* Internal key values with special meanings. */
+#define INVALID_FIELD   (0)    /* Uninitialised field value.     */
+#define SENTINEL_KEYMIN ( 1UL) /* Key value of first dummy node. */
+#define SENTINEL_KEYMAX (~0UL) /* Key value of last dummy node.  */
+
+
+/*
+ * Used internally by set access functions, so that callers can use
+ * key values 0 and 1, without knowing these have special meanings.
+ */
+#define CALLER_TO_INTERNAL_KEY(_k) ((_k) + 2)
+
+
+/*
+ * SUPPORT FOR WEAK ORDERING OF MEMORY ACCESSES
+ */
+
+#ifdef WEAK_MEM_ORDER
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f)                                       \
+do {                                                            \
+    (_x) = (_f);                                                \
+    if ( (_x) == INVALID_FIELD ) { RMB(); (_x) = (_f); }        \
+    assert((_x) != INVALID_FIELD);                              \
+} while ( 0 )
+
+#else
+
+/* Read field @_f into variable @_x. */
+#define READ_FIELD(_x,_f) ((_x) = (_f))
+
+#endif
+
+
+#else
+
+/*************************************
+ * PUBLIC DEFINITIONS
+ */
+
+/*
+ * Key range accepted by set functions.
+ * We lose three values (conveniently at top end of key space).
+ *  - Known invalid value to which all fields are initialised.
+ *  - Sentinel key values for up to two dummy nodes.
+ */
+#define KEY_MIN  ( 0U)
+#define KEY_MAX  ((~0U) - 3)
+
+typedef void set_t; /* opaque */
+
+void _init_set_subsystem(void);
+
+/*
+ * Allocate an empty set.
+ */
+set_t *set_alloc(void);
+
+/*
+ * Add mapping (@k -> @v) into set @s. Return previous mapped value if
+ * one existed, or NULL if no previous mapping for @k existed.
+ *
+ * If @overwrite is FALSE, then if a mapping already exists it is not
+ * modified, and the existing value is returned unchanged. It is possible
+ * to see if the value was changed by observing if the return value is NULL.
+ */
+setval_t set_update(set_t *s, setkey_t k, setval_t v, int overwrite);
+
+/*
+ * Remove mapping for key @k from set @s. Return value associated with
+ * removed mapping, or NULL if there was no mapping to delete.
+ */
+setval_t set_remove(set_t *s, setkey_t k);
+
+/*
+ * Look up mapping for key @k in set @s. Return value if found, else NULL.
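+ *
+ * Illustrative only -- a minimal single-threaded caller of this API,
+ * assuming the _init_*_subsystem() calls made in set_harness.c have
+ * already run:
+ *
+ *   set_t *s = set_alloc();
+ *   set_update(s, 42, (setval_t)0x1234, 1);        /* maps 42 -> 0x1234 */
+ *   assert(set_lookup(s, 42) == (setval_t)0x1234);
+ *   set_remove(s, 42);                             /* returns 0x1234 */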
+ */
+setval_t set_lookup(set_t *s, setkey_t k);
+
+#endif /* __SET_IMPLEMENTATION__ */
+
+
+#endif /* __SET_H__ */
diff --git a/src/mcas/set_harness.c b/src/mcas/set_harness.c
new file mode 100644
index 000000000..89078d7a2
--- /dev/null
+++ b/src/mcas/set_harness.c
@@ -0,0 +1,574 @@
+/******************************************************************************
+ * set_harness.c
+ *
+ * Test harness for the various set implementations.
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.  Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sched.h>
+#include <pthread.h>
+#include <ucontext.h>
+
+#include "portable_defns.h"
+#include "set.h"
+#include "ptst.h"
+
+/* This produces an operation log for the 'replay' checker.
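+ * When enabled, every thread records a {start, end, key, val, old_val}
+ * tuple per operation, and main() writes the lot to a log file named by
+ * an extra command-line argument, in the format replay.c expects; the
+ * iteration and wall-time limits below drop so the log stays manageable.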
*/ +/*#define DO_WRITE_LOG*/ + +#ifdef DO_WRITE_LOG +#define MAX_ITERATIONS 100000 +#define MAX_WALL_TIME 50 /* seconds */ +#else +#define MAX_ITERATIONS 100000000 +#define MAX_WALL_TIME 10 /* seconds */ +#endif + +/* + * ***************** LOGGING + */ + +#define MAX_LOG_RECORDS 256 + +#define LOG_KIND_INT 0 +#define LOG_KIND_STRING 1 +#define LOG_KIND_FLOAT 2 + +typedef struct { + char *name; + int kind; + int val_int; + char *val_string; + float val_float; +} log_record_t; + +static log_record_t log_records[MAX_LOG_RECORDS]; + +static int num_log_records = 0; + +static void log_int (char *name, int val) { + log_records[num_log_records].name = name; + log_records[num_log_records].kind = LOG_KIND_INT; + log_records[num_log_records].val_int = val; + num_log_records ++; +} + +static void log_string (char *name, char *val) { + log_records[num_log_records].name = name; + log_records[num_log_records].kind = LOG_KIND_STRING; + log_records[num_log_records].val_string = val; + num_log_records ++; +} + +static void log_float (char *name, float val) { + log_records[num_log_records].name = name; + log_records[num_log_records].kind = LOG_KIND_FLOAT; + log_records[num_log_records].val_float = val; + num_log_records ++; +} + +static void dump_log (void) { + int i; + + fprintf (stdout, "-------------------------------------------" + "---------------------------\n"); + for (i = 0; i < num_log_records; i ++) + { + char padding[40]; + strcpy(padding, " "); + if (30-strlen(log_records[i].name) >= 0){ + padding[30-strlen(log_records[i].name)] = '\0'; + } + fprintf (stdout, "%s%s = ", padding, log_records[i].name); + { + int kind = log_records [i].kind; + if (kind == LOG_KIND_INT) { + fprintf (stdout, "%d\n", log_records[i].val_int); + } else if (kind == LOG_KIND_STRING) { + fprintf (stdout, "%s\n", log_records[i].val_string); + } else if (kind == LOG_KIND_FLOAT) { + fprintf (stdout, "%.3f\n", log_records[i].val_float); + } + } + } + fprintf (stdout, "-------------------------------------------" + "---------------------------\n"); + + for (i = 0; i < num_log_records; i ++) + { + int kind = log_records [i].kind; + if (i != 0) { fprintf (stderr, " "); } + if (kind == LOG_KIND_INT) { + fprintf (stderr, "%d", log_records[i].val_int); + } else if (kind == LOG_KIND_STRING) { + fprintf (stderr, "%s", log_records[i].val_string); + } else if (kind == LOG_KIND_FLOAT) { + fprintf (stderr, "%.3f", log_records[i].val_float); + } + } + fprintf (stderr, " LOG\n"); +} + +/* + * ************** END OF LOGGING + */ + +#define TVAL(x) ((x.tv_sec * 1000000) + x.tv_usec) + +/* Log tables. Written out at end-of-day. */ +typedef struct log_st +{ + interval_t start, end; + unsigned int key; + void *val, *old_val; /* @old_val used by update() and remove() */ +} log_t; +#define SIZEOF_GLOBAL_LOG (num_threads*MAX_ITERATIONS*sizeof(log_t)) +static log_t *global_log; +static interval_t interval = 0; + +static bool_t go = FALSE; +static int threads_initialised1 = 0, max_key, log_max_key; +static int threads_initialised2 = 0; +static int threads_initialised3 = 0; +int num_threads; + +static unsigned long proportion; + +static struct timeval start_time, done_time; +static struct tms start_tms, done_tms; + +static int successes[MAX_THREADS]; + +#ifdef SPARC +static int processors[MAX_THREADS]; +#endif + +/* All the variables accessed in the critical main loop. 
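+ * The CACHE_PAD() spacers keep 'alarm_time' and 'set' on separate cache
+ * lines: without them, a store to one field would bounce the line that
+ * holds the other between processors (false sharing) and skew results.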
*/ +static struct { + CACHE_PAD(0); + bool_t alarm_time; + CACHE_PAD(1); + set_t *set; + CACHE_PAD(2); +} shared; + +#define nrand(_r) (((_r) = (_r) * 1103515245) + 12345) + +static void alarm_handler( int arg) +{ + shared.alarm_time = 1; +} + +/*int cntr[MAX_THREADS] = { 0 };*/ + +static void *thread_start(void *arg) +{ + unsigned long k; + int i; + void *ov, *v; + int id = (int)arg; +#ifdef DO_WRITE_LOG + log_t *log = global_log + id*MAX_ITERATIONS; + interval_t my_int; +#endif + unsigned long r = ((unsigned long)arg)+3; /*RDTICK();*/ + unsigned int prop = proportion; + unsigned int _max_key = max_key; + +#ifdef SPARC + i = processor_bind(P_LWPID, P_MYID, processors[id], NULL); + if ( i != 0 ) + { + printf("Failed to bind to processor %d! (%d)\n", processors[id], i); + abort(); + } +#endif + + if ( id == 0 ) + { + _init_ptst_subsystem(); + _init_gc_subsystem(); + _init_set_subsystem(); + shared.set = set_alloc(); + } + + /* BARRIER FOR ALL THREADS */ + { + int n_id, id = threads_initialised1; + while ( (n_id = CASIO(&threads_initialised1, id, id+1)) != id ) + id = n_id; + } + while ( threads_initialised1 != num_threads ) MB(); + +#ifndef DO_WRITE_LOG + /* Start search structure off with a well-distributed set of inital keys */ + for ( i = (_max_key / num_threads); i != 0; i >>= 1 ) + { + for ( k = i >> 1; k < (_max_key / num_threads); k += i ) + { + set_update(shared.set, + k + id * (_max_key / num_threads), + (void *)0xdeadbee0, 1); + } + } +#endif + + { + int n_id, id = threads_initialised2; + while ( (n_id = CASIO(&threads_initialised2, id, id+1)) != id ) + id = n_id; + } + while ( threads_initialised2 != num_threads ) MB(); + + if ( id == 0 ) + { + (void)signal(SIGALRM, &alarm_handler); + (void)alarm(MAX_WALL_TIME); + WMB(); + gettimeofday(&start_time, NULL); + times(&start_tms); + go = TRUE; + WMB(); + } + else + { + while ( !go ) MB(); + } + +#ifdef DO_WRITE_LOG + get_interval(my_int); +#endif + for ( i = 0; (i < MAX_ITERATIONS) && !shared.alarm_time; i++ ) + { + /* O-3: ignore ; 4-11: proportion ; 12: ins/del */ + k = (nrand(r) >> 4) & (_max_key - 1); + nrand(r); +#ifdef DO_WRITE_LOG + log->start = my_int; +#endif + if ( ((r>>4)&255) < prop ) + { + ov = v = set_lookup(shared.set, k); + } + else if ( ((r>>12)&1) ) + { + v = (void *)((r&~7)|0x8); + ov = set_update(shared.set, k, v, 1); + } + else + { + v = NULL; + ov = set_remove(shared.set, k); + } + +#ifdef DO_WRITE_LOG + get_interval(my_int); + log->key = k; + log->val = v; + log->old_val = ov; + log->end = my_int; + log++; +#endif + } + + /* BARRIER FOR ALL THREADS */ + { + int n_id, id = threads_initialised3; + while ( (n_id = CASIO(&threads_initialised3, id, id+1)) != id ) + id = n_id; + } + while ( threads_initialised3 != num_threads ) MB(); + +#if 0 + if ( id == 0 ) + { + extern void check_tree(set_t *); + check_tree(shared.set); + } +#endif + + if ( id == num_threads - 1 ) + { + gettimeofday(&done_time, NULL); + times(&done_tms); + WMB(); + _destroy_gc_subsystem(); + } + + successes[id] = i; + + return(NULL); +} + +#define THREAD_TEST thread_start +#define THREAD_FLAGS THR_BOUND + +#ifdef PPC +static pthread_attr_t attr; +#endif + +static void test_multithreaded (void) +{ + int i, fd; + pthread_t thrs[MAX_THREADS]; + int num_successes; + int min_successes, max_successes; + int ticksps = sysconf(_SC_CLK_TCK); + float wall_time, user_time, sys_time; + + if ( num_threads == 1 ) goto skip_thread_creation; + +#ifdef PPC + i = pthread_attr_init (&attr); + if (i !=0) { + fprintf (stderr, "URK! 
pthread_attr_init rc=%d\n", i); + } + i = pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM); + if (i !=0) { + fprintf (stderr, "URK! pthread_attr_setscope rc=%d\n", i); + } +#endif + +#ifdef MIPS + pthread_setconcurrency(num_threads + 1); +#else + pthread_setconcurrency(num_threads); +#endif + + for (i = 0; i < num_threads; i ++) + { + MB(); +#ifdef PPC + pthread_create (&thrs[i], &attr, THREAD_TEST, (void *)i); +#else + pthread_create (&thrs[i], NULL, THREAD_TEST, (void *)i); +#endif + } + + skip_thread_creation: + if ( num_threads == 1 ) + { + thread_start(0); + } + else + { + for (i = 0; i < num_threads; i ++) + { + (void)pthread_join (thrs[i], NULL); + } + } + + wall_time = (float)(TVAL(done_time) - TVAL(start_time))/ 1000000; + user_time = ((float)(done_tms.tms_utime - start_tms.tms_utime))/ticksps; + sys_time = ((float)(done_tms.tms_stime - start_tms.tms_stime))/ticksps; + + log_float ("wall_time_s", wall_time); + log_float ("user_time_s", user_time); + log_float ("system_time_s", sys_time); + + num_successes = 0; + min_successes = INT_MAX; + max_successes = INT_MIN; + for ( i = 0; i < num_threads; i++ ) + { + num_successes += successes[i]; + if ( successes[i] < min_successes ) min_successes = successes[i]; + if ( successes[i] > max_successes ) max_successes = successes[i]; + } + + log_int ("min_successes", min_successes); + log_int ("max_successes", max_successes); + log_int ("num_successes", num_successes); + + log_float("us_per_success", (num_threads*wall_time*1000000.0)/num_successes); + + log_int("log max key", log_max_key); +} + +#if defined(INTEL) +static void tstp_handler(int sig, siginfo_t *info, ucontext_t *uc) +{ + static unsigned int sem = 0; + unsigned long *esp = (unsigned long *)(uc->uc_mcontext.gregs[7]); + int pid = getpid(); + + while ( CASIO(&sem, 0, 1) != 0 ) sched_yield(); + + printf("Signal %d for pid %d\n", sig, pid); + printf("%d: EIP=%08x EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", pid, + uc->uc_mcontext.gregs[14], uc->uc_mcontext.gregs[11], + uc->uc_mcontext.gregs[ 8], uc->uc_mcontext.gregs[10], + uc->uc_mcontext.gregs[ 9]); + printf("%d: ESP=%08x EBP=%08x ESI=%08x EDI=%08x EFL=%08x\n", pid, + uc->uc_mcontext.gregs[ 7], uc->uc_mcontext.gregs[ 6], + uc->uc_mcontext.gregs[ 5], uc->uc_mcontext.gregs[ 4], + uc->uc_mcontext.gregs[16]); + printf("\n"); + + sem = 0; + + for ( ; ; ) sched_yield(); +} +#endif + +int main (int argc, char **argv) +{ +#ifdef DO_WRITE_LOG + int fd; + unsigned long log_header[] = { 0, MAX_ITERATIONS, 0 }; + + if ( argc != 5 ) + { + printf("%s \n" + "(0 <= read_proportion <= 256)\n", argv[0]); + exit(1); + } +#else + if ( argc != 4 ) + { + printf("%s \n" + "(0 <= read_proportion <= 256)\n", argv[0]); + exit(1); + } +#endif + + memset(&shared, 0, sizeof(shared)); + + num_threads = atoi(argv[1]); + log_int ("num_threads", num_threads); + + proportion = atoi(argv[2]); + log_float ("frac_reads", (float)proportion/256.0); + + log_max_key = atoi(argv[3]); + max_key = 1 << atoi(argv[3]); + log_int("max_key", max_key); + + log_int ("max_iterations", MAX_ITERATIONS); + + log_int ("wall_time_limit_s", MAX_WALL_TIME); + +#ifdef SPARC + { + int st, maxcpu = sysconf(_SC_CPUID_MAX), i, j=0; + + /* Favour processors that don't handle I/O interrupts. */ + for ( i = 0; i <= maxcpu; i++ ) + { + st = p_online(i, P_STATUS); + if ( st == P_NOINTR ) + { + if ( j == num_threads ) break; + processors[j++] = i; + if ( j == num_threads ) break; + } + } + + /* Fall back to the system quads if necessary. 
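+ * (That is, if the loop above found fewer interrupt-free processors
+ * than threads, fill the remaining slots with any CPU reported
+ * P_ONLINE.)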
*/ + for ( i = 0; i <= maxcpu; i++ ) + { + st = p_online(i, P_STATUS); + if ( st == P_ONLINE ) + { + if ( j == num_threads ) break; + processors[j++] = i; + if ( j == num_threads ) break; + } + } + + if ( j != num_threads ) + { + printf("Urk! Not enough CPUs for threads (%d < %d)\n", + j, num_threads); + abort(); + } + } +#endif + +#ifdef DO_WRITE_LOG + log_header[0] = num_threads; + log_header[2] = max_key; + global_log = malloc(SIZEOF_GLOBAL_LOG); +#endif + +#if defined(INTEL) + { + struct sigaction act; + memset(&act, 0, sizeof(act)); + act.sa_handler = (void *)tstp_handler; + act.sa_flags = SA_SIGINFO; + sigaction(SIGTSTP, &act, NULL); + sigaction(SIGQUIT, &act, NULL); + sigaction(SIGSEGV, &act, NULL); + } +#endif + + test_multithreaded (); + + dump_log (); + +#ifdef DO_WRITE_LOG + printf("Writing log...\n"); + /* Write logs to data file */ + fd = open(argv[4], O_WRONLY | O_CREAT | O_TRUNC, 0644); + if ( fd == -1 ) + { + fprintf(stderr, "Error writing log!\n"); + exit(-1); + } + + if ( (write(fd, log_header, sizeof(log_header)) != sizeof(log_header)) || + (write(fd, global_log, SIZEOF_GLOBAL_LOG) != SIZEOF_GLOBAL_LOG) ) + { + fprintf(stderr, "Log write truncated or erroneous\n"); + close(fd); + exit(-1); + } + + close(fd); +#endif + + exit(0); +} diff --git a/src/mcas/skip_cas.c b/src/mcas/skip_cas.c new file mode 100644 index 000000000..706ed3c92 --- /dev/null +++ b/src/mcas/skip_cas.c @@ -0,0 +1,497 @@ +/****************************************************************************** + * skip_cas.c + * + * Skip lists, allowing concurrent update by use of CAS primitives. + * + * Copyright (c) 2001-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include "portable_defns.h" +#include "ptst.h" +#include "set.h" + + +/* + * SKIP LIST + */ + +typedef struct node_st node_t; +typedef struct set_st set_t; +typedef VOLATILE node_t *sh_node_pt; + +struct node_st +{ + int level; +#define LEVEL_MASK 0x0ff +#define READY_FOR_FREE 0x100 + setkey_t k; + setval_t v; + sh_node_pt next[1]; +}; + +struct set_st +{ + node_t head; +}; + +static int gc_id[NUM_LEVELS]; + +/* + * PRIVATE FUNCTIONS + */ + +/* + * Random level generator. Drop-off rate is 0.5 per level. + * Returns value 1 <= level <= NUM_LEVELS. + */ +static int get_level(ptst_t *ptst) +{ + unsigned long r = rand_next(ptst); + int l = 1; + r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1); + while ( (r & 1) ) { l++; r >>= 1; } + return(l); +} + + +/* + * Allocate a new node, and initialise its @level field. + * NB. Initialisation will eventually be pushed into garbage collector, + * because of dependent read reordering. + */ +static node_t *alloc_node(ptst_t *ptst) +{ + int l; + node_t *n; + l = get_level(ptst); + n = gc_alloc(ptst, gc_id[l - 1]); + n->level = l; + return(n); +} + + +/* Free a node to the garbage collector. */ +static void free_node(ptst_t *ptst, sh_node_pt n) +{ + gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]); +} + + +/* + * Search for first non-deleted node, N, with key >= @k at each level in @l. + * RETURN VALUES: + * Array @pa: @pa[i] is non-deleted predecessor of N at level i + * Array @na: @na[i] is N itself, which should be pointed at by @pa[i] + * MAIN RETURN VALUE: same as @na[0]. + */ +static sh_node_pt strong_search_predecessors( + set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na) +{ + sh_node_pt x, x_next, old_x_next, y, y_next; + setkey_t y_k; + int i; + + retry: + RMB(); + + x = &l->head; + for ( i = NUM_LEVELS - 1; i >= 0; i-- ) + { + /* We start our search at previous level's unmarked predecessor. */ + READ_FIELD(x_next, x->next[i]); + /* If this pointer's marked, so is @pa[i+1]. May as well retry. */ + if ( is_marked_ref(x_next) ) goto retry; + + for ( y = x_next; ; y = y_next ) + { + /* Shift over a sequence of marked nodes. */ + for ( ; ; ) + { + READ_FIELD(y_next, y->next[i]); + if ( !is_marked_ref(y_next) ) break; + y = get_unmarked_ref(y_next); + } + + READ_FIELD(y_k, y->k); + if ( y_k >= k ) break; + + /* Update estimate of predecessor at this level. */ + x = y; + x_next = y_next; + } + + /* Swing forward pointer over any marked nodes. */ + if ( x_next != y ) + { + old_x_next = CASPO(&x->next[i], x_next, y); + if ( old_x_next != x_next ) goto retry; + } + + if ( pa ) pa[i] = x; + if ( na ) na[i] = y; + } + + return(y); +} + + +/* This function does not remove marked nodes. Use it optimistically. */ +static sh_node_pt weak_search_predecessors( + set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na) +{ + sh_node_pt x, x_next; + setkey_t x_next_k; + int i; + + x = &l->head; + for ( i = NUM_LEVELS - 1; i >= 0; i-- ) + { + for ( ; ; ) + { + READ_FIELD(x_next, x->next[i]); + x_next = get_unmarked_ref(x_next); + + READ_FIELD(x_next_k, x_next->k); + if ( x_next_k >= k ) break; + + x = x_next; + } + + if ( pa ) pa[i] = x; + if ( na ) na[i] = x_next; + } + + return(x_next); +} + + +/* + * Mark @x deleted at every level in its list from @level down to level 1. + * When all forward pointers are marked, node is effectively deleted. + * Future searches will properly remove node by swinging predecessors' + * forward pointers. 
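+ *
+ * (The deletion mark lives in low-order bits of the forward pointer
+ * itself -- see is_marked_ref()/get_marked_ref(), supplied by the
+ * portable headers -- so a single CAS below both marks the pointer and
+ * preserves its target.)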
+ */ +static void mark_deleted(sh_node_pt x, int level) +{ + sh_node_pt x_next; + + while ( --level >= 0 ) + { + x_next = x->next[level]; + while ( !is_marked_ref(x_next) ) + { + x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next)); + } + WEAK_DEP_ORDER_WMB(); /* mark in order */ + } +} + + +static int check_for_full_delete(sh_node_pt x) +{ + int level = x->level; + return ((level & READY_FOR_FREE) || + (CASIO(&x->level, level, level | READY_FOR_FREE) != level)); +} + + +static void do_full_delete(ptst_t *ptst, set_t *l, sh_node_pt x, int level) +{ + int k = x->k; +#ifdef WEAK_MEM_ORDER + sh_node_pt preds[NUM_LEVELS]; + int i = level; + retry: + (void)strong_search_predecessors(l, k, preds, NULL); + /* + * Above level 1, references to @x can disappear if a node is inserted + * immediately before and we see an old value for its forward pointer. This + * is a conservative way of checking for that situation. + */ + if ( i > 0 ) RMB(); + while ( i > 0 ) + { + node_t *n = get_unmarked_ref(preds[i]->next[i]); + while ( n->k < k ) + { + n = get_unmarked_ref(n->next[i]); + RMB(); /* we don't want refs to @x to "disappear" */ + } + if ( n == x ) goto retry; + i--; /* don't need to check this level again, even if we retry. */ + } +#else + (void)strong_search_predecessors(l, k, NULL, NULL); +#endif + free_node(ptst, x); +} + + +/* + * PUBLIC FUNCTIONS + */ + +set_t *set_alloc(void) +{ + set_t *l; + node_t *n; + int i; + + n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *)); + memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *)); + n->k = SENTINEL_KEYMAX; + + /* + * Set the forward pointers of final node to other than NULL, + * otherwise READ_FIELD() will continually execute costly barriers. + * Note use of 0xfe -- that doesn't look like a marked value! + */ + memset(n->next, 0xfe, NUM_LEVELS*sizeof(node_t *)); + + l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *)); + l->head.k = SENTINEL_KEYMIN; + l->head.level = NUM_LEVELS; + for ( i = 0; i < NUM_LEVELS; i++ ) + { + l->head.next[i] = n; + } + + return(l); +} + + +setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite) +{ + setval_t ov, new_ov; + ptst_t *ptst; + sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS]; + sh_node_pt pred, succ, new = NULL, new_next, old_next; + int i, level; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + succ = weak_search_predecessors(l, k, preds, succs); + + retry: + ov = NULL; + + if ( succ->k == k ) + { + /* Already a @k node in the list: update its mapping. */ + new_ov = succ->v; + do { + if ( (ov = new_ov) == NULL ) + { + /* Finish deleting the node, then retry. */ + READ_FIELD(level, succ->level); + mark_deleted(succ, level & LEVEL_MASK); + succ = strong_search_predecessors(l, k, preds, succs); + goto retry; + } + } + while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) ); + + if ( new != NULL ) free_node(ptst, new); + goto out; + } + +#ifdef WEAK_MEM_ORDER + /* Free node from previous attempt, if this is a retry. */ + if ( new != NULL ) + { + free_node(ptst, new); + new = NULL; + } +#endif + + /* Not in the list, so initialise a new node for insertion. */ + if ( new == NULL ) + { + new = alloc_node(ptst); + new->k = k; + new->v = v; + } + level = new->level; + + /* If successors don't change, this saves us some CAS operations. */ + for ( i = 0; i < level; i++ ) + { + new->next[i] = succs[i]; + } + + /* We've committed when we've inserted at level 1. 
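+ * (The level-0 CAS below is thus the linearisation point of a
+ * successful insert; the higher-level links added afterwards only
+ * accelerate searches and may safely lag.)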
*/ + WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */ + old_next = CASPO(&preds[0]->next[0], succ, new); + if ( old_next != succ ) + { + succ = strong_search_predecessors(l, k, preds, succs); + goto retry; + } + + /* Insert at each of the other levels in turn. */ + i = 1; + while ( i < level ) + { + pred = preds[i]; + succ = succs[i]; + + /* Someone *can* delete @new under our feet! */ + new_next = new->next[i]; + if ( is_marked_ref(new_next) ) goto success; + + /* Ensure forward pointer of new node is up to date. */ + if ( new_next != succ ) + { + old_next = CASPO(&new->next[i], new_next, succ); + if ( is_marked_ref(old_next) ) goto success; + assert(old_next == new_next); + } + + /* Ensure we have unique key values at every level. */ + if ( succ->k == k ) goto new_world_view; + assert((pred->k < k) && (succ->k > k)); + + /* Replumb predecessor's forward pointer. */ + old_next = CASPO(&pred->next[i], succ, new); + if ( old_next != succ ) + { + new_world_view: + RMB(); /* get up-to-date view of the world. */ + (void)strong_search_predecessors(l, k, preds, succs); + continue; + } + + /* Succeeded at this level. */ + i++; + } + + success: + /* Ensure node is visible at all levels before punting deletion. */ + WEAK_DEP_ORDER_WMB(); + if ( check_for_full_delete(new) ) + { + MB(); /* make sure we see all marks in @new. */ + do_full_delete(ptst, l, new, level - 1); + } + out: + critical_exit(ptst); + return(ov); +} + + +setval_t set_remove(set_t *l, setkey_t k) +{ + setval_t v = NULL, new_v; + ptst_t *ptst; + sh_node_pt preds[NUM_LEVELS], x; + int level, i; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + x = weak_search_predecessors(l, k, preds, NULL); + if ( x->k > k ) goto out; + READ_FIELD(level, x->level); + level = level & LEVEL_MASK; + + /* Once we've marked the value field, the node is effectively deleted. */ + new_v = x->v; + do { + v = new_v; + if ( v == NULL ) goto out; + } + while ( (new_v = CASPO(&x->v, v, NULL)) != v ); + + /* Committed to @x: mark lower-level forward pointers. */ + WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */ + mark_deleted(x, level); + + /* + * We must swing predecessors' pointers, or we can end up with + * an unbounded number of marked but not fully deleted nodes. + * Doing this creates a bound equal to number of threads in the system. + * Furthermore, we can't legitimately call 'free_node' until all shared + * references are gone. + */ + for ( i = level - 1; i >= 0; i-- ) + { + if ( CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x ) + { + if ( (i != (level - 1)) || check_for_full_delete(x) ) + { + MB(); /* make sure we see node at all levels. 
*/ + do_full_delete(ptst, l, x, i); + } + goto out; + } + } + + free_node(ptst, x); + + out: + critical_exit(ptst); + return(v); +} + + +setval_t set_lookup(set_t *l, setkey_t k) +{ + setval_t v = NULL; + ptst_t *ptst; + sh_node_pt x; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + x = weak_search_predecessors(l, k, NULL, NULL); + if ( x->k == k ) READ_FIELD(v, x->v); + + critical_exit(ptst); + return(v); +} + + +void _init_set_subsystem(void) +{ + int i; + + for ( i = 0; i < NUM_LEVELS; i++ ) + { + gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *)); + } +} diff --git a/src/mcas/skip_lock.c b/src/mcas/skip_lock.c new file mode 100644 index 000000000..4eb728c83 --- /dev/null +++ b/src/mcas/skip_lock.c @@ -0,0 +1,435 @@ +/****************************************************************************** + * skip_lock.c (Variable-granularity Mutexes) + * + * Mutex only taken for write operations (reads are unprotected). Write + * mutexes come in three flavours, selected by a compile-time flag. + * + * If FAT_MTX is defined: + * A skip list is protected by one mutex for the entire list. Note that this + * differs from skip_bm.c, which takes the mutex for read operations as well. + * + * If TINY_MTX is defined: + * Mutex per forward pointer in each node. + * + * If neither flag is defined: + * Mutex per node. + * + * Copyright (c) 2001-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include "portable_defns.h" +#include "ptst.h" +#include "set.h" + + +/* + * SKIP LIST + */ + +typedef struct node_st node_t; +typedef struct set_st set_t; +typedef VOLATILE node_t *sh_node_pt; + +typedef struct ptr_st ptr_t; +struct ptr_st +{ +#ifdef TINY_MTX /* mutex per forward pointer */ + mcs_lock_t m; +#endif + sh_node_pt p; +}; + +struct node_st +{ + int level; + setkey_t k; + setval_t v; +#ifndef FAT_MTX + mcs_lock_t m; +#endif + ptr_t next[1]; +}; + +struct set_st +{ +#ifdef FAT_MTX + mcs_lock_t m; +#endif + node_t head; +}; + +static int gc_id[NUM_LEVELS]; + +/* + * LOCKING + */ + +#ifdef FAT_MTX + +#define LIST_LOCK(_l,_qn) ((void)mcs_lock((void*)&(_l)->m, (_qn))) +#define LIST_UNLOCK(_l,_qn) ((void)mcs_unlock((void*)&(_l)->m, (_qn))) +#define NODE_LOCK(_x,_qn) ((void)0) +#define NODE_UNLOCK(_x,_qn) ((void)0) +#define PTR_UPDATE_LOCK(_x,_i,_qn) ((void)0) +#define PTR_UPDATE_UNLOCK(_x,_i,_qn) ((void)0) +#define PTR_DELETE_LOCK(_x,_i,_qn) ((void)0) +#define PTR_DELETE_UNLOCK(_x,_i,_qn) ((void)0) + +#else + +#define LIST_LOCK(_l,_qn) ((void)0) +#define LIST_UNLOCK(_l,_qn) ((void)0) + +/* We take the main node lock to get exclusive rights on insert/delete ops. */ +#define NODE_LOCK(_x,_qn) ((void)mcs_lock((void*)&(_x)->m, (_qn))) +#define NODE_UNLOCK(_x,_qn) ((void)mcs_unlock((void*)&(_x)->m, (_qn))) + +#ifdef TINY_MTX + +/* + * Predecessor's pointer is locked before swinging (on delete), or + * replumbing (on insert). + */ +#define PTR_UPDATE_LOCK(_x, _i, _qn) \ + ((void)mcs_lock((void*)&(_x)->next[(_i)].m, (_qn))) +#define PTR_UPDATE_UNLOCK(_x, _i, _qn) \ + ((void)mcs_unlock((void*)&(_x)->next[(_i)].m, (_qn))) +/* + * When deleting a node, we take the lock on each of its pointers in turn, + * to prevent someone from inserting a new node directly after, or deleting + * immediate successor. + */ +#define PTR_DELETE_LOCK(_x, _i, _qn) PTR_UPDATE_LOCK(_x,_i,(_qn)) +#define PTR_DELETE_UNLOCK(_x, _i, _qn) PTR_UPDATE_UNLOCK(_x,_i,(_qn)) + +#else /* LITTLE_MTX */ + +/* + * Predecessor must certainly be locked for insert/delete ops. So we take + * the only lock we can. + */ +#define PTR_UPDATE_LOCK(_x, _i, _qn) NODE_LOCK(_x,(_qn)) +#define PTR_UPDATE_UNLOCK(_x, _i, _qn) NODE_UNLOCK(_x,(_qn)) +/* + * We can't lock individual pointers. There's no need anyway, since we have + * the node's lock already (to allow us exclusive delete rights). + */ +#define PTR_DELETE_LOCK(_x, _i, _qn) ((void)0) +#define PTR_DELETE_UNLOCK(_x, _i, _qn) ((void)0) + +#endif + +#endif + + +/* + * PRIVATE FUNCTIONS + */ + +/* + * Random level generator. Drop-off rate is 0.5 per level. + * Returns value 1 <= level <= NUM_LEVELS. + */ +static int get_level(ptst_t *ptst) +{ + unsigned long r = rand_next(ptst); + int l = 1; + r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1); + while ( (r & 1) ) { l++; r >>= 1; } + return(l); +} + + +/* + * Allocate a new node, and initialise its @level field. + * NB. Initialisation will eventually be pushed into garbage collector, + * because of dependent read reordering. + */ +static node_t *alloc_node(ptst_t *ptst) +{ + int l; + node_t *n; + l = get_level(ptst); + n = gc_alloc(ptst, gc_id[l - 1]); + n->level = l; +#ifndef FAT_MTX + mcs_init(&n->m); +#endif +#ifdef TINY_MTX + for ( l = 0; l < n->level; l++ ) + { + mcs_init(&n->next[l].m); + } +#endif + return(n); +} + + +/* Free a node to the garbage collector. 
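+ * (Even in this lock-based variant readers traverse without taking any
+ * mutex, so retired nodes must pass through the deferred collector in
+ * gc.c rather than going straight back to the allocator.)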
*/ +static void free_node(ptst_t *ptst, sh_node_pt n) +{ + gc_free(ptst, (void *)n, gc_id[n->level - 1]); +} + + +/* + * Find and lock predecessor at level @i of node with key @k. This + * predecessor must have key >= @x->k. + */ +#ifndef FAT_MTX +static sh_node_pt get_lock(sh_node_pt x, setkey_t k, int i, qnode_t *qn) +{ + sh_node_pt y; + setkey_t y_k; + + for ( ; ; ) + { + READ_FIELD(y, x->next[i].p); + READ_FIELD(y_k, y->k); + if ( y_k >= k ) break; + retry: + x = y; + } + + PTR_UPDATE_LOCK(x, i, qn); /* MB => no need for READ_FIELD on x or y. */ + y = x->next[i].p; + if ( y->k < k ) + { + PTR_UPDATE_UNLOCK(x, i, qn); + goto retry; + } + + return(x); +} +#else +#define get_lock(_x,_k,_i,_qn) (_x) +#endif + + +/* + * Search for first non-deleted node, N, with key >= @k at each level in @l. + * RETURN VALUES: + * Array @pa: @pa[i] is non-deleted predecessor of N at level i + * MAIN RETURN VALUE: N at level 0. + */ +static sh_node_pt search_predecessors(set_t *l, setkey_t k, sh_node_pt *pa) +{ + sh_node_pt x, y; + setkey_t y_k; + int i; + + x = &l->head; + for ( i = NUM_LEVELS - 1; i >= 0; i-- ) + { + for ( ; ; ) + { + READ_FIELD(y, x->next[i].p); + READ_FIELD(y_k, y->k); + if ( y_k >= k ) break; + x = y; /* remember largest predecessor so far */ + } + + if ( pa ) pa[i] = x; + } + + return(y); +} + + +/* + * PUBLIC FUNCTIONS + */ + +set_t *set_alloc(void) +{ + set_t *l; + node_t *n; + int i; + + n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(ptr_t)); + memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(ptr_t)); + n->k = SENTINEL_KEYMAX; + + l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(ptr_t)); + l->head.k = SENTINEL_KEYMIN; + l->head.level = NUM_LEVELS; +#ifdef FAT_MTX + mcs_init(&l->m); +#else + mcs_init(&l->head.m); +#endif + for ( i = 0; i < NUM_LEVELS; i++ ) + { + l->head.next[i].p = n; +#ifdef TINY_MTX + mcs_init(&l->head.next[i].m); +#endif + } + + return(l); +} + + +setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite) +{ + setval_t ov = NULL; + ptst_t *ptst; + sh_node_pt update[NUM_LEVELS]; + sh_node_pt x, y; + int i; + qnode_t l_qn, x_qn, y_qn; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + LIST_LOCK(l, &l_qn); + + (void)search_predecessors(l, k, update); + + x = get_lock(update[0], k, 0, &x_qn); + y = x->next[0].p; + if ( y->k == k ) + { + ov = y->v; + if ( overwrite ) y->v = v; + PTR_UPDATE_UNLOCK(x, 0, &x_qn); + goto out; + } + + /* Not in the list, so do the insertion. */ + y = alloc_node(ptst); + y->k = k; + y->v = v; + NODE_LOCK(y, &y_qn); + + for ( i = 0; i < y->level; i++ ) + { + if ( i != 0 ) x = get_lock(update[i], k, i, &x_qn); + y->next[i].p = x->next[i].p; + WMB(); + x->next[i].p = y; + PTR_UPDATE_UNLOCK(x, i, &x_qn); + } + + NODE_UNLOCK(y, &y_qn); + + out: + LIST_UNLOCK(l, &l_qn); + critical_exit(ptst); + return(ov); +} + + +setval_t set_remove(set_t *l, setkey_t k) +{ + setval_t v = NULL; + ptst_t *ptst; + sh_node_pt update[NUM_LEVELS]; + sh_node_pt x, y; + int i; + qnode_t l_qn, x_qn, y_qn, yd_qn; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + LIST_LOCK(l, &l_qn); + + y = search_predecessors(l, k, update); + +#ifdef FAT_MTX + if ( y->k != k ) goto out; +#else + y = update[0]; + for ( ; ; ) + { + setkey_t y_k; + y = y->next[0].p; /* no need for READ_FIELD() */ + READ_FIELD(y_k, y->k); + if ( y_k > k ) goto out; + NODE_LOCK(y, &y_qn); + if ( (y_k == k) && (y_k <= y->next[0].p->k) ) break; + NODE_UNLOCK(y, &y_qn); + } +#endif + + /* @y is the correct node, and we have it locked, so now delete it. 
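+ * (Unlinking runs from the topmost level downwards; note that each
+ * unlinked forward pointer of @y is redirected back to the predecessor
+ * @x, so an unprotected reader caught on @y still reaches live nodes.)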
*/ + for ( i = y->level - 1; i >= 0; i-- ) + { + x = get_lock(update[i], k, i, &x_qn); + PTR_DELETE_LOCK(y, i, &yd_qn); + x->next[i].p = y->next[i].p; + WMB(); + y->next[i].p = x; + PTR_DELETE_UNLOCK(y, i, &yd_qn); + PTR_UPDATE_UNLOCK(x, i, &x_qn); + } + + v = y->v; + free_node(ptst, y); + NODE_UNLOCK(y, &y_qn); + + out: + LIST_UNLOCK(l, &l_qn); + critical_exit(ptst); + return(v); +} + + +setval_t set_lookup(set_t *l, setkey_t k) +{ + setval_t v = NULL; + ptst_t *ptst; + sh_node_pt x; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + x = search_predecessors(l, k, NULL); + if ( x->k == k ) READ_FIELD(v, x->v); + + critical_exit(ptst); + return(v); +} + + +void _init_set_subsystem(void) +{ + int i; + + for ( i = 0; i < NUM_LEVELS; i++ ) + { + gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(ptr_t)); + } +} diff --git a/src/mcas/skip_mcas.c b/src/mcas/skip_mcas.c new file mode 100644 index 000000000..846e67136 --- /dev/null +++ b/src/mcas/skip_mcas.c @@ -0,0 +1,374 @@ +/****************************************************************************** + * skip_mcas.c + * + * Skip lists, allowing concurrent update by use of MCAS primitive. + * + * Copyright (c) 2001-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include +#include "portable_defns.h" +#include "ptst.h" +#include "set.h" + +#define MCAS_MARK(_v) ((unsigned long)(_v) & 3) + +#define PROCESS(_v, _pv) \ + while ( MCAS_MARK(_v) ) { \ + mcas_fixup((void **)(_pv), _v); \ + (_v) = *(_pv); \ + } + +#define WALK_THRU(_v, _pv) \ + if ( MCAS_MARK(_v) ) (_v) = read_barrier_lite((void **)(_pv)); + +/* Pull in the MCAS implementation. */ +#include "mcas.c" + +/* + * SKIP LIST + */ + +typedef struct node_st node_t; +typedef struct set_st set_t; +typedef VOLATILE node_t *sh_node_pt; + +struct node_st +{ + int level; + setkey_t k; + setval_t v; + sh_node_pt next[1]; +}; + +struct set_st +{ + node_t head; +}; + +static int gc_id[NUM_LEVELS]; + +/* + * PRIVATE FUNCTIONS + */ + +/* + * Random level generator. Drop-off rate is 0.5 per level. 
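+ * (Each extra level is taken with probability 1/2: the loop counts
+ * trailing set bits of a random word, so P(level == n) = 2^-n, capped
+ * by the (NUM_LEVELS-1)-bit mask.)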
+ * Returns value 1 <= level <= NUM_LEVELS. + */ +static int get_level(ptst_t *ptst) +{ + unsigned long r = rand_next(ptst); + int l = 1; + r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1); + while ( (r & 1) ) { l++; r >>= 1; } + return(l); +} + + +/* + * Allocate a new node, and initialise its @level field. + * NB. Initialisation will eventually be pushed into garbage collector, + * because of dependent read reordering. + */ +static node_t *alloc_node(ptst_t *ptst) +{ + int l; + node_t *n; + l = get_level(ptst); + n = gc_alloc(ptst, gc_id[l - 1]); + n->level = l; + return(n); +} + + +/* Free a node to the garbage collector. */ +static void free_node(ptst_t *ptst, sh_node_pt n) +{ + gc_free(ptst, (void *)n, gc_id[n->level - 1]); +} + + +/* + * Search for first non-deleted node, N, with key >= @k at each level in @l. + * RETURN VALUES: + * Array @pa: @pa[i] is non-deleted predecessor of N at level i + * Array @na: @na[i] is N itself, which should be pointed at by @pa[i] + * MAIN RETURN VALUE: same as @na[0]. + */ +static sh_node_pt search_predecessors( + set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na) +{ + sh_node_pt x, x_next; + setkey_t x_next_k; + int i; + + RMB(); + + x = &l->head; + for ( i = NUM_LEVELS - 1; i >= 0; i-- ) + { + for ( ; ; ) + { + READ_FIELD(x_next, x->next[i]); + WALK_THRU(x_next, &x->next[i]); + + READ_FIELD(x_next_k, x_next->k); + if ( x_next_k >= k ) break; + + x = x_next; + } + + if ( pa ) pa[i] = x; + if ( na ) na[i] = x_next; + } + + return(x_next); +} + +static setval_t finish_delete(sh_node_pt x, sh_node_pt *preds) +{ + per_thread_state_t *mcas_ptst = get_ptst(); + CasDescriptor_t *cd; + int level, i, ret = FALSE; + sh_node_pt x_next; + setkey_t x_next_k; + setval_t v; + + READ_FIELD(level, x->level); + + cd = new_descriptor(mcas_ptst, (level << 1) + 1); + cd->status = STATUS_IN_PROGRESS; + cd->length = (level << 1) + 1; + + /* First, the deleted node's value field. */ + READ_FIELD(v, x->v); + PROCESS(v, &x->v); + if ( v == NULL ) goto fail; + cd->entries[0].ptr = (void **)&x->v; + cd->entries[0].old = v; + cd->entries[0].new = NULL; + + for ( i = 0; i < level; i++ ) + { + READ_FIELD(x_next, x->next[i]); + PROCESS(x_next, &x->next[i]); + READ_FIELD(x_next_k, x_next->k); + if ( x->k > x_next_k ) { v = NULL; goto fail; } + cd->entries[i +1].ptr = (void **)&x->next[i]; + cd->entries[i +1].old = x_next; + cd->entries[i +1].new = preds[i]; + cd->entries[i+level+1].ptr = (void **)&preds[i]->next[i]; + cd->entries[i+level+1].old = x; + cd->entries[i+level+1].new = x_next; + } + + ret = mcas0(mcas_ptst, cd); + if ( ret == 0 ) v = NULL; + + fail: + rc_down_descriptor(cd); + return v; +} + + +/* + * PUBLIC FUNCTIONS + */ + +set_t *set_alloc(void) +{ + set_t *l; + node_t *n; + int i; + + static int mcas_inited = 0; + if ( !CASIO(&mcas_inited, 0, 1) ) mcas_init(); + + n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *)); + memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *)); + n->k = SENTINEL_KEYMAX; + + /* + * Set the forward pointers of final node to other than NULL, + * otherwise READ_FIELD() will continually execute costly barriers. + * Note use of 0xfc -- that doesn't look like a marked value! 
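+ * (MCAS_MARK() tests the two low-order bits of a pointer; a pointer
+ * built from 0xfc bytes has both clear, so readers treat it as an
+ * ordinary unmarked reference and skip the costly fix-up path.)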
+ */ + memset(n->next, 0xfc, NUM_LEVELS*sizeof(node_t *)); + + l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *)); + l->head.k = SENTINEL_KEYMIN; + l->head.level = NUM_LEVELS; + for ( i = 0; i < NUM_LEVELS; i++ ) + { + l->head.next[i] = n; + } + + return(l); +} + + +setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite) +{ + setval_t ov, new_ov; + ptst_t *ptst; + sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS]; + sh_node_pt succ, new = NULL; + int i, ret; + per_thread_state_t *mcas_ptst = NULL; + CasDescriptor_t *cd; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + retry: + ov = NULL; + + succ = search_predecessors(l, k, preds, succs); + + if ( succ->k == k ) + { + /* Already a @k node in the list: update its mapping. */ + READ_FIELD(new_ov, succ->v); + do { + ov = new_ov; + PROCESS(ov, &succ->v); + if ( ov == NULL ) goto retry; + } + while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) ); + + if ( new != NULL ) free_node(ptst, new); + goto out; + } + +#ifdef WEAK_MEM_ORDER + /* Free node from previous attempt, if this is a retry. */ + if ( new != NULL ) + { + free_node(ptst, new); + new = NULL; + } +#endif + + /* Not in the list, so initialise a new node for insertion. */ + if ( new == NULL ) + { + new = alloc_node(ptst); + new->k = k; + new->v = v; + } + + for ( i = 0; i < new->level; i++ ) + { + new->next[i] = succs[i]; + } + + if ( !mcas_ptst ) mcas_ptst = get_ptst(); + cd = new_descriptor(mcas_ptst, new->level); + cd->status = STATUS_IN_PROGRESS; + cd->length = new->level; + for ( i = 0; i < new->level; i++ ) + { + cd->entries[i].ptr = (void **)&preds[i]->next[i]; + cd->entries[i].old = succs[i]; + cd->entries[i].new = new; + } + ret = mcas0(mcas_ptst, cd); + rc_down_descriptor(cd); + } + while ( !ret ); + + out: + critical_exit(ptst); + return(ov); +} + + +setval_t set_remove(set_t *l, setkey_t k) +{ + setval_t v = NULL; + ptst_t *ptst; + sh_node_pt preds[NUM_LEVELS], x; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + x = search_predecessors(l, k, preds, NULL); + if ( x->k > k ) goto out; + } while ( (v = finish_delete(x, preds)) == NULL ); + + free_node(ptst, x); + + out: + critical_exit(ptst); + return(v); +} + + +setval_t set_lookup(set_t *l, setkey_t k) +{ + setval_t v = NULL; + ptst_t *ptst; + sh_node_pt x; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + x = search_predecessors(l, k, NULL, NULL); + if ( x->k == k ) + { + READ_FIELD(v, x->v); + WALK_THRU(v, &x->v); + } + + critical_exit(ptst); + return(v); +} + + +void _init_set_subsystem(void) +{ + int i; + + for ( i = 0; i < NUM_LEVELS; i++ ) + { + gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *)); + } + +} diff --git a/src/mcas/skip_stm.c b/src/mcas/skip_stm.c new file mode 100644 index 000000000..86e19b223 --- /dev/null +++ b/src/mcas/skip_stm.c @@ -0,0 +1,273 @@ +/****************************************************************************** + * skip_stm.c + * + * Skip lists, allowing concurrent update by use of the STM abstraction. + * + * Copyright (c) 2003, K A Fraser + * + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define __SET_IMPLEMENTATION__ + +#include +#include +#include +#include +#include "portable_defns.h" +#include "gc.h" +#include "stm.h" +#include "set.h" + +typedef struct node_st node_t; +typedef stm_blk set_t; + +struct node_st +{ + int level; + setkey_t k; + setval_t v; + stm_blk *next[NUM_LEVELS]; +}; + +static struct { + CACHE_PAD(0); + stm *memory; /* read-only */ + CACHE_PAD(2); +} shared; + +#define MEMORY (shared.memory) + +/* + * Random level generator. Drop-off rate is 0.5 per level. + * Returns value 1 <= level <= NUM_LEVELS. + */ +static int get_level(ptst_t *ptst) +{ + unsigned long r = rand_next(ptst); + int l = 1; + r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1); + while ( (r & 1) ) { l++; r >>= 1; } + return l; +} + + +/* + * Search for first non-deleted node, N, with key >= @k at each level in @l. + * RETURN VALUES: + * Array @pa: @pa[i] is non-deleted predecessor of N at level i + * Array @na: @na[i] is N itself, which should be pointed at by @pa[i] + * MAIN RETURN VALUE: same as @na[0], direct pointer open for reading. 
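+ *
+ * (Blocks are opened here with read_stm_blk(); a caller that intends to
+ * mutate a block must re-open it with write_stm_blk() inside the same
+ * transaction, as set_update() and set_remove() do below.)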
+ */ +static node_t *search_predecessors( + ptst_t *ptst, stm_tx *tx, set_t *l, setkey_t k, stm_blk **pa, stm_blk **na) +{ + stm_blk *xb, *x_nextb; + node_t *x, *x_next; + int i; + + xb = l; + x = read_stm_blk(ptst, tx, l); + for ( i = NUM_LEVELS - 1; i >= 0; i-- ) + { + for ( ; ; ) + { + x_nextb = x->next[i]; + x_next = read_stm_blk(ptst, tx, x_nextb); + if ( x_next->k >= k ) break; + xb = x_nextb; + x = x_next; + } + + if ( pa ) pa[i] = xb; + if ( na ) na[i] = x_nextb; + } + + return x_next; +} + + +/* + * PUBLIC FUNCTIONS + */ + +set_t *set_alloc(void) +{ + ptst_t *ptst; + stm_blk *hb, *tb; + node_t *h, *t; + int i; + + ptst = critical_enter(); + + tb = new_stm_blk(ptst, MEMORY); + t = init_stm_blk(ptst, MEMORY, tb); + memset(t, 0, sizeof(*t)); + t->k = SENTINEL_KEYMAX; + + hb = new_stm_blk(ptst, MEMORY); + h = init_stm_blk(ptst, MEMORY, hb); + memset(h, 0, sizeof(*h)); + h->k = SENTINEL_KEYMIN; + h->level = NUM_LEVELS; + for ( i = 0; i < NUM_LEVELS; i++ ) + h->next[i] = tb; + + critical_exit(ptst); + + return hb; +} + + +setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite) +{ + ptst_t *ptst; + stm_tx *tx; + setval_t ov; + stm_blk *bpreds[NUM_LEVELS], *bsuccs[NUM_LEVELS], *newb = NULL; + node_t *x, *p, *new; + int i; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + x = search_predecessors(ptst, tx, l, k, bpreds, bsuccs); + + if ( x->k == k ) + { + x = write_stm_blk(ptst, tx, bsuccs[0]); + ov = x->v; + x->v = v; + } + else + { + ov = NULL; + + if ( newb == NULL ) + { + newb = new_stm_blk(ptst, MEMORY); + new = init_stm_blk(ptst, MEMORY, newb); + new->k = k; + new->v = v; + new->level = get_level(ptst); + } + + for ( i = 0; i < new->level; i++ ) + { + new->next[i] = bsuccs[i]; + p = write_stm_blk(ptst, tx, bpreds[i]); + p->next[i] = newb; + } + } + } + while ( !commit_stm_tx(ptst, tx) ); + + if ( (ov != NULL) && (newb != NULL) ) + free_stm_blk(ptst, MEMORY, newb); + + critical_exit(ptst); + + return ov; +} + + +setval_t set_remove(set_t *l, setkey_t k) +{ + setval_t v; + ptst_t *ptst; + stm_tx *tx; + stm_blk *bpreds[NUM_LEVELS], *bsuccs[NUM_LEVELS]; + node_t *p, *x; + int i; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + x = search_predecessors(ptst, tx, l, k, bpreds, bsuccs); + if ( x->k == k ) + { + v = x->v; + for ( i = 0; i < x->level; i++ ) + { + p = write_stm_blk(ptst, tx, bpreds[i]); + p->next[i] = x->next[i]; + } + } + else + { + v = NULL; + } + } + while ( !commit_stm_tx(ptst, tx) ); + + if ( v != NULL ) + free_stm_blk(ptst, MEMORY, bsuccs[0]); + + critical_exit(ptst); + + return v; +} + + +setval_t set_lookup(set_t *l, setkey_t k) +{ + setval_t v; + ptst_t *ptst; + stm_tx *tx; + node_t *x; + + k = CALLER_TO_INTERNAL_KEY(k); + + ptst = critical_enter(); + + do { + new_stm_tx(tx, ptst, MEMORY); + x = search_predecessors(ptst, tx, l, k, NULL, NULL); + v = (x->k == k) ? 
x->v : NULL; + } + while ( !commit_stm_tx(ptst, tx) ); + + critical_exit(ptst); + + return v; +} + + +void _init_set_subsystem(void) +{ + ptst_t *ptst = critical_enter(); + _init_stm_subsystem(0); + MEMORY = new_stm(ptst, sizeof(node_t)); + critical_exit(ptst); +} diff --git a/src/mcas/sparc_defns.h b/src/mcas/sparc_defns.h new file mode 100644 index 000000000..e4767c171 --- /dev/null +++ b/src/mcas/sparc_defns.h @@ -0,0 +1,108 @@ +#ifndef __SPARC_DEFNS_H__ +#define __SPARC_DEFNS_H__ + +#ifndef SPARC +#define SPARC +#endif + +#include +#include +#include +#include +#include + +#define CACHE_LINE_SIZE 64 + +#if 1 +#include +#define pthread_mutex_t mutex_t +#define pthread_cond_t cond_t +#define pthread_t thread_t +#define pthread_key_t thread_key_t +#define pthread_create(_a,_b,_c,_d) thr_create(NULL,0,_c,_d,THR_BOUND|THR_NEW_LWP,_a) +#define pthread_join(_a,_b) thr_join(_a,NULL,NULL) +#define pthread_key_create(_a,_b) thr_keycreate(_a,_b) +#define pthread_setspecific(_a,_b) thr_setspecific(_a,_b) +static void *pthread_getspecific(pthread_key_t _a) +{ + void *__x; + thr_getspecific(_a,&__x); + return __x; +} +#define pthread_setconcurrency(_x) thr_setconcurrency(_x) +#define pthread_mutex_init(_a,_b) mutex_init(_a,USYNC_THREAD,NULL) +#define pthread_mutex_lock(_a) mutex_lock(_a) +#define pthread_mutex_unlock(_a) mutex_unlock(_a) +#define pthread_cond_init(_a,_b) cond_init(_a,USYNC_THREAD,NULL) +#define pthread_cond_wait(_a,_b) cond_wait(_a,_b) +#define pthread_cond_broadcast(_a) cond_broadcast(_a) +#else +#include +#endif + + +/* + * I. Compare-and-swap. + */ + +typedef unsigned long long _u64; + +extern int CASIO_internal(int *, int, int); +extern void * CASPO_internal(void *, void *, void *); +extern _u64 CAS64O_internal(_u64 *, _u64, _u64); +#define CASIO(_a,_o,_n) (CASIO_internal((int*)(_a),(int)(_o),(int)(_n))) +#define CASPO(_a,_o,_n) (CASPO_internal((void*)(_a),(void*)(_o),(void*)(_n))) +#define CAS32O(_a,_o,_n) (_u32)(CASIO_internal((int *)_a,(int)_o,(int)_n)) +#define CAS64O(_a,_o,_n) (CAS64O_internal((_u64 *)_a,(_u64)_o,(_u64)_n)) + +static int FASIO(int *a, int n) +{ + int no, o = *a; + while ( (no = CASIO(a, o, n)) != o ) o = no; + return o; +} + +static void *FASPO(void *a, void *n) +{ + void *no, *o = *(void **)a; + while ( (no = CASPO(a, o, n)) != o ) o = no; + return o; +} + + +/* + * II. Memory barriers. + * WMB(): All preceding write operations must commit before any later writes. + * RMB(): All preceding read operations must commit before any later reads. + * MB(): All preceding memory accesses must commit before any later accesses. + * + * If the compiler does not observe these barriers (but any sane compiler + * will!), then VOLATILE should be defined as 'volatile'. + */ + +extern void MEMBAR_ALL(void); +extern void MEMBAR_STORESTORE(void); +extern void MEMBAR_LOADLOAD(void); +#define MB() MEMBAR_ALL() +#define WMB() MEMBAR_STORESTORE() +#define RMB() MEMBAR_LOADLOAD() +#define VOLATILE /*volatile*/ + + +/* + * III. Cycle counter access. + */ + +typedef unsigned long tick_t; +extern tick_t RDTICK(void); + + +/* + * IV. Types. 
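+ *
+ * (Illustrative example, not part of the original header: the CAS
+ * wrappers from section I operate on the fixed-width aliases below in
+ * the usual retry-loop style, e.g. a lock-free counter increment:
+ *
+ *     static _u32 counter;
+ *     _u32 o = counter, n;
+ *     do { n = o; } while ( (o = CAS32O(&counter, n, n + 1)) != n );
+ *
+ * where CAS32O returns the value actually found at the address.)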
+ */
+
+typedef unsigned char  _u8;
+typedef unsigned short _u16;
+typedef unsigned int   _u32;
+
+#endif /* __SPARC_DEFNS_H__ */
diff --git a/src/mcas/sparc_mcas.il b/src/mcas/sparc_mcas.il
new file mode 100644
index 000000000..07d4072fc
--- /dev/null
+++ b/src/mcas/sparc_mcas.il
@@ -0,0 +1,30 @@
+.inline MEMBAR_ALL, 0
+    membar #StoreStore | #LoadLoad | #LoadStore | #StoreLoad
+.end
+
+.inline MEMBAR_STORESTORE, 0
+    membar #StoreStore
+.end
+
+.inline MEMBAR_LOADLOAD, 0
+    membar #LoadLoad
+.end
+
+.inline CASPO_internal
+    casx [%o0], %o1, %o2
+    mov %o2, %o0
+.end
+
+.inline CAS64O_internal
+    casx [%o0], %o1, %o2
+    mov %o2, %o0
+.end
+
+.inline CASIO_internal
+    cas [%o0], %o1, %o2
+    mov %o2, %o0
+.end
+
+.inline RDTICK
+    rd %tick, %o0
+.end
diff --git a/src/mcas/stm.h b/src/mcas/stm.h
new file mode 100644
index 000000000..4d2e25f38
--- /dev/null
+++ b/src/mcas/stm.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * stm.h
+ *
+ * Interface definitions for software transactional memory (STM).
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+ */
+
+#include "ptst.h"
+#include <setjmp.h>
+
+typedef struct stm_st stm;
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_st stm_tx;
+
+stm *new_stm(ptst_t *ptst, int blk_size);
+void free_stm(ptst_t *ptst, stm *mem);
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem);
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b);
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv);
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t);
+/* NB. Must still call commit after abort, but it's guaranteed to fail. */
+void abort_stm_tx(ptst_t *ptst, stm_tx *t);
+
+void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b);
+void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b);
+
+void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b);
+
+void _init_stm_subsystem(int pad_data);
+
+#define new_stm_tx(_tx, _ptst, _mem)                      \
+    do {                                                  \
+        sigjmp_buf env;                                   \
+        sigsetjmp(env, 1);                                \
+        (_tx) = new_stm_tx((_ptst), (_mem), &env);        \
+    } while ( 0 )
diff --git a/src/mcas/stm_fraser.c b/src/mcas/stm_fraser.c
new file mode 100644
index 000000000..fe6f89fb6
--- /dev/null
+++ b/src/mcas/stm_fraser.c
@@ -0,0 +1,661 @@
+/******************************************************************************
+ * stm_fraser.c
+ *
+ * Lock-free software transactional memory (STM).
+ *
+ * Copyright (c) 2002-2003, K A Fraser
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.  Neither the name of the Keir Fraser
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <unistd.h>
+
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_blk_st {
+    void *data;
+};
+
+struct stm_tx_entry_st {
+    stm_blk *b;
+    void *old;
+    void *new;
+    stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+    int status;
+    int rc;
+    stm_tx *next_free;
+    stm_tx_entry *reads;
+    stm_tx_entry *writes;
+    stm_tx_entry *alloc_ptr, *check;
+    int gc_data_id, blk_size; /* copied from 'stm' structure */
+    sigjmp_buf *penv;
+};
+
+struct stm_st {
+    int gc_data_id;
+    int blk_size;
+};
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+    void *arena, *arena_lim;
+    stm_tx *next_descriptor;
+    stm_tx *cur_tx;
+    CACHE_PAD(0);
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id;  /* Allocation id for block descriptors. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define ARENA_SIZE      40960
+#define DESCRIPTOR_SIZE  4096
+
+#define TXS_IN_PROGRESS 0
+#define TXS_READ_PHASE  1
+#define TXS_FAILED      2
+#define TXS_SUCCESSFUL  3
+
+#define is_descriptor(_p)     ((unsigned long)(_p) & 1)
+#define ptr_to_descriptor(_p) ((stm_tx *)((unsigned long)(_p) & ~1))
+#define make_marked_ptr(_p)   ((void *)((unsigned long)(_p) | 1))
+
+/* Is transaction read-only?
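+ *
+ * (Illustrative note, not part of the original source: a transaction
+ * whose write list is empty needs no CAS work at commit time, so
+ * commit_stm_tx() takes a fast path for it.  Ownership of a block is
+ * encoded by tagging the low bit of its data pointer with the macros
+ * above, e.g.:
+ *
+ *     void *m = make_marked_ptr(t);
+ *     assert(is_descriptor(m) && (ptr_to_descriptor(m) == t));
+ *
+ * which relies on descriptors never being odd-aligned.)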
*/ +#define read_only(_t) ((_t)->writes == NULL) + +bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t); + +static void new_arena (priv_t *priv, int size) +{ + priv->arena = malloc(size); + if ( priv->arena == NULL ) abort(); + priv->arena_lim = (((char *) priv->arena) + size); +} + +static void release_descriptor(ptst_t *ptst, stm_tx *t) +{ + stm_tx_entry *ent; + priv_t *priv = &priv_ptst[ptst->id]; + void *data; + + assert(t->status >= TXS_FAILED); + + t->next_free = priv->next_descriptor; + priv->next_descriptor = t; + + if ( t->status == TXS_SUCCESSFUL ) + { + for ( ent = t->writes; ent != NULL; ent = ent->next ) + { + gc_free(ptst, ent->old, t->gc_data_id); + } + } + else + { + for ( ent = t->writes; ent != NULL; ent = ent->next ) + { + gc_unsafe_free(ptst, ent->new, t->gc_data_id); + } + } +} + +static int rc_delta_descriptor(stm_tx *t, int delta) +{ + int rc, new_rc = t->rc; + + do { rc = new_rc; } + while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc ); + + return rc; +} + +static void rc_up_descriptor(stm_tx *t) +{ + rc_delta_descriptor(t, 2); + MB(); +} + +static void rc_down_descriptor(ptst_t *ptst, stm_tx *t) +{ + int old_rc, new_rc, cur_rc = t->rc; + + WMB(); + + do { + old_rc = cur_rc; + new_rc = old_rc - 2; + if ( new_rc == 0 ) new_rc = 1; + } + while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc ); + + if ( old_rc == 2 ) release_descriptor(ptst, t); +} + +static stm_tx *new_descriptor(priv_t *priv) +{ + stm_tx *t; + + t = priv->next_descriptor; + + if ( t != NULL ) + { + priv->next_descriptor = t->next_free; + /* 'Unfree' descriptor, if it was previously freed. */ + if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1); + } + else + { + t = (stm_tx *) priv->arena; + priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE; + + if ( priv->arena >= priv->arena_lim ) + { + new_arena(priv, ARENA_SIZE); + t = (stm_tx *) priv->arena; + priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE; + } + + t->next_free = NULL; + t->rc = 2; + } + + return t; +} + + +static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t) +{ + stm_tx_entry *ent = t->alloc_ptr++; + assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <= + DESCRIPTOR_SIZE); + return ent; +} + + +static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b) +{ + stm_tx_entry *next = *pnext; + + while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) ) + { + pnext = &next->next; + next = *pnext; + } + + return pnext; +} + + +static void *read_blk_data(ptst_t *ptst, stm_blk *b) +{ + void *data; + stm_tx *t; + int status; + stm_tx_entry **pent; + + for ( ; ; ) + { + data = b->data; + if ( !is_descriptor(data) ) return data; + + t = ptr_to_descriptor(data); + rc_up_descriptor(t); + if ( b->data != data ) + { + rc_down_descriptor(ptst, t); + continue; + } + + /* + * Commit even when we could just read from descriptor, as it gets + * the descriptor out of the way in future. 
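+ *
+ * (Clarifying note, not part of the original comment: helping the
+ * owner commit, rather than spinning until its descriptor disappears,
+ * is what keeps this STM non-blocking; a reader that finds a
+ * descriptor completes that transaction itself and then retries its
+ * own read.)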
+ */ + commit_stm_tx(ptst, t); + } +} + + +stm *new_stm(ptst_t *ptst, int blk_size) +{ + stm *mem = malloc(CACHE_LINE_SIZE); + mem->blk_size = blk_size; + mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size)); + return mem; +} + + +void free_stm(ptst_t *ptst, stm *mem) +{ + gc_remove_allocator(mem->gc_data_id); + free(mem); +} + + +stm_blk *new_stm_blk(ptst_t *ptst, stm *mem) +{ + stm_blk *b; + b = gc_alloc(ptst, gc_blk_id); + b->data = gc_alloc(ptst, mem->gc_data_id); + return b; +} + + +void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + /* + * We have to use read_stm_blk(), as some doomed transaction may still + * install a marked pointer here while in its write phase. + */ + void *data = read_blk_data(ptst, b); + assert(!is_descriptor(data)); + gc_free(ptst, data, mem->gc_data_id); + gc_free(ptst, b, gc_blk_id); +} + + +void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + return b->data; +} + + +int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + return mem->blk_size; +} + + +stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv) +{ + priv_t *priv = &priv_ptst[ptst->id]; + stm_tx *t; + + if ( priv->cur_tx != NULL ) goto nesting; + t = new_descriptor(priv); + t->status = TXS_IN_PROGRESS; + t->reads = t->writes = NULL; + t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1); + t->gc_data_id = mem->gc_data_id; + t->blk_size = mem->blk_size; + t->penv = penv; + priv->cur_tx = t; + return t; + + nesting: + fprintf(stderr, "No nesting of transactions is allowed\n"); + return NULL; +} + + +bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t) +{ + int desired_status, other_status, old_status, new_status, final_status; + void *marked_tx, *data; + stm_tx *other; + stm_tx_entry **other_pent, *ent; + priv_t *priv = &priv_ptst[ptst->id]; + + if ( priv->cur_tx == t ) priv->cur_tx = NULL; + + marked_tx = make_marked_ptr(t); + desired_status = TXS_FAILED; + + /* + * PHASE 1: WRITE-CHECKING PHASE. + */ + if ( (t->status == TXS_IN_PROGRESS) && ((ent = t->writes) != NULL) ) + { + /* Others should see up-to-date contents of descriptor. */ + WMB(); + + do { + for ( ; ; ) + { + data = CASPO(&ent->b->data, ent->old, marked_tx); + if ( (data == ent->old) || (data == marked_tx) ) break; + + if ( !is_descriptor(data) ) goto fail; + + other = ptr_to_descriptor(data); + rc_up_descriptor(other); + if ( ent->b->data != data ) + { + rc_down_descriptor(ptst, other); + continue; + } + + commit_stm_tx(ptst, other); + } + } + while ( (ent = ent->next) != NULL ); + } + + /* On success we linearise at this point. */ + WEAK_DEP_ORDER_WMB(); + + /* + * PHASE 2: READ-CHECKING PHASE. + */ + if ( (t->status <= TXS_READ_PHASE) && (t->reads != NULL) ) + { + if ( !read_only(t) ) + { + CASIO(&t->status, TXS_IN_PROGRESS, TXS_READ_PHASE); + MB_NEAR_CAS(); + } + else MB(); + + for ( ent = t->reads; ent != NULL; ent = ent->next ) + { + for ( ; ; ) + { + data = ent->b->data; + if ( data == ent->old ) break; + + /* Someone else made progress at our expense. */ + if ( !is_descriptor(data) ) goto fail; + other = ptr_to_descriptor(data); + + /* + * Descriptor always belongs to a contending operation. + * Before continuing, we must increment the reference count. + */ + assert(other != t); + rc_up_descriptor(other); + if ( ent->b->data != data ) + { + rc_down_descriptor(ptst, other); + continue; + } + + /* + * What we do now depends on the status of the contending + * operation. This is easy for any status other than + * TXS_READ_PHASE -- usually we just check against the + * appropriate 'old' or 'new' data pointer. 
Transactions + * in their read-checking phase must be aborted, or helped + * to completion, depending on relative ordering of the + * transaction descriptors. + */ + while ( (other_status = other->status) == TXS_READ_PHASE ) + { + if ( t < other ) + { + CASIO(&other->status, TXS_READ_PHASE, TXS_FAILED); + } + else + { + rc_up_descriptor(other); + commit_stm_tx(ptst, other); + } + } + + other_pent = search_stm_tx_entry(&other->writes, ent->b); + assert(*other_pent != NULL); + data = (other_status == TXS_SUCCESSFUL) ? + (*other_pent)->new : (*other_pent)->old; + rc_down_descriptor(ptst, other); + if ( data != ent->old ) goto fail; + + break; + } + } + } + + desired_status = TXS_SUCCESSFUL; + + fail: + if ( read_only(t) ) + { + /* A very fast path: we can immediately reuse the descriptor. */ + t->next_free = priv->next_descriptor; + priv->next_descriptor = t; + return desired_status == TXS_SUCCESSFUL; + } + + /* Loop until we push the status to a "final decision" value. */ + old_status = t->status; + while ( old_status <= TXS_READ_PHASE ) + { + new_status = CASIO(&t->status, old_status, desired_status); + if ( old_status == new_status ) break; + old_status = new_status; + } + WMB_NEAR_CAS(); + + /* + * PHASE 3: CLEAN-UP. + */ + final_status = t->status; + for ( ent = t->writes; ent != NULL; ent = ent->next ) + { + /* If CAS fails, someone did it for us already. */ + (void)CASPO(&ent->b->data, marked_tx, + (final_status == TXS_SUCCESSFUL) ? ent->new: ent->old); + } + + rc_down_descriptor(ptst, t); + return final_status == TXS_SUCCESSFUL; +} + + +bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t) +{ + stm_tx_entry *ent; + + RMB(); + + for ( ent = t->reads; ent != NULL; ent = ent->next ) + { + if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail; + } + + for ( ent = t->writes; ent != NULL; ent = ent->next ) + { + if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail; + } + + return TRUE; + + fail: + t->status = TXS_FAILED; + return FALSE; +} + + +void abort_stm_tx(ptst_t *ptst, stm_tx *t) +{ + t->status = TXS_FAILED; +} + + +void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + sigjmp_buf *penv; + void *result; + + pent = search_stm_tx_entry(&t->writes, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + pent = search_stm_tx_entry(&t->reads, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + ent = alloc_stm_tx_entry(t); + ent->b = b; + ent->old = read_blk_data(ptst, b); + ent->new = ent->old; + ent->next = *pent; + *pent = ent; + + assert(!is_descriptor(ent->new)); + return ent->new; + + found: + result = ent->new; + ent = t->check; + if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: + penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **r_pent, **w_pent, *ent; + sigjmp_buf *penv; + void *result; + + w_pent = search_stm_tx_entry(&t->writes, b); + ent = *w_pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + r_pent = search_stm_tx_entry(&t->reads, b); + ent = *r_pent; + if ( (ent != NULL) && (ent->b == b) ) + { + *r_pent = ent->next; + } + else + { + ent = alloc_stm_tx_entry(t); + ent->b = b; + ent->old = read_blk_data(ptst, b); + } + + ent->new = gc_alloc(ptst, t->gc_data_id); + ent->next = *w_pent; + *w_pent = ent; + memcpy(ent->new, ent->old, 
t->blk_size); + + assert(!is_descriptor(ent->old)); + assert(!is_descriptor(ent->new)); + return ent->new; + + found: + result = ent->new; + ent = t->check; + if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: + penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + void *data; + + pent = search_stm_tx_entry(&t->writes, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) + { + *pent = ent->next; + data = ent->new; + assert(!is_descriptor(data)); + gc_free(ptst, data, t->gc_data_id); + return; + } + + pent = search_stm_tx_entry(&t->reads, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) + { + *pent = ent->next; + } +} + + +static void handle_fault(int sig) +{ + ptst_t *ptst; + stm_tx *t; + + ptst = critical_enter(); + t = priv_ptst[ptst->id].cur_tx; + if ( (t != NULL) && !validate_stm_tx(ptst, t) ) + { + sigjmp_buf *penv = t->penv; + commit_stm_tx(ptst, t); + critical_exit(ptst); + siglongjmp(*penv, 0); + } + + fail: + fprintf(stderr, "Error: unhandleable SIGSEGV!\n"); + abort(); +} + + +void _init_stm_subsystem(int pad_data) +{ + struct sigaction act; + + do_padding = pad_data; + gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk))); + memset(priv_ptst, 0, sizeof(priv_ptst)); + + act.sa_handler = handle_fault; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGSEGV, &act, NULL); +} diff --git a/src/mcas/stm_herlihy.c b/src/mcas/stm_herlihy.c new file mode 100644 index 000000000..3410198df --- /dev/null +++ b/src/mcas/stm_herlihy.c @@ -0,0 +1,688 @@ +/****************************************************************************** + * stm_herlihy.c + * + * Obstruction-free software transactional memory (STM). + * + * For more information see: + * Software Transactional Memory for Dynamic-sized Data Structures + * Maurice Herlihy, Victor Luchangco, Mark Moir, and William Scherer III + * Proceedings of 2003 ACM Symposium on Principles of Distributed Computing + * + * Copyright (c) 2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef SPARC
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#define POLITE
+
+typedef struct stm_loc_st stm_loc;
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_loc_st {
+    unsigned long status; /* TXS_FAILED, TXS_SUCCESSFUL, descriptor. */
+    void *old;
+    void *new;
+};
+
+struct stm_blk_st {
+    stm_loc *loc;
+};
+
+struct stm_tx_entry_st {
+    stm_blk *b;
+    stm_loc *l;
+    void *data;
+    stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+    unsigned int status;
+    int rc;
+    stm_tx *next_free;
+    stm_tx_entry *reads;
+    stm_tx_entry *writes;
+    stm_tx_entry *alloc_ptr, *check;
+    void *dummy;
+    int gc_data_id, blk_size; /* copied from 'stm' structure */
+    sigjmp_buf *penv;
+};
+
+struct stm_st {
+    int gc_data_id;
+    int blk_size;
+};
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+    void *arena, *arena_lim;
+    stm_tx *next_descriptor;
+    stm_tx *cur_tx;
+#ifdef SPARC
+    unsigned int random_counter;
+#endif
+    CACHE_PAD(0);
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id;  /* Allocation id for block descriptors. */
+static int gc_loc_id;  /* Allocation id for locators. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#ifdef POLITE
+#define MAX_RETRIES 8
+#ifdef SPARC
+#define MIN_LOG_BACKOFF 4
+#define MAX_LOG_BACKOFF 31
+#define RANDOM_BITS 8
+#define RANDOM_SIZE (1 << RANDOM_BITS)
+#define RANDOM_MASK (RANDOM_SIZE - 1)
+static unsigned int rand_arr[RANDOM_SIZE];
+#endif
+#endif
+
+static stm_blk *dummy_obj; /* Dummy object (used by red-black trees). */
+static void *dummy_data;
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define ARENA_SIZE      40960
+#define DESCRIPTOR_SIZE  4096
+
+#define TXS_IN_PROGRESS 0U
+#define TXS_FAILED      1U
+#define TXS_SUCCESSFUL  2U
+
+#define is_descriptor(_p) (((unsigned long)(_p) & 3) == 0)
+#define mk_descriptor(_p) ((stm_tx *)(_p))
+
+/* Is transaction read-only? */
+#define read_only(_t) ((_t)->writes == NULL)
+
+/* Is transaction definitely doomed to fail?
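+ *
+ * (Clarifying note, not part of the original source: a transaction
+ * records the locator ent->l it saw on first access to a block.  Every
+ * writer installs a fresh locator with CASPO, so observing
+ * (_e)->b->loc != (_e)->l means another transaction has replaced the
+ * locator since we read it, and this transaction can no longer commit.)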
*/ +#define is_stale(_t, _e) \ + (((_t)->status != TXS_IN_PROGRESS) || ((_e)->b->loc != (_e)->l)) + +bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t); + +static void new_arena (priv_t *priv, int size) +{ + priv->arena = malloc(size); + if ( priv->arena == NULL ) abort(); + priv->arena_lim = (((char *) priv->arena) + size); +} + +static void release_descriptor(ptst_t *ptst, stm_tx *t) +{ + stm_tx_entry *ent; + priv_t *priv = &priv_ptst[ptst->id]; + void *data; + + t->next_free = priv->next_descriptor; + priv->next_descriptor = t; +} + +static int rc_delta_descriptor(stm_tx *t, int delta) +{ + int rc, new_rc = t->rc; + + do { rc = new_rc; } + while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc ); + + return rc; +} + +static void rc_up_descriptor(stm_tx *t) +{ + rc_delta_descriptor(t, 2); + MB(); +} + +static void rc_down_descriptor(ptst_t *ptst, stm_tx *t) +{ + int old_rc, new_rc, cur_rc = t->rc; + + WMB(); + + do { + old_rc = cur_rc; + new_rc = old_rc - 2; + if ( new_rc == 0 ) new_rc = 1; + } + while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc ); + + if ( old_rc == 2 ) release_descriptor(ptst, t); +} + +static stm_tx *new_descriptor(priv_t *priv) +{ + stm_tx *t; + + t = priv->next_descriptor; + + if ( t != NULL ) + { + priv->next_descriptor = t->next_free; + /* 'Unfree' descriptor, if it was previously freed. */ + if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1); + } + else + { + t = (stm_tx *) priv->arena; + priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE; + + if ( priv->arena >= priv->arena_lim ) + { + new_arena(priv, ARENA_SIZE); + t = (stm_tx *) priv->arena; + priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE; + } + + t->next_free = NULL; + t->rc = 2; + } + + return t; +} + + +static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t) +{ + stm_tx_entry *ent = t->alloc_ptr++; + assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <= + DESCRIPTOR_SIZE); + return ent; +} + + +static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b) +{ + stm_tx_entry *next = *pnext; + + while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) ) + { + pnext = &next->next; + next = *pnext; + } + + return pnext; +} + + +static int contention_wait(ptst_t *ptst, int attempts) +{ +#ifdef POLITE + if ( (attempts > 1) && (attempts <= MAX_RETRIES) ) + { +#ifdef SPARC /* Exactly as it was done by the original authors. 
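+ *
+ * (Illustrative arithmetic, not part of the original comment: on the
+ * third failed attempt, log_backoff = 3 - 2 + MIN_LOG_BACKOFF = 5, so
+ * the thread sleeps for a random 0..31 ns; each further attempt
+ * doubles the window, capped at (1 << MAX_LOG_BACKOFF) - 1 ns.)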
*/ + priv_t *priv = &priv_ptst[ptst->id]; + struct timespec rqtp; + unsigned int log_backoff, mask; + log_backoff = attempts - 2 + MIN_LOG_BACKOFF; + if ( log_backoff > MAX_LOG_BACKOFF ) + log_backoff = MAX_LOG_BACKOFF; + mask = (1 << log_backoff) - 1; + rqtp.tv_nsec = rand_arr[priv->random_counter++ & RANDOM_MASK] & mask; + rqtp.tv_sec = 0; + while ( nanosleep(&rqtp, NULL) != 0 ) continue; +#else + usleep(1); +#endif + } + + return attempts < MAX_RETRIES; +#else + return FALSE; +#endif +} + + +static void *read_loc_data(ptst_t *ptst, stm_loc *l) +{ + void *data; + stm_tx *t; + unsigned long st; + stm_tx_entry **pent; + int attempts = 0; + + for ( ; ; ) + { + switch ( (st = l->status) ) + { + case TXS_SUCCESSFUL: + return l->new; + case TXS_FAILED: + return l->old; + default: + t = mk_descriptor(st); + rc_up_descriptor(t); + if ( l->status == st ) + { + switch ( t->status ) + { + case TXS_SUCCESSFUL: + rc_down_descriptor(ptst, t); + l->status = TXS_SUCCESSFUL; + return l->new; + case TXS_FAILED: + rc_down_descriptor(ptst, t); + l->status = TXS_FAILED; + return l->old; + default: + if ( !contention_wait(ptst, ++attempts) ) + { + attempts = 0; + CASIO(&t->status, TXS_IN_PROGRESS, TXS_FAILED); + } + } + } + rc_down_descriptor(ptst, t); + } + } +} + + +static stm_loc *install_loc(ptst_t *ptst, stm_tx *t, + stm_blk *b, stm_loc *old_loc) +{ + stm_loc *new_loc = gc_alloc(ptst, gc_loc_id); + + new_loc->status = (unsigned long)t; + new_loc->new = gc_alloc(ptst, t->gc_data_id); + new_loc->old = read_loc_data(ptst, old_loc); + memcpy(new_loc->new, new_loc->old, t->blk_size); + + if ( CASPO(&b->loc, old_loc, new_loc) != old_loc ) + { + gc_unsafe_free(ptst, new_loc->new, t->gc_data_id); + gc_unsafe_free(ptst, new_loc , gc_loc_id); + new_loc = NULL; + } + else + { + gc_free(ptst, old_loc, gc_loc_id); + } + + return new_loc; +} + + +stm *new_stm(ptst_t *ptst, int blk_size) +{ + stm *mem = malloc(CACHE_LINE_SIZE); + mem->blk_size = blk_size; + mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size)); + return mem; +} + + +void free_stm(ptst_t *ptst, stm *mem) +{ + gc_remove_allocator(mem->gc_data_id); + free(mem); +} + + +stm_blk *new_stm_blk(ptst_t *ptst, stm *mem) +{ + stm_blk *b = gc_alloc(ptst, gc_blk_id); + stm_loc *l = gc_alloc(ptst, gc_loc_id); + b->loc = l; + l->status = TXS_SUCCESSFUL; + l->old = NULL; + l->new = gc_alloc(ptst, mem->gc_data_id); + return b; +} + + +void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + stm_loc *l; + void *data; + + l = FASPO(&b->loc, NULL); + data = read_loc_data(ptst, l); + + gc_free(ptst, data, mem->gc_data_id); + gc_free(ptst, l, gc_loc_id); + gc_free(ptst, b, gc_blk_id); +} + + +void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + return b->loc->new; +} + + +int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b) +{ + return mem->blk_size; +} + + +stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv) +{ + priv_t *priv = &priv_ptst[ptst->id]; + stm_tx *t; + + if ( priv->cur_tx != NULL ) goto nesting; + t = new_descriptor(priv); + t->status = TXS_IN_PROGRESS; + t->reads = t->writes = NULL; + t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1); + t->gc_data_id = mem->gc_data_id; + t->blk_size = mem->blk_size; + t->penv = penv; + t->dummy = NULL; + priv->cur_tx = t; + return t; + + nesting: + fprintf(stderr, "No nesting of transactions is allowed\n"); + return NULL; +} + + +bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t) +{ + unsigned int desired_st = TXS_SUCCESSFUL, st; + stm_tx_entry *ent; + priv_t *priv = &priv_ptst[ptst->id]; + + priv->cur_tx 
= NULL; + + MB(); + + for ( ent = t->reads; ent != NULL; ent = ent->next ) + { + if ( ent->b->loc != ent->l ) + desired_st = TXS_FAILED; + } + + if ( read_only(t) ) + { + /* A very fast path: we can immediately reuse the descriptor. */ + if ( t->dummy != NULL ) + gc_unsafe_free(ptst, t->dummy, t->gc_data_id); + t->next_free = priv->next_descriptor; + priv->next_descriptor = t; + return desired_st == TXS_SUCCESSFUL; + } + + st = CASIO(&t->status, TXS_IN_PROGRESS, desired_st); + if ( st == TXS_IN_PROGRESS ) + st = desired_st; + + assert((st == TXS_FAILED) || (st == TXS_SUCCESSFUL)); + + WMB_NEAR_CAS(); + + for ( ent = t->writes; ent != NULL; ent = ent->next ) + { + ent->l->status = (unsigned long)st; + gc_free(ptst, + (st == TXS_SUCCESSFUL) ? ent->l->old : ent->l->new, + t->gc_data_id); + } + + if ( t->dummy != NULL ) + gc_unsafe_free(ptst, t->dummy, t->gc_data_id); + + rc_down_descriptor(ptst, t); + + return st == TXS_SUCCESSFUL; +} + + +bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t) +{ + stm_tx_entry *ent; + + RMB(); + + /* A conflict on a pending update will cause us to get failed. */ + if ( t->status == TXS_FAILED ) + goto fail; + + /* Reads must be explicitly checked. */ + for ( ent = t->reads; ent != NULL; ent = ent->next ) + { + if ( ent->b->loc != ent->l ) + goto fail; + } + + return TRUE; + + fail: + t->status = TXS_FAILED; + return FALSE; +} + + +void abort_stm_tx(ptst_t *ptst, stm_tx *t) +{ + t->status = TXS_FAILED; +} + + +void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + sigjmp_buf *penv; + void *result; + + if ( b == dummy_obj ) + { + if ( t->dummy == NULL ) + { + t->dummy = gc_alloc(ptst, t->gc_data_id); + memcpy(t->dummy, dummy_data, t->blk_size); + } + return t->dummy; + } + + pent = search_stm_tx_entry(&t->writes, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + pent = search_stm_tx_entry(&t->reads, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + ent = alloc_stm_tx_entry(t); + ent->b = b; + if ( (ent->l = b->loc) == NULL ) + goto fail; + ent->data = read_loc_data(ptst, ent->l); + ent->next = *pent; + *pent = ent; + + return ent->data; + + found: + result = ent->data; + ent = t->check; + if ( is_stale(t, ent) ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: + penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **r_pent, **w_pent, *ent; + stm_loc *loc; + sigjmp_buf *penv; + void *result; + + if ( b == dummy_obj ) + { + if ( t->dummy == NULL ) + { + t->dummy = gc_alloc(ptst, t->gc_data_id); + memcpy(t->dummy, dummy_data, t->blk_size); + } + return t->dummy; + } + + w_pent = search_stm_tx_entry(&t->writes, b); + ent = *w_pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + r_pent = search_stm_tx_entry(&t->reads, b); + ent = *r_pent; + if ( (ent != NULL) && (ent->b == b) ) + { + *r_pent = ent->next; + } + else + { + ent = alloc_stm_tx_entry(t); + ent->b = b; + if ( (ent->l = b->loc) == NULL ) + goto fail; + } + + loc = install_loc(ptst, t, b, ent->l); + if ( loc == NULL ) goto fail; + + ent->l = loc; + ent->data = loc->new; + ent->next = *w_pent; + *w_pent = ent; + + return ent->data; + + found: + result = ent->data; + ent = t->check; + if ( is_stale(t, ent) ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: 
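+    /*
+     * Clarifying note (not part of the original source): abort_stm_tx()
+     * merely marks the descriptor TXS_FAILED; commit_stm_tx() must still
+     * be called so that installed locators are resolved and memory is
+     * reclaimed (see the NB in stm.h).  The siglongjmp() then lands on
+     * the sigsetjmp() hidden inside the new_stm_tx() macro, so a typical
+     * caller retry loop of the form
+     *
+     *     do { new_stm_tx(tx, ptst, mem); ... }
+     *     while ( !commit_stm_tx(ptst, tx) );
+     *
+     * transparently starts a fresh attempt.
+     */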
+ penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + if ( dummy_obj == NULL ) + { + dummy_obj = b; + dummy_data = read_loc_data(ptst, b->loc); + } +} + + +static void handle_fault(int sig) +{ + ptst_t *ptst; + stm_tx *t; + + ptst = critical_enter(); + t = priv_ptst[ptst->id].cur_tx; + if ( (t != NULL) && !validate_stm_tx(ptst, t) ) + { + sigjmp_buf *penv = t->penv; + commit_stm_tx(ptst, t); + critical_exit(ptst); + siglongjmp(*penv, 0); + } + + fail: + fprintf(stderr, "Error: unhandleable SIGSEGV!\n"); + abort(); +} + + +void _init_stm_subsystem(int pad_data) +{ + struct sigaction act; + +#ifdef SPARC + int i; + struct timespec rqtp; + + rqtp.tv_sec = 0; + rqtp.tv_nsec = 1000; + + while ( nanosleep(&rqtp, NULL) != 0 ) + { + if ( errno != EINTR ) + { + printf("Urk! Nanosleep not supported!\n"); + exit(1); + } + } + + for ( i = 0; i < RANDOM_SIZE; i++ ) + rand_arr[i] = (unsigned int)random(); +#endif + + do_padding = pad_data; + gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk))); + gc_loc_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_loc))); + memset(priv_ptst, 0, sizeof(priv_ptst)); + + act.sa_handler = handle_fault; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGSEGV, &act, NULL); +} diff --git a/src/mcas/stm_lock.c b/src/mcas/stm_lock.c new file mode 100644 index 000000000..ce65ea0d5 --- /dev/null +++ b/src/mcas/stm_lock.c @@ -0,0 +1,464 @@ +/****************************************************************************** + * stm_lock.c + * + * Lock-based software transactional memory (STM). + * Uses two-phase locking with multi-reader locks. + * + * Copyright (c) 2002-2003, K A Fraser + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. Neither the name of the Keir Fraser + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "portable_defns.h"
+#include "ptst.h"
+#include "gc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <unistd.h>
+
+typedef struct stm_blk_st stm_blk;
+typedef struct stm_tx_entry_st stm_tx_entry;
+typedef struct stm_tx_st stm_tx;
+typedef struct stm_st stm;
+
+struct stm_blk_st {
+    void *data;
+    mrsw_lock_t lock;
+};
+
+struct stm_tx_entry_st {
+    stm_blk *b;
+    void *old;
+    void *new;
+    stm_tx_entry *next;
+};
+
+struct stm_tx_st {
+    int status;
+    stm_tx_entry *blocks;
+    stm_tx_entry *alloc_ptr, *check;
+    int gc_data_id, blk_size; /* copied from 'stm' structure */
+    sigjmp_buf *penv;
+};
+
+struct stm_st {
+    int gc_data_id;
+    int blk_size;
+};
+
+#define DESCRIPTOR_IN_USE(_t) ((_t)->penv != NULL)
+
+#define DESCRIPTOR_SIZE 4096
+#define MAX_TX_ENTS (DESCRIPTOR_SIZE / sizeof(stm_tx_entry))
+
+/* Private per-thread state. The array is indexed off ptst->id. */
+typedef struct {
+    char desc[DESCRIPTOR_SIZE];
+} priv_t;
+
+static priv_t priv_ptst[MAX_THREADS];
+static int gc_blk_id;  /* Allocation id for block descriptors. */
+static int do_padding; /* Should all allocations be padded to a cache line? */
+
+#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))
+
+#define TXS_IN_PROGRESS 0
+#define TXS_FAILED      1
+#define TXS_SUCCESSFUL  2
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
+
+static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
+{
+    stm_tx_entry *ent = t->alloc_ptr++;
+    assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
+           DESCRIPTOR_SIZE);
+    return ent;
+}
+
+
+static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
+{
+    stm_tx_entry *next = *pnext;
+
+    while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
+    {
+        pnext = &next->next;
+        next  = *pnext;
+    }
+
+    return pnext;
+}
+
+
+stm *new_stm(ptst_t *ptst, int blk_size)
+{
+    stm *mem = malloc(CACHE_LINE_SIZE);
+    mem->blk_size = blk_size;
+    mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
+    return mem;
+}
+
+
+void free_stm(ptst_t *ptst, stm *mem)
+{
+    gc_remove_allocator(mem->gc_data_id);
+    free(mem);
+}
+
+
+stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
+{
+    stm_blk *b;
+    b = gc_alloc(ptst, gc_blk_id);
+    b->data = gc_alloc(ptst, mem->gc_data_id);
+    mrsw_init(&b->lock);
+    return b;
+}
+
+
+void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+    gc_free(ptst, b->data, mem->gc_data_id);
+    gc_free(ptst, b, gc_blk_id);
+}
+
+
+void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+    return b->data;
+}
+
+
+int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
+{
+    return mem->blk_size;
+}
+
+
+stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
+{
+    stm_tx *t = (stm_tx *)&priv_ptst[ptst->id];
+    if ( DESCRIPTOR_IN_USE(t) ) goto nesting;
+    t->status = TXS_IN_PROGRESS;
+    t->blocks = NULL;
+    t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
+    t->gc_data_id = mem->gc_data_id;
+    t->blk_size = mem->blk_size;
+    t->penv = penv;
+    return t;
+
+ nesting:
+    fprintf(stderr, "No nesting of transactions is allowed\n");
+    return NULL;
+}
+
+
+bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
+{
+    stm_tx_entry *ent, *last_ent;
+    mrsw_qnode_t qn[MAX_TX_ENTS];
+    stm_blk *b;
+    void *old;
+    int i;
+
+    t->penv = NULL;
+
+    /* Outcome may have been decided by an 'abort' or 'validate' operation. */
+    if ( t->status != TXS_IN_PROGRESS ) goto out;
+
+    /* We start by taking locks in order, and checking old values.
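+     *
+     * (Clarifying note, not part of the original comment: t->blocks is
+     * kept sorted by block address by search_stm_tx_entry(), so any two
+     * transactions acquire the locks they share in the same global
+     * order.  E.g. if T1 and T2 both touch blocks A < B, neither can
+     * hold B's lock while waiting for A's, which rules out deadlock.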
*/ + for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next ) + { + b = ent->b; + if ( (old = ent->old) == ent->new ) + { + rd_lock(&b->lock, &qn[i]); + } + else + { + wr_lock(&b->lock, &qn[i]); + } + /* Check old value, and shortcut to failure if we mismatch. */ + if ( b->data != old ) goto fail; + } + + /* + * LINEARISATION POINT FOR SUCCESS: + * We haven't written new values yet, but that's okay as we have write + * locks on those locations. Noone can see old value now and yet still + * commit (as they'll be waiting for the read lock). + */ + t->status = TXS_SUCCESSFUL; + + /* We definitely succeed now: release locks and write new values. */ + for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next ) + { + b = ent->b; + if ( ent->old == ent->new ) + { + rd_unlock(&b->lock, &qn[i]); + } + else + { + b->data = ent->new; + wr_unlock(&b->lock, &qn[i]); + } + } + + out: + if ( t->status == TXS_SUCCESSFUL ) + { + for ( ent = t->blocks; ent != NULL; ent = ent->next ) + { + if ( ent->old == ent->new ) continue; + gc_free(ptst, ent->old, t->gc_data_id); + } + return TRUE; + } + else + { + for ( ent = t->blocks; ent != NULL; ent = ent->next ) + { + if ( ent->old == ent->new ) continue; + gc_unsafe_free(ptst, ent->new, t->gc_data_id); + } + return FALSE; + } + + /* + * We put (hopefully rare) failure case out-of-line here. + * This is also the LINEARISTAION POINT FOR FAILURE. + */ + fail: + last_ent = ent->next; + t->status = TXS_FAILED; + for ( i = 0, ent = t->blocks; ent != last_ent; i++, ent = ent->next ) + { + b = ent->b; + if ( ent->old == ent->new ) + { + rd_unlock(&b->lock, &qn[i]); + } + else + { + wr_unlock(&b->lock, &qn[i]); + } + } + goto out; +} + + +bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t) +{ + stm_tx_entry *ent, *last_ent = NULL; + mrsw_qnode_t qn[MAX_TX_ENTS]; + stm_blk *b; + void *old; + int i; + + RMB(); + + /* Lock-acquire phase */ + for ( i = 0, ent = t->blocks; ent != NULL; i++, ent = ent->next ) + { + b = ent->b; + + if ( (old = ent->old) == ent->new ) + { + rd_lock(&b->lock, &qn[i]); + } + else + { + wr_lock(&b->lock, &qn[i]); + } + + if ( b->data != old ) + { + t->status = TXS_FAILED; + last_ent = ent->next; + break; + } + } + + /* Lock-release phase */ + for ( i = 0, ent = t->blocks; ent != last_ent; i++, ent = ent->next ) + { + b = ent->b; + if ( ent->old == ent->new ) + { + rd_unlock(&b->lock, &qn[i]); + } + else + { + wr_unlock(&b->lock, &qn[i]); + } + } + + return t->status != TXS_FAILED; +} + + +void abort_stm_tx(ptst_t *ptst, stm_tx *t) +{ + t->status = TXS_FAILED; +} + + +void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + sigjmp_buf *penv; + void *result; + + pent = search_stm_tx_entry(&t->blocks, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) goto found; + + ent = alloc_stm_tx_entry(t); + ent->b = b; + ent->old = b->data; + ent->new = ent->old; + ent->next = *pent; + *pent = ent; + return ent->new; + + found: + result = ent->new; + ent = t->check; + if ( ent->b->data != ent->old ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: + penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + sigjmp_buf *penv; + void *result; + + pent = search_stm_tx_entry(&t->blocks, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) + { + if ( ent->old != ent->new ) goto found; + } + 
else + { + ent = alloc_stm_tx_entry(t); + ent->b = b; + ent->old = b->data; + ent->next = *pent; + *pent = ent; + } + + ent->new = gc_alloc(ptst, t->gc_data_id); + memcpy(ent->new, ent->old, t->blk_size); + return ent->new; + + found: + result = ent->new; + ent = t->check; + if ( ent->b->data != ent->old ) goto fail; + if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1); + return result; + + fail: + penv = t->penv; + abort_stm_tx(ptst, t); + commit_stm_tx(ptst, t); + siglongjmp(*penv, 0); + assert(0); + return NULL; +} + + +void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b) +{ + stm_tx_entry **pent, *ent; + void *data; + + pent = search_stm_tx_entry(&t->blocks, b); + ent = *pent; + if ( (ent != NULL) && (ent->b == b) ) + { + *pent = ent->next; + if ( (data = ent->new) != ent->old ) + { + gc_free(ptst, data, t->gc_data_id); + } + } +} + + +static void handle_fault(int sig) +{ + ptst_t *ptst; + stm_tx *t; + + ptst = critical_enter(); + t = (stm_tx *)&priv_ptst[ptst->id]; + if ( DESCRIPTOR_IN_USE(t) && !validate_stm_tx(ptst, t) ) + { + sigjmp_buf *penv = t->penv; + commit_stm_tx(ptst, t); + critical_exit(ptst); + siglongjmp(*penv, 0); + } + + fail: + fprintf(stderr, "Error: unhandleable SIGSEGV!\n"); + abort(); +} + + +void _init_stm_subsystem(int pad_data) +{ + struct sigaction act; + + do_padding = pad_data; + gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk))); + memset(priv_ptst, 0, sizeof(priv_ptst)); + + act.sa_handler = handle_fault; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGSEGV, &act, NULL); +} -- 2.39.5