Home | History | Annotate | Line # | Download | only in kern
subr_pcq.c revision 1.20
      1  1.20  riastrad /*	$NetBSD: subr_pcq.c,v 1.20 2023/02/24 11:02:27 riastradh Exp $	*/
      2   1.3     rmind 
      3   1.1      matt /*-
      4  1.11        ad  * Copyright (c) 2009, 2019 The NetBSD Foundation, Inc.
      5   1.1      matt  * All rights reserved.
      6   1.1      matt  *
      7   1.1      matt  * This code is derived from software contributed to The NetBSD Foundation
      8   1.4     rmind  * by Andrew Doran.
      9   1.1      matt  *
     10   1.1      matt  * Redistribution and use in source and binary forms, with or without
     11   1.1      matt  * modification, are permitted provided that the following conditions
     12   1.1      matt  * are met:
     13   1.1      matt  * 1. Redistributions of source code must retain the above copyright
     14   1.1      matt  *    notice, this list of conditions and the following disclaimer.
     15   1.1      matt  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1      matt  *    notice, this list of conditions and the following disclaimer in the
     17   1.1      matt  *    documentation and/or other materials provided with the distribution.
     18   1.1      matt  *
     19   1.1      matt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20   1.1      matt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21   1.1      matt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22   1.1      matt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23   1.1      matt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24   1.1      matt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25   1.1      matt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26   1.1      matt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27   1.1      matt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28   1.1      matt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29   1.1      matt  * POSSIBILITY OF SUCH DAMAGE.
     30   1.1      matt  */
     31   1.3     rmind 
     32   1.4     rmind /*
     33   1.4     rmind  * Lockless producer/consumer queue.
     34  1.18  riastrad  *
     35  1.18  riastrad  * Summary of the producer algorithm in pcq_put (may run many in
     36  1.18  riastrad  * parallel with each other and with a consumer):
     37  1.18  riastrad  *
     38  1.18  riastrad  *	P1. initialize an item
     39  1.18  riastrad  *
     40  1.18  riastrad  *	P2. atomic_cas(&pcq->pcq_pc) loop to advance the producer
     41  1.18  riastrad  *	    pointer, reserving a space at c (fails if not enough space)
     42  1.18  riastrad  *
     43  1.18  riastrad  *	P3. atomic_store_release(&pcq->pcq_items[c], item) to publish
     44  1.18  riastrad  *	    the item in the space it reserved
     45  1.18  riastrad  *
     46  1.18  riastrad  * Summary of the consumer algorithm in pcq_get (must be serialized by
     47  1.18  riastrad  * caller with other consumers, may run in parallel with any number of
     48  1.18  riastrad  * producers):
     49  1.18  riastrad  *
     50  1.18  riastrad  *	C1. atomic_load_relaxed(&pcq->pcq_pc) to get the consumer
     51  1.18  riastrad  *	    pointer and a snapshot of the producer pointer, which may
     52  1.18  riastrad  *	    point to null items or point to initialized items (fails if
     53  1.18  riastrad  *	    no space reserved for published items yet)
     54  1.18  riastrad  *
     55  1.18  riastrad  *	C2. atomic_load_consume(&pcq->pcq_items[c]) to get the next
     56  1.18  riastrad  *	    unconsumed but potentially published item (fails if item
     57  1.18  riastrad  *	    not published yet)
     58  1.18  riastrad  *
     59  1.18  riastrad  *	C3. pcq->pcq_items[c] = NULL to consume the next unconsumed but
     60  1.18  riastrad  *	    published item
     61  1.18  riastrad  *
     62  1.18  riastrad  *	C4. membar_producer
     63  1.18  riastrad  *
     64  1.18  riastrad  *	C5. atomic_cas(&pcq->pcq_pc) loop to advance the consumer
     65  1.18  riastrad  *	    pointer
     66  1.18  riastrad  *
     67  1.18  riastrad  *	C6. use the item
     68  1.18  riastrad  *
     69  1.18  riastrad  * Note that there is a weird bare membar_producer which is not matched
     70  1.18  riastrad  * by membar_consumer.  This is one of the rare cases of a memory
     71  1.18  riastrad  * barrier on one side that is not matched by a memory barrier on
     72  1.18  riastrad  * another side, but the ordering works out, with a somewhat more
     73  1.18  riastrad  * involved proof.
     74  1.18  riastrad  *
     75  1.18  riastrad  * Some properties that need to be proved:
     76  1.18  riastrad  *
     77  1.18  riastrad  *	Theorem 1.  For pcq_put call that leads into pcq_get:
     78  1.18  riastrad  *	Initializing item at P1 is dependency-ordered before usage of
     79  1.18  riastrad  *	item at C6, so items placed by pcq_put can be safely used by
     80  1.18  riastrad  *	the caller of pcq_get.
     81  1.18  riastrad  *
     82  1.18  riastrad  *	Proof sketch.
     83  1.18  riastrad  *
     84  1.18  riastrad  *		Assume load/store P2 synchronizes with load/store C1
     85  1.18  riastrad  *		(if not, pcq_get fails in `if (p == c) return NULL').
     86  1.18  riastrad  *
     87  1.18  riastrad  *		Assume store-release P3 synchronizes with load-consume
     88  1.18  riastrad  *		C2 (if not, pcq_get fails in `if (item == NULL) return
     89  1.18  riastrad  *		NULL').
     90  1.18  riastrad  *
     91  1.18  riastrad  *		Then:
     92  1.18  riastrad  *
     93  1.18  riastrad  *		- P1 is sequenced before store-release P3
     94  1.18  riastrad  *		- store-release P3 synchronizes with load-consume C2
     95  1.18  riastrad  *		- load-consume C2 is dependency-ordered before C6
     96  1.18  riastrad  *
     97  1.18  riastrad  *		Hence transitively, P1 is dependency-ordered before C6,
     98  1.18  riastrad  *		QED.
     99  1.18  riastrad  *
    100  1.18  riastrad  *	Theorem 2.  For pcq_get call followed by pcq_put: Nulling out
    101  1.18  riastrad  *	location at store C3 happens before placing a new item in the
    102  1.18  riastrad  *	same location at store P3, so items are not lost.
    103  1.18  riastrad  *
    104  1.18  riastrad  *	Proof sketch.
    105  1.18  riastrad  *
    106  1.18  riastrad  *		Assume load/store C5 synchronizes with load/store P2
    107  1.18  riastrad  *		(otherwise pcq_peek starts over the CAS loop or fails).
    108  1.18  riastrad  *
    109  1.18  riastrad  *		Then:
    110  1.18  riastrad  *
    111  1.18  riastrad  *		- store C3 is sequenced before membar_producer C4
    112  1.18  riastrad  *		- membar_producer C4 is sequenced before load/store C5
    113  1.18  riastrad  *		- load/store C5 synchronizes with load/store P2 at &pcq->pcq_pc
    114  1.18  riastrad  *		- P2 is sequenced before store-release P3
    115  1.18  riastrad  *
    116  1.18  riastrad  *		Hence transitively, store C3 happens before
    117  1.18  riastrad  *		store-release P3, QED.
    118   1.4     rmind  */
    119   1.4     rmind 
    120   1.1      matt #include <sys/cdefs.h>
    121  1.20  riastrad __KERNEL_RCSID(0, "$NetBSD: subr_pcq.c,v 1.20 2023/02/24 11:02:27 riastradh Exp $");
    122   1.1      matt 
    123   1.1      matt #include <sys/param.h>
    124   1.1      matt #include <sys/types.h>
    125   1.1      matt #include <sys/atomic.h>
    126   1.1      matt #include <sys/kmem.h>
    127   1.1      matt 
    128   1.1      matt #include <sys/pcq.h>
    129   1.1      matt 
    130   1.4     rmind /*
    131   1.4     rmind  * Internal producer-consumer queue structure.  Note: providing a separate
    132   1.4     rmind  * cache-line both for pcq_t::pcq_pc and pcq_t::pcq_items.
    133   1.4     rmind  */
struct pcq {
	u_int			pcq_nitems;	/* queue capacity; constant after pcq_create() */
	uint8_t			pcq_pad1[COHERENCY_UNIT - sizeof(u_int)];
	volatile uint32_t	pcq_pc;		/* producer index (low 16 bits) | consumer index (high 16 bits) */
	uint8_t			pcq_pad2[COHERENCY_UNIT - sizeof(uint32_t)];
	void * volatile		pcq_items[];	/* ring of item pointers; NULL == slot empty/unpublished */
};
    141   1.1      matt 
    142   1.4     rmind /*
    143   1.4     rmind  * Producer (p) - stored in the lower 16 bits of pcq_t::pcq_pc.
    144   1.4     rmind  * Consumer (c) - in the higher 16 bits.
    145   1.4     rmind  *
    146   1.4     rmind  * We have a limitation of 16 bits i.e. 0xffff items in the queue.
    147   1.8     rmind  * The PCQ_MAXLEN constant is set accordingly.
    148   1.4     rmind  */
    149   1.4     rmind 
    150   1.4     rmind static inline void
    151   1.4     rmind pcq_split(uint32_t v, u_int *p, u_int *c)
    152   1.1      matt {
    153   1.3     rmind 
    154   1.4     rmind 	*p = v & 0xffff;
    155   1.4     rmind 	*c = v >> 16;
    156   1.4     rmind }
    157   1.1      matt 
    158   1.4     rmind static inline uint32_t
    159   1.4     rmind pcq_combine(u_int p, u_int c)
    160   1.4     rmind {
    161   1.4     rmind 
    162   1.4     rmind 	return p | (c << 16);
    163   1.4     rmind }
    164   1.4     rmind 
    165   1.4     rmind static inline u_int
    166   1.4     rmind pcq_advance(pcq_t *pcq, u_int pc)
    167   1.4     rmind {
    168   1.4     rmind 
    169   1.4     rmind 	if (__predict_false(++pc == pcq->pcq_nitems)) {
    170   1.4     rmind 		return 0;
    171   1.4     rmind 	}
    172   1.4     rmind 	return pc;
    173   1.1      matt }
    174   1.1      matt 
/*
 * pcq_put: place an item at the end of the queue.
 *
 * => item must not be NULL: NULL is reserved as the "slot not yet
 *    published" marker that pcq_get()/pcq_peek() test for.
 * => Returns false, without queueing the item, if the queue is full.
 * => May run concurrently with other producers and with one consumer.
 */
bool
pcq_put(pcq_t *pcq, void *item)
{
	uint32_t v, nv;
	u_int op, p, c;

	KASSERT(item != NULL);

	/*
	 * Step P2 of the algorithm described at the top of the file:
	 * CAS loop to advance the producer pointer, reserving slot op.
	 */
	do {
		v = atomic_load_relaxed(&pcq->pcq_pc);
		pcq_split(v, &op, &c);
		p = pcq_advance(pcq, op);
		if (p == c) {
			/* Queue is full. */
			return false;
		}
		nv = pcq_combine(p, c);
	} while (atomic_cas_32(&pcq->pcq_pc, v, nv) != v);

	/*
	 * Step P3: publish the item in the reserved slot.
	 *
	 * Ensure that the update to pcq_pc is globally visible before the
	 * data item.  See pcq_get().  This also ensures that any changes
	 * that the caller made to the data item are globally visible
	 * before we put it onto the list.
	 */
	atomic_store_release(&pcq->pcq_items[op], item);

	/*
	 * Synchronization activity to wake up the consumer will ensure
	 * that the update to pcq_items[] is visible before the wakeup
	 * arrives.  So, we do not need an additional memory barrier here.
	 */
	return true;
}
    212   1.1      matt 
/*
 * pcq_peek: return the next item from the queue without removal.
 *
 * => Returns NULL if the queue is empty, or if a producer has reserved
 *    the next slot but not yet published its item (see pcq_get()).
 */
void *
pcq_peek(pcq_t *pcq)
{
	const uint32_t v = atomic_load_relaxed(&pcq->pcq_pc);
	u_int p, c;

	pcq_split(v, &p, &c);

	/* See comment on race below in pcq_get(). */
	return (p == c) ? NULL : atomic_load_consume(&pcq->pcq_items[c]);
}
    227   1.1      matt 
/*
 * pcq_get: remove and return the next item for consumption or NULL if empty.
 *
 * => The caller must prevent concurrent gets from occurring.
 * => May run concurrently with any number of producers; the steps
 *    below (C1..C5) refer to the algorithm summary at the top of
 *    the file.
 */
void *
pcq_get(pcq_t *pcq)
{
	uint32_t v, nv;
	u_int p, c;
	void *item;

	/* C1: snapshot producer/consumer pointers. */
	v = atomic_load_relaxed(&pcq->pcq_pc);
	pcq_split(v, &p, &c);
	if (p == c) {
		/* Queue is empty: nothing to return. */
		return NULL;
	}
	/* C2: fetch the next unconsumed, potentially published item. */
	item = atomic_load_consume(&pcq->pcq_items[c]);
	if (item == NULL) {
		/*
		 * Raced with sender: we rely on a notification (e.g. softint
		 * or wakeup) being generated after the producer's pcq_put(),
		 * causing us to retry pcq_get() later.
		 */
		return NULL;
	}
	/*
	 * C3: consume the item.  We have exclusive access to this slot,
	 * so no need for atomic_store_*.
	 */
	pcq->pcq_items[c] = NULL;
	c = pcq_advance(pcq, c);
	nv = pcq_combine(p, c);

	/*
	 * C4: Ensure that update to pcq_items[c] becomes globally visible
	 * before the update to pcq_pc.  If it were reordered to occur
	 * after it, we could in theory wipe out a modification made
	 * to pcq_items[c] by pcq_put().
	 *
	 * No need for load-before-store ordering of membar_release
	 * because the only load we need to ensure happens first is the
	 * load of pcq->pcq_items[c], but that necessarily happens
	 * before the store to pcq->pcq_items[c] to null it out because
	 * it is at the same memory location.  Yes, this is a bare
	 * membar_producer with no matching membar_consumer.
	 */
	membar_producer();
	/* C5: CAS loop to advance the consumer pointer. */
	while (__predict_false(atomic_cas_32(&pcq->pcq_pc, v, nv) != v)) {
		v = atomic_load_relaxed(&pcq->pcq_pc);
		pcq_split(v, &p, &c);
		c = pcq_advance(pcq, c);
		nv = pcq_combine(p, c);
	}
	return item;
}
    285   1.1      matt 
    286   1.1      matt pcq_t *
    287   1.4     rmind pcq_create(size_t nitems, km_flag_t kmflags)
    288   1.1      matt {
    289   1.1      matt 	pcq_t *pcq;
    290   1.1      matt 
    291  1.19  riastrad 	KASSERT(nitems > 0);
    292  1.19  riastrad 	KASSERT(nitems <= PCQ_MAXLEN);
    293   1.1      matt 
    294   1.6     alnsn 	pcq = kmem_zalloc(offsetof(pcq_t, pcq_items[nitems]), kmflags);
    295  1.11        ad 	if (pcq != NULL) {
    296  1.11        ad 		pcq->pcq_nitems = nitems;
    297   1.4     rmind 	}
    298   1.1      matt 	return pcq;
    299   1.1      matt }
    300   1.1      matt 
    301   1.1      matt void
    302   1.1      matt pcq_destroy(pcq_t *pcq)
    303   1.1      matt {
    304   1.3     rmind 
    305   1.4     rmind 	kmem_free(pcq, offsetof(pcq_t, pcq_items[pcq->pcq_nitems]));
    306   1.4     rmind }
    307   1.4     rmind 
    308   1.4     rmind size_t
    309   1.4     rmind pcq_maxitems(pcq_t *pcq)
    310   1.4     rmind {
    311   1.1      matt 
    312   1.4     rmind 	return pcq->pcq_nitems;
    313   1.1      matt }
    314