/*	$NetBSD: i915_syncmap.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $	*/

/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_syncmap.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $");

#include <linux/slab.h>

#include "i915_syncmap.h"

#include "i915_gem.h" /* GEM_BUG_ON() */
#include "i915_selftest.h"

#define SHIFT ilog2(KSYNCMAP)
#define MASK (KSYNCMAP - 1)
/*
 * struct i915_syncmap is a layer of a radix tree that maps a u64 fence
 * context id to the last u32 fence seqno waited upon from that context.
 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
 * the root. This allows us to access the whole tree via a single pointer
 * to the most recently used layer. We expect fence contexts to be dense,
 * with most reuse occurring on the same i915_gem_context but on neighbouring
 * engines (i.e. on adjacent contexts), so repeated lookups hit the same
 * leaf, making it a very effective lookup cache. If the new lookup is not
 * on the same leaf, we expect it to be on the neighbouring branch.
 *
 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
 * allows us to store whether a particular seqno is valid (i.e. allows us
 * to distinguish unset from 0).
 *
 * A branch holds an array of layer pointers, and has height > 0, and always
 * has at least 2 layers (either branches or leaves) below it.
 *
 * For example,
 *	for x in
 *	  0 1 2 0x10 0x11 0x200 0x201
 *	  0x500000 0x500001 0x503000 0x503001
 *	  0xE<<60:
 *		i915_syncmap_set(&sync, x, lower_32_bits(x));
 * will build a tree like:
 *	0xXXXXXXXXXXXXXXXX
 *	0-> 0x0000000000XXXXXX
 *	|   0-> 0x0000000000000XXX
 *	|   |   0-> 0x00000000000000XX
 *	|   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
 *	|   |   |   1-> 0x000000000000001X 0:10, 1:11
 *	|   |   2-> 0x000000000000020X 0:200, 1:201
 *	|   5-> 0x000000000050XXXX
 *	|       0-> 0x000000000050000X 0:500000, 1:500001
 *	|       3-> 0x000000000050300X 0:503000, 1:503001
 *	e-> 0xe00000000000000X e:e
 */
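/*
 * A minimal usage sketch from a hypothetical caller (not part of this
 * file), showing the public API implemented below:
 *
 *	struct i915_syncmap *sync;
 *	int err = 0;
 *
 *	i915_syncmap_init(&sync);
 *	if (!i915_syncmap_is_later(&sync, id, seqno)) {
 *		emit_wait(id, seqno);
 *		err = i915_syncmap_set(&sync, id, seqno);
 *	}
 *	...
 *	i915_syncmap_free(&sync);
 *
 * emit_wait() is a placeholder for whatever synchronisation the caller must
 * emit; only the init/is_later/set/free entry points are defined here.
 */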

struct i915_syncmap {
	u64 prefix;
	unsigned int height;
	unsigned int bitmap;
	struct i915_syncmap *parent;
	/*
	 * Following this header is an array of either seqno or child pointers:
	 * union {
	 *	u32 seqno[KSYNCMAP];
	 *	struct i915_syncmap *child[KSYNCMAP];
	 * };
	 */
};
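/*
 * Illustrative note (derived from __sync_alloc_leaf() and __sync_set()
 * below, not additional API): a leaf is allocated as
 *
 *	sizeof(struct i915_syncmap) + KSYNCMAP * sizeof(u32)
 *
 * and a branch as
 *
 *	sizeof(struct i915_syncmap) + KSYNCMAP * sizeof(struct i915_syncmap *)
 *
 * with the trailing array accessed via __sync_seqno() or __sync_child()
 * according to p->height.
 */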

/**
 * i915_syncmap_init -- initialise the #i915_syncmap
 * @root: pointer to the #i915_syncmap
 */
void i915_syncmap_init(struct i915_syncmap **root)
{
	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
	BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap));
	*root = NULL;
}

static inline u32 *__sync_seqno(struct i915_syncmap *p)
{
	GEM_BUG_ON(p->height);
	return (u32 *)(p + 1);
}

static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
{
	GEM_BUG_ON(!p->height);
	return (struct i915_syncmap **)(p + 1);
}

static inline unsigned int
__sync_branch_idx(const struct i915_syncmap *p, u64 id)
{
	return (id >> p->height) & MASK;
}

static inline unsigned int
__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
{
	GEM_BUG_ON(p->height);
	return id & MASK;
}

static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
{
	return id >> p->height >> SHIFT;
}

static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
{
	GEM_BUG_ON(p->height);
	return id >> SHIFT;
}
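/*
 * Worked example of the helpers above, using ids from the example tree
 * and assuming KSYNCMAP == 16 (so SHIFT == 4 and MASK == 0xf):
 *
 * For the leaf covering 0x50300X:
 *	__sync_leaf_prefix(leaf, 0x503001) == 0x503001 >> 4 == 0x50300
 *	__sync_leaf_idx(leaf, 0x503001)    == 0x503001 & 0xf == 0x1
 *
 * For the height-12 branch covering 0x50XXXX:
 *	__sync_branch_idx(branch, 0x503001)    == (0x503001 >> 12) & 0xf == 0x3
 *	__sync_branch_prefix(branch, 0x503001) == 0x503001 >> 12 >> 4    == 0x50
 */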

static inline bool seqno_later(u32 a, u32 b)
{
	return (s32)(a - b) >= 0;
}
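/*
 * Illustrative note: the signed comparison above is wraparound-safe, i.e.
 * a is treated as later than (or equal to) b whenever the unsigned
 * difference a - b is less than 2^31. For example,
 *
 *	seqno_later(1, 0xffffffff) == true  (seqno 1 follows a wrap)
 *	seqno_later(0xffffffff, 1) == false
 */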

/**
 * i915_syncmap_is_later -- compare against the last known sync point
 * @root: pointer to the #i915_syncmap
 * @id: the context id (other timeline) we are synchronising to
 * @seqno: the sequence number along the other timeline
 *
 * If we have already synchronised this @root timeline with another (@id) then
 * we can omit any repeated or earlier synchronisation requests. If the two
 * timelines are already coupled, we can also omit the dependency between the
 * two as that is already known via the timeline.
 *
 * Returns true if the two timelines are already synchronised with respect to
 * @seqno, false if not and the synchronisation must be emitted.
 */
bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
{
	struct i915_syncmap *p;
	unsigned int idx;

	p = *root;
	if (!p)
		return false;

	if (likely(__sync_leaf_prefix(p, id) == p->prefix))
		goto found;

	/* First climb the tree back to a parent branch */
	do {
		p = p->parent;
		if (!p)
			return false;

		if (__sync_branch_prefix(p, id) == p->prefix)
			break;
	} while (1);

	/* And then descend again until we find our leaf */
	do {
		if (!p->height)
			break;

		p = __sync_child(p)[__sync_branch_idx(p, id)];
		if (!p)
			return false;

		if (__sync_branch_prefix(p, id) != p->prefix)
			return false;
	} while (1);

	*root = p;
found:
	idx = __sync_leaf_idx(p, id);
	if (!(p->bitmap & BIT(idx)))
		return false;

	return seqno_later(__sync_seqno(p)[idx], seqno);
}
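/*
 * Worked example of a lookup, using the example tree above and assuming
 * *root currently caches the 0x50000X leaf: i915_syncmap_is_later(&sync,
 * 0x503000, ...) misses that leaf (prefix 0x50300 != 0x50000), climbs to
 * the height-12 branch covering 0x50XXXX (whose branch prefix 0x50 matches),
 * then descends via child[0x3] to the 0x50300X leaf, which becomes the new
 * cached *root before the bitmap and seqno are checked.
 */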

static struct i915_syncmap *
__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
{
	struct i915_syncmap *p;

	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
	if (unlikely(!p))
		return NULL;

	p->parent = parent;
	p->height = 0;
	p->bitmap = 0;
	p->prefix = __sync_leaf_prefix(p, id);
	return p;
}

static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
{
	unsigned int idx = __sync_leaf_idx(p, id);

	p->bitmap |= BIT(idx);
	__sync_seqno(p)[idx] = seqno;
}

static inline void __sync_set_child(struct i915_syncmap *p,
				    unsigned int idx,
				    struct i915_syncmap *child)
{
	p->bitmap |= BIT(idx);
	__sync_child(p)[idx] = child;
}

static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
{
	struct i915_syncmap *p = *root;
	unsigned int idx;

	if (!p) {
		p = __sync_alloc_leaf(NULL, id);
		if (unlikely(!p))
			return -ENOMEM;

		goto found;
	}

	/* Caller handled the likely cached case */
	GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);

	/* Climb back up the tree until we find a common prefix */
	do {
		if (!p->parent)
			break;

		p = p->parent;

		if (__sync_branch_prefix(p, id) == p->prefix)
			break;
	} while (1);

	/*
	 * No shortcut, we have to descend the tree to find the right layer
	 * containing this fence.
	 *
	 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
	 * or lower layers. Leaf nodes (height = 0) contain the fences, all
	 * other nodes (height > 0) are internal layers that point to a lower
	 * node. Each internal layer has at least 2 descendants.
	 *
	 * Starting at the top, we check whether the current prefix matches. If
	 * it doesn't, we have gone past our target and need to insert a join
	 * into the tree, and a new leaf node for the target as a descendant
	 * of the join, as well as the original layer.
	 *
	 * The matching prefix means we are still following the right branch
	 * of the tree. If it has height 0, we have found our leaf and just
	 * need to replace the fence slot with ourselves. If the height is
	 * not zero, our slot contains the next layer in the tree (unless
	 * it is empty, in which case we can add ourselves as a new leaf).
	 * As we descend the tree the prefix grows (and height decreases).
	 */
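	/*
	 * Worked example (assuming KSYNCMAP == 16): if the tree holds only
	 * the height-0 leaf for ids 0x0-0xf (prefix 0) and we now set
	 * id 0x200, the branch prefixes diverge (0x20 != 0x0), so a join is
	 * inserted below:
	 *
	 *	above = round_up(fls64(0x20 ^ 0x0), SHIFT) == 8
	 *	join->height == 8, join->prefix == 0x200 >> 8 >> 4 == 0
	 *	join->child[0x0] = the old 0x00X leaf
	 *	join->child[0x2] = a new 0x20X leaf, allocated on the descent
	 *
	 * which matches the 0x000000000000020X entry in the example tree at
	 * the top of this file.
	 */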
	do {
		struct i915_syncmap *next;

		if (__sync_branch_prefix(p, id) != p->prefix) {
			unsigned int above;

			/* Insert a join above the current layer */
			next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
				       GFP_KERNEL);
			if (unlikely(!next))
				return -ENOMEM;

			/* Compute the height at which these two diverge */
			above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
			above = round_up(above, SHIFT);
			next->height = above + p->height;
			next->prefix = __sync_branch_prefix(next, id);

			/* Insert the join into the parent */
			if (p->parent) {
				idx = __sync_branch_idx(p->parent, id);
				__sync_child(p->parent)[idx] = next;
				GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
			}
			next->parent = p->parent;

			/* Compute the idx of the other branch, not our id! */
			idx = p->prefix >> (above - SHIFT) & MASK;
			__sync_set_child(next, idx, p);
			p->parent = next;

			/* Ascend to the join */
			p = next;
		} else {
			if (!p->height)
				break;
		}

		/* Descend into the next layer */
		GEM_BUG_ON(!p->height);
		idx = __sync_branch_idx(p, id);
		next = __sync_child(p)[idx];
		if (!next) {
			next = __sync_alloc_leaf(p, id);
			if (unlikely(!next))
				return -ENOMEM;

			__sync_set_child(p, idx, next);
			p = next;
			break;
		}

		p = next;
	} while (1);

found:
	GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
	__sync_set_seqno(p, id, seqno);
	*root = p;
	return 0;
}

/**
 * i915_syncmap_set -- mark the most recent syncpoint between contexts
 * @root: pointer to the #i915_syncmap
 * @id: the context id (other timeline) we have synchronised to
 * @seqno: the sequence number along the other timeline
 *
 * When we synchronise this @root timeline with another (@id), we also know
 * that we have synchronised with all previous seqno along that timeline. If
 * we then have a request to synchronise with the same seqno or older, we can
 * omit it; see i915_syncmap_is_later().
 *
 * Returns 0 on success, or a negative error code.
 */
int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
{
	struct i915_syncmap *p = *root;

	/*
	 * We expect to be called in sequence following is_later(id), which
	 * should have preloaded the root for us.
	 */
	if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
		__sync_set_seqno(p, id, seqno);
		return 0;
	}

	return __sync_set(root, id, seqno);
}

static void __sync_free(struct i915_syncmap *p)
{
	if (p->height) {
		unsigned int i;

		/*
		 * Free each populated child; ffs() is 1-based, so child
		 * i - 1 corresponds to the lowest set bit in the bitmap.
		 */
		while ((i = ffs(p->bitmap))) {
			p->bitmap &= ~0u << i;
			__sync_free(__sync_child(p)[i - 1]);
		}
	}

	kfree(p);
}

/**
 * i915_syncmap_free -- free all memory associated with the syncmap
 * @root: pointer to the #i915_syncmap
 *
 * Either when the timeline is to be freed and we no longer need the sync
 * point tracking, or when the fences are all known to be signaled and the
 * sync point tracking is redundant, we can free the #i915_syncmap to recover
 * its allocations.
 *
 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
 * reuse.
 */
void i915_syncmap_free(struct i915_syncmap **root)
{
	struct i915_syncmap *p;

	p = *root;
	if (!p)
		return;

	while (p->parent)
		p = p->parent;

	__sync_free(p);
	*root = NULL;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_syncmap.c"
#endif