      1 /*	$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright © 2014 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  *
     25  * Authors:
     26  *    Ben Widawsky <ben@bwidawsk.net>
     27  *    Michel Thierry <michel.thierry@intel.com>
     28  *    Thomas Daniel <thomas.daniel@intel.com>
     29  *    Oscar Mateo <oscar.mateo@intel.com>
     30  *
     31  */
     32 
     33 /**
     34  * DOC: Logical Rings, Logical Ring Contexts and Execlists
     35  *
     36  * Motivation:
     37  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
     38  * These expanded contexts enable a number of new abilities, especially
     39  * "Execlists" (also implemented in this file).
     40  *
     41  * One of the main differences from the legacy HW contexts is that logical
     42  * ring contexts incorporate many more things into the context's state, like
     43  * PDPs or ringbuffer control registers:
     44  *
     45  * The reason why PDPs are included in the context is straightforward: as
     46  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
     47  * contained there means you don't need to do a ppgtt->switch_mm yourself;
     48  * instead, the GPU will do it for you on the context switch.
     49  *
     50  * But, what about the ringbuffer control registers (head, tail, etc.)?
     51  * Shouldn't we just need a set of those per engine command streamer? This is
     52  * where the name "Logical Rings" starts to make sense: by virtualizing the
     53  * rings, the engine cs shifts to a new "ring buffer" with every context
     54  * switch. When you want to submit a workload to the GPU you: A) choose your
     55  * context, B) find its appropriate virtualized ring, C) write commands to it
     56  * and then, finally, D) tell the GPU to switch to that context.
     57  *
     58  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
     59  * to a context is via a context execution list, ergo "Execlists".
     60  *
     61  * LRC implementation:
     62  * Regarding the creation of contexts, we have:
     63  *
     64  * - One global default context.
     65  * - One local default context for each opened fd.
     66  * - One local extra context for each context create ioctl call.
     67  *
     68  * Now that ringbuffers belong per-context (and not per-engine, like before)
     69  * and that contexts are uniquely tied to a given engine (and not reusable,
     70  * like before), we need:
     71  *
     72  * - One ringbuffer per-engine inside each context.
     73  * - One backing object per-engine inside each context.
     74  *
     75  * The global default context starts its life with these new objects fully
     76  * allocated and populated. The local default context for each opened fd is
     77  * more complex, because we don't know at creation time which engine is going
     78  * to use them. To handle this, we have implemented a deferred creation of LR
     79  * contexts:
     80  *
     81  * The local context starts its life as a hollow or blank holder that only
     82  * gets populated for a given engine once we receive an execbuffer. If later
     83  * on we receive another execbuffer ioctl for the same context but a different
     84  * engine, we allocate/populate a new ringbuffer and context backing object and
     85  * so on.
     86  *
     87  * Finally, regarding local contexts created using the ioctl call: as they are
     88  * only allowed with the render ring, we can allocate & populate them right
     89  * away (no need to defer anything, at least for now).
     90  *
     91  * Execlists implementation:
     92  * Execlists are the new method by which, on gen8+ hardware, workloads are
     93  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
     94  * This method works as follows:
     95  *
     96  * When a request is committed, its commands (the BB start and any leading or
     97  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
     98  * for the appropriate context. The tail pointer in the hardware context is not
     99  * updated at this time but is instead kept by the driver in the ringbuffer
    100  * structure. A structure representing this request is added to a request queue
    101  * for the appropriate engine: this structure contains a copy of the context's
    102  * tail after the request was written to the ring buffer and a pointer to the
    103  * context itself.
    104  *
    105  * If the engine's request queue was empty before the request was added, the
    106  * queue is processed immediately. Otherwise the queue will be processed during
    107  * a context switch interrupt. In any case, elements on the queue will get sent
    108  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
    109  * globally unique 20-bit submission ID.
    110  *
    111  * When execution of a request completes, the GPU updates the context status
    112  * buffer with a context complete event and generates a context switch interrupt.
    113  * During the interrupt handling, the driver examines the events in the buffer:
    114  * for each context complete event, if the announced ID matches that on the head
    115  * of the request queue, then that request is retired and removed from the queue.
    116  *
    117  * After processing, if any requests were retired and the queue is not empty
    118  * then a new execution list can be submitted. The two requests at the front of
    119  * the queue are next to be submitted, but since a context may not occur twice in
    120  * an execution list, if subsequent requests have the same ID as the first then
    121  * the two requests must be combined. This is done simply by discarding requests
    122  * at the head of the queue until either only one request is left (in which case
    123  * we use a NULL second context) or the first two requests have unique IDs.
    124  *
    125  * By always executing the first two requests in the queue the driver ensures
    126  * that the GPU is kept as busy as possible. In the case where a single context
    127  * completes but a second context is still executing, the request for this second
    128  * context will be at the head of the queue when we remove the first one. This
    129  * request will then be resubmitted along with a new request for a different context,
    130  * which will cause the hardware to continue executing the second request and queue
    131  * the new request (the GPU detects the condition of a context getting preempted
    132  * with the same context and optimizes the context switch flow by not doing
    133  * preemption, but just sampling the new tail pointer).
    134  *
    135  */
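
        /*
         * To make the ELSP pairing rule above concrete, here is a minimal sketch
         * that models the queue as a flat array of context IDs instead of the
         * driver's priority tree of i915_request.  Every name in it
         * (pick_elsp_pair, port0, port1) is illustrative only and does not exist
         * elsewhere in this file.
         *
         *	static void pick_elsp_pair(const u32 *queue, unsigned int count,
         *				   u32 *port0, u32 *port1)
         *	{
         *		unsigned int i = 0;
         *
         *		*port0 = queue[i++];	// head of the queue always runs
         *
         *		// Requests with the same context as port0 are combined
         *		// into that submission, so skip past them.
         *		while (i < count && queue[i] == *port0)
         *			i++;
         *
         *		// The second port takes the next distinct context, or a
         *		// NULL (zero) context if nothing else is queued.
         *		*port1 = i < count ? queue[i] : 0;
         *	}
         */
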
    136 #include <sys/cdefs.h>
    137 __KERNEL_RCSID(0, "$NetBSD: intel_lrc.c,v 1.8 2021/12/19 12:32:15 riastradh Exp $");
    138 
    139 #include <linux/interrupt.h>
    140 
    141 #include "i915_drv.h"
    142 #include "i915_perf.h"
    143 #include "i915_trace.h"
    144 #include "i915_vgpu.h"
    145 #include "intel_context.h"
    146 #include "intel_engine_pm.h"
    147 #include "intel_gt.h"
    148 #include "intel_gt_pm.h"
    149 #include "intel_gt_requests.h"
    150 #include "intel_lrc_reg.h"
    151 #include "intel_mocs.h"
    152 #include "intel_reset.h"
    153 #include "intel_ring.h"
    154 #include "intel_workarounds.h"
    155 
    156 #include <linux/nbsd-namespace.h>
    157 
    158 #define RING_EXECLIST_QFULL		(1 << 0x2)
    159 #define RING_EXECLIST1_VALID		(1 << 0x3)
    160 #define RING_EXECLIST0_VALID		(1 << 0x4)
    161 #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
    162 #define RING_EXECLIST1_ACTIVE		(1 << 0x11)
    163 #define RING_EXECLIST0_ACTIVE		(1 << 0x12)
    164 
    165 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
    166 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
    167 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
    168 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
    169 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
    170 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
    171 
    172 #define GEN8_CTX_STATUS_COMPLETED_MASK \
    173 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
    174 
    175 #define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
    176 
    177 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
    178 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
    179 #define GEN12_CSB_SW_CTX_ID_MASK		GENMASK(25, 15)
    180 #define GEN12_IDLE_CTX_ID		0x7FF
    181 #define GEN12_CSB_CTX_VALID(csb_dw) \
    182 	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
    183 
    184 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
    185 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
    186 #define WA_TAIL_DWORDS 2
    187 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
    188 
    189 struct virtual_engine {
    190 	struct intel_engine_cs base;
    191 	struct intel_context context;
    192 
    193 	/*
    194 	 * We allow only a single request through the virtual engine at a time
    195 	 * (each request in the timeline waits for the completion fence of
    196 	 * the previous before being submitted). By restricting ourselves to
    197 	 * only submitting a single request, each request is placed on to a
    198 	 * physical engine to maximise load spreading (by virtue of the late greedy
    199 	 * scheduling -- each real engine takes the next available request
    200 	 * upon idling).
    201 	 */
    202 	struct i915_request *request;
    203 
    204 	/*
    205 	 * We keep an rbtree of available virtual engines inside each physical
    206 	 * engine, sorted by priority. Here we preallocate the nodes we need
    207 	 * for the virtual engine, indexed by physical_engine->id.
    208 	 */
    209 	struct ve_node {
    210 		struct rb_node rb;
    211 		int prio;
    212 		uint64_t order;
    213 		bool inserted;
    214 	} nodes[I915_NUM_ENGINES];
    215 	uint64_t order;
    216 
    217 	/*
    218 	 * Keep track of bonded pairs -- restrictions upon our selection
    219 	 * of physical engines any particular request may be submitted to.
    220 	 * If we receive a submit-fence from a master engine, we will only
    221 	 * use one of sibling_mask physical engines.
    222 	 */
    223 	struct ve_bond {
    224 		const struct intel_engine_cs *master;
    225 		intel_engine_mask_t sibling_mask;
    226 	} *bonds;
    227 	unsigned int num_bonds;
    228 
    229 	/* And finally, which physical engines this virtual engine maps onto. */
    230 	unsigned int num_siblings;
    231 	struct intel_engine_cs *siblings[0];
    232 };
    233 
    234 #ifdef __NetBSD__
    235 static int
    236 compare_ve_nodes(void *cookie, const void *va, const void *vb)
    237 {
    238 	const struct ve_node *na = va;
    239 	const struct ve_node *nb = vb;
    240 
    241 	if (na->prio < nb->prio)
    242 		return -1;
    243 	if (na->prio > nb->prio)
    244 		return +1;
    245 	if (na->order < nb->order)
    246 		return -1;
    247 	if (na->order > nb->order)
    248 		return +1;
    249 	return 0;
    250 }
    251 
    252 static int
    253 compare_ve_node_key(void *cookie, const void *vn, const void *vk)
    254 {
    255 	const struct ve_node *n = vn;
    256 	const int *k = vk;
    257 
    258 	if (n->prio < *k)
    259 		return -1;
    260 	if (n->prio > *k)
    261 		return +1;
    262 	return 0;
    263 }
    264 
    265 static const rb_tree_ops_t ve_tree_ops = {
    266 	.rbto_compare_nodes = compare_ve_nodes,
    267 	.rbto_compare_key = compare_ve_node_key,
    268 	.rbto_node_offset = offsetof(struct ve_node, rb),
    269 };
    270 #endif
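
        /*
         * A minimal usage sketch for the comparators above, using the standard
         * NetBSD <sys/rbtree.h> API.  The tree and node variables shown here are
         * illustrative only; the real tree is the rbtree of available virtual
         * engines kept inside each physical engine (see the ve_node comment
         * above).
         *
         *	rb_tree_t tree;
         *	struct ve_node *node;
         *	int prio;
         *
         *	rb_tree_init(&tree, &ve_tree_ops);
         *	rb_tree_insert_node(&tree, node);	// ordered by (prio, order)
         *	node = rb_tree_find_node(&tree, &prio);	// keyed lookup by prio
         *	rb_tree_remove_node(&tree, node);
         */
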
    271 
    272 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
    273 {
    274 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
    275 	return container_of(engine, struct virtual_engine, base);
    276 }
    277 
    278 static int __execlists_context_alloc(struct intel_context *ce,
    279 				     struct intel_engine_cs *engine);
    280 
    281 static void execlists_init_reg_state(u32 *reg_state,
    282 				     const struct intel_context *ce,
    283 				     const struct intel_engine_cs *engine,
    284 				     const struct intel_ring *ring,
    285 				     bool close);
    286 static void
    287 __execlists_update_reg_state(const struct intel_context *ce,
    288 			     const struct intel_engine_cs *engine,
    289 			     u32 head);
    290 
    291 static void mark_eio(struct i915_request *rq)
    292 {
    293 	if (i915_request_completed(rq))
    294 		return;
    295 
    296 	GEM_BUG_ON(i915_request_signaled(rq));
    297 
    298 	dma_fence_set_error(&rq->fence, -EIO);
    299 	i915_request_mark_complete(rq);
    300 }
    301 
    302 static struct i915_request *
    303 active_request(const struct intel_timeline * const tl, struct i915_request *rq)
    304 {
    305 	struct i915_request *active = rq;
    306 
    307 	rcu_read_lock();
    308 	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
    309 		if (i915_request_completed(rq))
    310 			break;
    311 
    312 		active = rq;
    313 	}
    314 	rcu_read_unlock();
    315 
    316 	return active;
    317 }
    318 
    319 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
    320 {
    321 	return (i915_ggtt_offset(engine->status_page.vma) +
    322 		I915_GEM_HWS_PREEMPT_ADDR);
    323 }
    324 
    325 static inline void
    326 ring_set_paused(const struct intel_engine_cs *engine, int state)
    327 {
    328 	/*
    329 	 * We inspect HWS_PREEMPT with a semaphore inside
    330 	 * engine->emit_fini_breadcrumb. If the dword is true,
    331 	 * the ring is paused as the semaphore will busywait
    332 	 * until the dword is false.
    333 	 */
    334 	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
    335 	if (state)
    336 		wmb();
    337 }
    338 
    339 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
    340 {
    341 	return rb_entry(rb, struct i915_priolist, node);
    342 }
    343 
    344 static inline int rq_prio(const struct i915_request *rq)
    345 {
    346 	return rq->sched.attr.priority;
    347 }
    348 
    349 static int effective_prio(const struct i915_request *rq)
    350 {
    351 	int prio = rq_prio(rq);
    352 
    353 	/*
    354 	 * If this request is special and must not be interrupted at any
    355 	 * cost, so be it. Note we are only checking the most recent request
    356 	 * in the context and so may be masking an earlier vip request. It
    357 	 * is hoped that under the conditions where nopreempt is used, this
    358 	 * will not matter (i.e. all requests to that context will be
    359 	 * nopreempt for as long as desired).
    360 	 */
    361 	if (i915_request_has_nopreempt(rq))
    362 		prio = I915_PRIORITY_UNPREEMPTABLE;
    363 
    364 	/*
    365 	 * On unwinding the active request, we give it a priority bump
    366 	 * if it has completed waiting on any semaphore. If we know that
    367 	 * the request has already started, we can prevent an unwanted
    368 	 * preempt-to-idle cycle by taking that into account now.
    369 	 */
    370 	if (__i915_request_has_started(rq))
    371 		prio |= I915_PRIORITY_NOSEMAPHORE;
    372 
    373 	/* Restrict mere WAIT boosts from triggering preemption */
    374 	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
    375 	return prio | __NO_PREEMPTION;
    376 }
    377 
    378 static int queue_prio(const struct intel_engine_execlists *execlists)
    379 {
    380 	struct i915_priolist *p;
    381 	struct rb_node *rb;
    382 
    383 	rb = rb_first_cached(&execlists->queue);
    384 	if (!rb)
    385 		return INT_MIN;
    386 
    387 	/*
    388 	 * As the priolist[] are inverted, with the highest priority in [0],
    389 	 * we have to flip the index value to obtain the priority.
    390 	 */
    391 	p = to_priolist(rb);
    392 	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
    393 }
    394 
    395 static inline bool need_preempt(const struct intel_engine_cs *engine,
    396 				const struct i915_request *rq,
    397 				struct rb_node *rb)
    398 {
    399 	int last_prio;
    400 
    401 	if (!intel_engine_has_semaphores(engine))
    402 		return false;
    403 
    404 	/*
    405 	 * Check if the current priority hint merits a preemption attempt.
    406 	 *
    407 	 * We record the highest value priority we saw during rescheduling
    408 	 * prior to this dequeue, therefore we know that if it is strictly
    409 	 * less than the current tail of ELSP[0], we do not need to force
    410 	 * a preempt-to-idle cycle.
    411 	 *
    412 	 * However, the priority hint is a mere hint that we may need to
    413 	 * preempt. If that hint is stale or we may be trying to preempt
    414 	 * ourselves, ignore the request.
    415 	 *
    416 	 * More naturally we would write
    417 	 *      prio >= max(0, last);
    418 	 * except that we wish to prevent triggering preemption at the same
    419 	 * priority level: the task that is running should remain running
    420 	 * to preserve FIFO ordering of dependencies.
    421 	 */
    422 	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
    423 	if (engine->execlists.queue_priority_hint <= last_prio)
    424 		return false;
    425 
    426 	/*
    427 	 * Check against the first request in ELSP[1]; it will, thanks to the
    428 	 * power of PI, be the highest priority of that context.
    429 	 */
    430 	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
    431 	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
    432 		return true;
    433 
    434 	if (rb) {
    435 		struct virtual_engine *ve =
    436 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
    437 		bool preempt = false;
    438 
    439 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
    440 			struct i915_request *next;
    441 
    442 			rcu_read_lock();
    443 			next = READ_ONCE(ve->request);
    444 			if (next)
    445 				preempt = rq_prio(next) > last_prio;
    446 			rcu_read_unlock();
    447 		}
    448 
    449 		if (preempt)
    450 			return preempt;
    451 	}
    452 
    453 	/*
    454 	 * If the inflight context did not trigger the preemption, then maybe
    455 	 * it was the set of queued requests? Pick the highest priority in
    456 	 * the queue (the first active priolist) and see if it deserves to be
    457 	 * running instead of ELSP[0].
    458 	 *
    459 	 * The highest priority request in the queue cannot be either
    460 	 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
    461 	 * context, its priority would not exceed ELSP[0] aka last_prio.
    462 	 */
    463 	return queue_prio(&engine->execlists) > last_prio;
    464 }
    465 
    466 __maybe_unused static inline bool
    467 assert_priority_queue(const struct i915_request *prev,
    468 		      const struct i915_request *next)
    469 {
    470 	/*
    471 	 * Without preemption, the prev may refer to the still active element
    472 	 * which we refuse to let go.
    473 	 *
    474 	 * Even with preemption, there are times when we think it is better not
    475 	 * to preempt and leave an ostensibly lower priority request in flight.
    476 	 */
    477 	if (i915_request_is_active(prev))
    478 		return true;
    479 
    480 	return rq_prio(prev) >= rq_prio(next);
    481 }
    482 
    483 /*
    484  * The context descriptor encodes various attributes of a context,
    485  * including its GTT address and some flags. Because it's fairly
    486  * expensive to calculate, we'll just do it once and cache the result,
    487  * which remains valid until the context is unpinned.
    488  *
    489  * This is what a descriptor looks like, from LSB to MSB::
    490  *
    491  *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
    492  *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
    493  *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
    494  *      bits 53-54:    mbz, reserved for use by hardware
    495  *      bits 55-63:    group ID, currently unused and set to 0
    496  *
    497  * Starting from Gen11, the upper dword of the descriptor has a new format:
    498  *
    499  *      bits 32-36:    reserved
    500  *      bits 37-47:    SW context ID
    501  *      bits 48-53:    engine instance
    502  *      bit 54:        mbz, reserved for use by hardware
    503  *      bits 55-60:    SW counter
    504  *      bits 61-63:    engine class
    505  *
    506  * engine info, SW context ID and SW counter need to form a unique number
    507  * (Context ID) per lrc.
    508  */
    509 static u64
    510 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
    511 {
    512 	u64 desc;
    513 
    514 	desc = INTEL_LEGACY_32B_CONTEXT;
    515 	if (i915_vm_is_4lvl(ce->vm))
    516 		desc = INTEL_LEGACY_64B_CONTEXT;
    517 	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
    518 
    519 	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
    520 	if (IS_GEN(engine->i915, 8))
    521 		desc |= GEN8_CTX_L3LLC_COHERENT;
    522 
    523 	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
    524 	/*
    525 	 * The following 32 bits are copied into the OA reports (dword 2).
    526 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
    527 	 * anything below.
    528 	 */
    529 	if (INTEL_GEN(engine->i915) >= 11) {
    530 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
    531 								/* bits 48-53 */
    532 
    533 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
    534 								/* bits 61-63 */
    535 	}
    536 
    537 	return desc;
    538 }
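
        /*
         * For reference, a sketch of how the Gen11+ upper-dword fields documented
         * above could be unpacked again.  Plain shifts and masks are used instead
         * of the driver's GEN11_* macros, and these helpers are illustrative only
         * (they are not used elsewhere in this file).
         *
         *	static inline u32 desc_sw_ctx_id(u64 desc)		// bits 37-47
         *	{
         *		return (desc >> 37) & 0x7ff;
         *	}
         *
         *	static inline u32 desc_engine_instance(u64 desc)	// bits 48-53
         *	{
         *		return (desc >> 48) & 0x3f;
         *	}
         *
         *	static inline u32 desc_engine_class(u64 desc)		// bits 61-63
         *	{
         *		return (desc >> 61) & 0x7;
         *	}
         */
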
    539 
    540 static inline unsigned int dword_in_page(void *addr)
    541 {
    542 	return offset_in_page(addr) / sizeof(u32);
    543 }
    544 
    545 static void set_offsets(u32 *regs,
    546 			const u8 *data,
    547 			const struct intel_engine_cs *engine,
    548 			bool clear)
    549 #define NOP(x) (BIT(7) | (x))
    550 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
    551 #define POSTED BIT(0)
    552 #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
    553 #define REG16(x) \
    554 	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
    555 	(((x) >> 2) & 0x7f)
    556 #define END(x) 0, (x)
    557 {
    558 	const u32 base = engine->mmio_base;
    559 
    560 	while (*data) {
    561 		u8 count, flags;
    562 
    563 		if (*data & BIT(7)) { /* skip */
    564 			count = *data++ & ~BIT(7);
    565 			if (clear)
    566 				memset32(regs, MI_NOOP, count);
    567 			regs += count;
    568 			continue;
    569 		}
    570 
    571 		count = *data & 0x3f;
    572 		flags = *data >> 6;
    573 		data++;
    574 
    575 		*regs = MI_LOAD_REGISTER_IMM(count);
    576 		if (flags & POSTED)
    577 			*regs |= MI_LRI_FORCE_POSTED;
    578 		if (INTEL_GEN(engine->i915) >= 11)
    579 			*regs |= MI_LRI_CS_MMIO;
    580 		regs++;
    581 
    582 		GEM_BUG_ON(!count);
    583 		do {
    584 			u32 offset = 0;
    585 			u8 v;
    586 
    587 			do {
    588 				v = *data++;
    589 				offset <<= 7;
    590 				offset |= v & ~BIT(7);
    591 			} while (v & BIT(7));
    592 
    593 			regs[0] = base + (offset << 2);
    594 			if (clear)
    595 				regs[1] = 0;
    596 			regs += 2;
    597 		} while (--count);
    598 	}
    599 
    600 	if (clear) {
    601 		u8 count = *++data;
    602 
    603 		/* Clear past the tail for HW access */
    604 		GEM_BUG_ON(dword_in_page(regs) > count);
    605 		memset32(regs, MI_NOOP, count - dword_in_page(regs));
    606 
    607 		/* Close the batch; used mainly by live_lrc_layout() */
    608 		*regs = MI_BATCH_BUFFER_END;
    609 		if (INTEL_GEN(engine->i915) >= 10)
    610 			*regs |= BIT(0);
    611 	}
    612 }
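
        /*
         * A worked example of the encoding consumed above, taken from the first
         * entries of gen8_xcs_offsets below (register names noted for orientation
         * only):
         *
         *	NOP(1)       -> 0x81		skip one dword (MI_NOOP when clearing)
         *	LRI(11, 0)   -> 0x0b		MI_LOAD_REGISTER_IMM(11), not posted
         *	REG16(0x244) -> 0x81, 0x11	offset = (0x01 << 7) | 0x11 = 0x91,
         *					i.e. mmio_base + (0x91 << 2) =
         *					mmio_base + 0x244 (RING_CONTEXT_CONTROL)
         *	REG(0x034)   -> 0x0d		mmio_base + 0x34 (RING_HEAD)
         *
         * A zero byte (the first half of END()) terminates the walk; the byte
         * after it gives the dword offset up to which the remainder is filled
         * with MI_NOOPs before the closing MI_BATCH_BUFFER_END when clearing.
         */
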
    613 
    614 static const u8 gen8_xcs_offsets[] = {
    615 	NOP(1),
    616 	LRI(11, 0),
    617 	REG16(0x244),
    618 	REG(0x034),
    619 	REG(0x030),
    620 	REG(0x038),
    621 	REG(0x03c),
    622 	REG(0x168),
    623 	REG(0x140),
    624 	REG(0x110),
    625 	REG(0x11c),
    626 	REG(0x114),
    627 	REG(0x118),
    628 
    629 	NOP(9),
    630 	LRI(9, 0),
    631 	REG16(0x3a8),
    632 	REG16(0x28c),
    633 	REG16(0x288),
    634 	REG16(0x284),
    635 	REG16(0x280),
    636 	REG16(0x27c),
    637 	REG16(0x278),
    638 	REG16(0x274),
    639 	REG16(0x270),
    640 
    641 	NOP(13),
    642 	LRI(2, 0),
    643 	REG16(0x200),
    644 	REG(0x028),
    645 
    646 	END(80)
    647 };
    648 
    649 static const u8 gen9_xcs_offsets[] = {
    650 	NOP(1),
    651 	LRI(14, POSTED),
    652 	REG16(0x244),
    653 	REG(0x034),
    654 	REG(0x030),
    655 	REG(0x038),
    656 	REG(0x03c),
    657 	REG(0x168),
    658 	REG(0x140),
    659 	REG(0x110),
    660 	REG(0x11c),
    661 	REG(0x114),
    662 	REG(0x118),
    663 	REG(0x1c0),
    664 	REG(0x1c4),
    665 	REG(0x1c8),
    666 
    667 	NOP(3),
    668 	LRI(9, POSTED),
    669 	REG16(0x3a8),
    670 	REG16(0x28c),
    671 	REG16(0x288),
    672 	REG16(0x284),
    673 	REG16(0x280),
    674 	REG16(0x27c),
    675 	REG16(0x278),
    676 	REG16(0x274),
    677 	REG16(0x270),
    678 
    679 	NOP(13),
    680 	LRI(1, POSTED),
    681 	REG16(0x200),
    682 
    683 	NOP(13),
    684 	LRI(44, POSTED),
    685 	REG(0x028),
    686 	REG(0x09c),
    687 	REG(0x0c0),
    688 	REG(0x178),
    689 	REG(0x17c),
    690 	REG16(0x358),
    691 	REG(0x170),
    692 	REG(0x150),
    693 	REG(0x154),
    694 	REG(0x158),
    695 	REG16(0x41c),
    696 	REG16(0x600),
    697 	REG16(0x604),
    698 	REG16(0x608),
    699 	REG16(0x60c),
    700 	REG16(0x610),
    701 	REG16(0x614),
    702 	REG16(0x618),
    703 	REG16(0x61c),
    704 	REG16(0x620),
    705 	REG16(0x624),
    706 	REG16(0x628),
    707 	REG16(0x62c),
    708 	REG16(0x630),
    709 	REG16(0x634),
    710 	REG16(0x638),
    711 	REG16(0x63c),
    712 	REG16(0x640),
    713 	REG16(0x644),
    714 	REG16(0x648),
    715 	REG16(0x64c),
    716 	REG16(0x650),
    717 	REG16(0x654),
    718 	REG16(0x658),
    719 	REG16(0x65c),
    720 	REG16(0x660),
    721 	REG16(0x664),
    722 	REG16(0x668),
    723 	REG16(0x66c),
    724 	REG16(0x670),
    725 	REG16(0x674),
    726 	REG16(0x678),
    727 	REG16(0x67c),
    728 	REG(0x068),
    729 
    730 	END(176)
    731 };
    732 
    733 static const u8 gen12_xcs_offsets[] = {
    734 	NOP(1),
    735 	LRI(13, POSTED),
    736 	REG16(0x244),
    737 	REG(0x034),
    738 	REG(0x030),
    739 	REG(0x038),
    740 	REG(0x03c),
    741 	REG(0x168),
    742 	REG(0x140),
    743 	REG(0x110),
    744 	REG(0x1c0),
    745 	REG(0x1c4),
    746 	REG(0x1c8),
    747 	REG(0x180),
    748 	REG16(0x2b4),
    749 
    750 	NOP(5),
    751 	LRI(9, POSTED),
    752 	REG16(0x3a8),
    753 	REG16(0x28c),
    754 	REG16(0x288),
    755 	REG16(0x284),
    756 	REG16(0x280),
    757 	REG16(0x27c),
    758 	REG16(0x278),
    759 	REG16(0x274),
    760 	REG16(0x270),
    761 
    762 	END(80)
    763 };
    764 
    765 static const u8 gen8_rcs_offsets[] = {
    766 	NOP(1),
    767 	LRI(14, POSTED),
    768 	REG16(0x244),
    769 	REG(0x034),
    770 	REG(0x030),
    771 	REG(0x038),
    772 	REG(0x03c),
    773 	REG(0x168),
    774 	REG(0x140),
    775 	REG(0x110),
    776 	REG(0x11c),
    777 	REG(0x114),
    778 	REG(0x118),
    779 	REG(0x1c0),
    780 	REG(0x1c4),
    781 	REG(0x1c8),
    782 
    783 	NOP(3),
    784 	LRI(9, POSTED),
    785 	REG16(0x3a8),
    786 	REG16(0x28c),
    787 	REG16(0x288),
    788 	REG16(0x284),
    789 	REG16(0x280),
    790 	REG16(0x27c),
    791 	REG16(0x278),
    792 	REG16(0x274),
    793 	REG16(0x270),
    794 
    795 	NOP(13),
    796 	LRI(1, 0),
    797 	REG(0x0c8),
    798 
    799 	END(80)
    800 };
    801 
    802 static const u8 gen9_rcs_offsets[] = {
    803 	NOP(1),
    804 	LRI(14, POSTED),
    805 	REG16(0x244),
    806 	REG(0x34),
    807 	REG(0x30),
    808 	REG(0x38),
    809 	REG(0x3c),
    810 	REG(0x168),
    811 	REG(0x140),
    812 	REG(0x110),
    813 	REG(0x11c),
    814 	REG(0x114),
    815 	REG(0x118),
    816 	REG(0x1c0),
    817 	REG(0x1c4),
    818 	REG(0x1c8),
    819 
    820 	NOP(3),
    821 	LRI(9, POSTED),
    822 	REG16(0x3a8),
    823 	REG16(0x28c),
    824 	REG16(0x288),
    825 	REG16(0x284),
    826 	REG16(0x280),
    827 	REG16(0x27c),
    828 	REG16(0x278),
    829 	REG16(0x274),
    830 	REG16(0x270),
    831 
    832 	NOP(13),
    833 	LRI(1, 0),
    834 	REG(0xc8),
    835 
    836 	NOP(13),
    837 	LRI(44, POSTED),
    838 	REG(0x28),
    839 	REG(0x9c),
    840 	REG(0xc0),
    841 	REG(0x178),
    842 	REG(0x17c),
    843 	REG16(0x358),
    844 	REG(0x170),
    845 	REG(0x150),
    846 	REG(0x154),
    847 	REG(0x158),
    848 	REG16(0x41c),
    849 	REG16(0x600),
    850 	REG16(0x604),
    851 	REG16(0x608),
    852 	REG16(0x60c),
    853 	REG16(0x610),
    854 	REG16(0x614),
    855 	REG16(0x618),
    856 	REG16(0x61c),
    857 	REG16(0x620),
    858 	REG16(0x624),
    859 	REG16(0x628),
    860 	REG16(0x62c),
    861 	REG16(0x630),
    862 	REG16(0x634),
    863 	REG16(0x638),
    864 	REG16(0x63c),
    865 	REG16(0x640),
    866 	REG16(0x644),
    867 	REG16(0x648),
    868 	REG16(0x64c),
    869 	REG16(0x650),
    870 	REG16(0x654),
    871 	REG16(0x658),
    872 	REG16(0x65c),
    873 	REG16(0x660),
    874 	REG16(0x664),
    875 	REG16(0x668),
    876 	REG16(0x66c),
    877 	REG16(0x670),
    878 	REG16(0x674),
    879 	REG16(0x678),
    880 	REG16(0x67c),
    881 	REG(0x68),
    882 
    883 	END(176)
    884 };
    885 
    886 static const u8 gen11_rcs_offsets[] = {
    887 	NOP(1),
    888 	LRI(15, POSTED),
    889 	REG16(0x244),
    890 	REG(0x034),
    891 	REG(0x030),
    892 	REG(0x038),
    893 	REG(0x03c),
    894 	REG(0x168),
    895 	REG(0x140),
    896 	REG(0x110),
    897 	REG(0x11c),
    898 	REG(0x114),
    899 	REG(0x118),
    900 	REG(0x1c0),
    901 	REG(0x1c4),
    902 	REG(0x1c8),
    903 	REG(0x180),
    904 
    905 	NOP(1),
    906 	LRI(9, POSTED),
    907 	REG16(0x3a8),
    908 	REG16(0x28c),
    909 	REG16(0x288),
    910 	REG16(0x284),
    911 	REG16(0x280),
    912 	REG16(0x27c),
    913 	REG16(0x278),
    914 	REG16(0x274),
    915 	REG16(0x270),
    916 
    917 	LRI(1, POSTED),
    918 	REG(0x1b0),
    919 
    920 	NOP(10),
    921 	LRI(1, 0),
    922 	REG(0x0c8),
    923 
    924 	END(80)
    925 };
    926 
    927 static const u8 gen12_rcs_offsets[] = {
    928 	NOP(1),
    929 	LRI(13, POSTED),
    930 	REG16(0x244),
    931 	REG(0x034),
    932 	REG(0x030),
    933 	REG(0x038),
    934 	REG(0x03c),
    935 	REG(0x168),
    936 	REG(0x140),
    937 	REG(0x110),
    938 	REG(0x1c0),
    939 	REG(0x1c4),
    940 	REG(0x1c8),
    941 	REG(0x180),
    942 	REG16(0x2b4),
    943 
    944 	NOP(5),
    945 	LRI(9, POSTED),
    946 	REG16(0x3a8),
    947 	REG16(0x28c),
    948 	REG16(0x288),
    949 	REG16(0x284),
    950 	REG16(0x280),
    951 	REG16(0x27c),
    952 	REG16(0x278),
    953 	REG16(0x274),
    954 	REG16(0x270),
    955 
    956 	LRI(3, POSTED),
    957 	REG(0x1b0),
    958 	REG16(0x5a8),
    959 	REG16(0x5ac),
    960 
    961 	NOP(6),
    962 	LRI(1, 0),
    963 	REG(0x0c8),
    964 
    965 	END(80)
    966 };
    967 
    968 #undef END
    969 #undef REG16
    970 #undef REG
    971 #undef LRI
    972 #undef NOP
    973 
    974 static const u8 *reg_offsets(const struct intel_engine_cs *engine)
    975 {
    976 	/*
    977 	 * The gen12+ lists only have the registers we program in the basic
    978 	 * default state. We rely on the context image using relative
    979 	 * addressing to automatically fix up the register state between the
    980 	 * physical engines for a virtual engine.
    981 	 */
    982 	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
    983 		   !intel_engine_has_relative_mmio(engine));
    984 
    985 	if (engine->class == RENDER_CLASS) {
    986 		if (INTEL_GEN(engine->i915) >= 12)
    987 			return gen12_rcs_offsets;
    988 		else if (INTEL_GEN(engine->i915) >= 11)
    989 			return gen11_rcs_offsets;
    990 		else if (INTEL_GEN(engine->i915) >= 9)
    991 			return gen9_rcs_offsets;
    992 		else
    993 			return gen8_rcs_offsets;
    994 	} else {
    995 		if (INTEL_GEN(engine->i915) >= 12)
    996 			return gen12_xcs_offsets;
    997 		else if (INTEL_GEN(engine->i915) >= 9)
    998 			return gen9_xcs_offsets;
    999 		else
   1000 			return gen8_xcs_offsets;
   1001 	}
   1002 }
   1003 
   1004 static struct i915_request *
   1005 __unwind_incomplete_requests(struct intel_engine_cs *engine)
   1006 {
   1007 	struct i915_request *rq, *rn, *active = NULL;
   1008 	struct list_head *uninitialized_var(pl);
   1009 	int prio = I915_PRIORITY_INVALID;
   1010 
   1011 	lockdep_assert_held(&engine->active.lock);
   1012 
   1013 	list_for_each_entry_safe_reverse(rq, rn,
   1014 					 &engine->active.requests,
   1015 					 sched.link) {
   1016 		if (i915_request_completed(rq))
   1017 			continue; /* XXX */
   1018 
   1019 		__i915_request_unsubmit(rq);
   1020 
   1021 		/*
   1022 		 * Push the request back into the queue for later resubmission.
   1023 		 * If this request is not native to this physical engine (i.e.
   1024 		 * it came from a virtual source), push it back onto the virtual
   1025 		 * engine so that it can be moved across onto another physical
   1026 		 * engine as load dictates.
   1027 		 */
   1028 		if (likely(rq->execution_mask == engine->mask)) {
   1029 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
   1030 			if (rq_prio(rq) != prio) {
   1031 				prio = rq_prio(rq);
   1032 				pl = i915_sched_lookup_priolist(engine, prio);
   1033 			}
   1034 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
   1035 
   1036 			list_move(&rq->sched.link, pl);
   1037 			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   1038 
   1039 			active = rq;
   1040 		} else {
   1041 			struct intel_engine_cs *owner = rq->context->engine;
   1042 
   1043 			/*
   1044 			 * Decouple the virtual breadcrumb before moving it
   1045 			 * back to the virtual engine -- we don't want the
   1046 			 * request to complete in the background and try
   1047 			 * and cancel the breadcrumb on the virtual engine
   1048 			 * (instead of the old engine where it is linked)!
   1049 			 */
   1050 			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
   1051 				     &rq->fence.flags)) {
   1052 				spin_lock_nested(&rq->lock,
   1053 						 SINGLE_DEPTH_NESTING);
   1054 				i915_request_cancel_breadcrumb(rq);
   1055 				spin_unlock(&rq->lock);
   1056 			}
   1057 			rq->engine = owner;
   1058 			owner->submit_request(rq);
   1059 			active = NULL;
   1060 		}
   1061 	}
   1062 
   1063 	return active;
   1064 }
   1065 
   1066 struct i915_request *
   1067 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
   1068 {
   1069 	struct intel_engine_cs *engine =
   1070 		container_of(execlists, typeof(*engine), execlists);
   1071 
   1072 	return __unwind_incomplete_requests(engine);
   1073 }
   1074 
   1075 static inline void
   1076 execlists_context_status_change(struct i915_request *rq, unsigned long status)
   1077 {
   1078 	/*
   1079 	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
   1080 	 * the compiler should eliminate this function as dead code.
   1081 	 */
   1082 	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
   1083 		return;
   1084 
   1085 	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
   1086 				   status, rq);
   1087 }
   1088 
   1089 static void intel_engine_context_in(struct intel_engine_cs *engine)
   1090 {
   1091 	unsigned long flags;
   1092 
   1093 	if (READ_ONCE(engine->stats.enabled) == 0)
   1094 		return;
   1095 
   1096 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1097 
   1098 	if (engine->stats.enabled > 0) {
   1099 		if (engine->stats.active++ == 0)
   1100 			engine->stats.start = ktime_get();
   1101 		GEM_BUG_ON(engine->stats.active == 0);
   1102 	}
   1103 
   1104 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1105 }
   1106 
   1107 static void intel_engine_context_out(struct intel_engine_cs *engine)
   1108 {
   1109 	unsigned long flags;
   1110 
   1111 	if (READ_ONCE(engine->stats.enabled) == 0)
   1112 		return;
   1113 
   1114 	write_seqlock_irqsave(&engine->stats.lock, flags);
   1115 
   1116 	if (engine->stats.enabled > 0) {
   1117 		ktime_t last;
   1118 
   1119 		if (engine->stats.active && --engine->stats.active == 0) {
   1120 			/*
   1121 			 * Decrement the active context count and, in case the GPU
   1122 			 * is now idle, add the elapsed time to the running total.
   1123 			 */
   1124 			last = ktime_sub(ktime_get(), engine->stats.start);
   1125 
   1126 			engine->stats.total = ktime_add(engine->stats.total,
   1127 							last);
   1128 		} else if (engine->stats.active == 0) {
   1129 			/*
   1130 			 * After turning on engine stats, context out might be
   1131 			 * the first event in which case we account from the
   1132 			 * time stats gathering was turned on.
   1133 			 */
   1134 			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
   1135 
   1136 			engine->stats.total = ktime_add(engine->stats.total,
   1137 							last);
   1138 		}
   1139 	}
   1140 
   1141 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
   1142 }
   1143 
   1144 static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
   1145 {
   1146 	if (INTEL_GEN(engine->i915) >= 12)
   1147 		return 0x60;
   1148 	else if (INTEL_GEN(engine->i915) >= 9)
   1149 		return 0x54;
   1150 	else if (engine->class == RENDER_CLASS)
   1151 		return 0x58;
   1152 	else
   1153 		return -1;
   1154 }
   1155 
   1156 static void
   1157 execlists_check_context(const struct intel_context *ce,
   1158 			const struct intel_engine_cs *engine)
   1159 {
   1160 	const struct intel_ring *ring = ce->ring;
   1161 	u32 *regs = ce->lrc_reg_state;
   1162 	bool valid = true;
   1163 	int x;
   1164 
   1165 	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
   1166 		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
   1167 		       engine->name,
   1168 		       regs[CTX_RING_START],
   1169 		       i915_ggtt_offset(ring->vma));
   1170 		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   1171 		valid = false;
   1172 	}
   1173 
   1174 	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
   1175 	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
   1176 		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
   1177 		       engine->name,
   1178 		       regs[CTX_RING_CTL],
   1179 		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
   1180 		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   1181 		valid = false;
   1182 	}
   1183 
   1184 	x = lrc_ring_mi_mode(engine);
   1185 	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
   1186 		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
   1187 		       engine->name, regs[x + 1]);
   1188 		regs[x + 1] &= ~STOP_RING;
   1189 		regs[x + 1] |= STOP_RING << 16;
   1190 		valid = false;
   1191 	}
   1192 
   1193 	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
   1194 }
   1195 
   1196 static void restore_default_state(struct intel_context *ce,
   1197 				  struct intel_engine_cs *engine)
   1198 {
   1199 	u32 *regs = ce->lrc_reg_state;
   1200 
   1201 	if (engine->pinned_default_state)
   1202 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
   1203 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
   1204 		       engine->context_size - PAGE_SIZE);
   1205 
   1206 	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
   1207 }
   1208 
   1209 static void reset_active(struct i915_request *rq,
   1210 			 struct intel_engine_cs *engine)
   1211 {
   1212 	struct intel_context * const ce = rq->context;
   1213 	u32 head;
   1214 
   1215 	/*
   1216 	 * The executing context has been cancelled. We want to prevent
   1217 	 * further execution along this context and propagate the error on
   1218 	 * to anything depending on its results.
   1219 	 *
   1220 	 * In __i915_request_submit(), we apply the -EIO and remove the
   1221 	 * requests' payloads for any banned requests. But first, we must
   1222 	 * rewind the context back to the start of the incomplete request so
   1223 	 * that we do not jump back into the middle of the batch.
   1224 	 *
   1225 	 * We preserve the breadcrumbs and semaphores of the incomplete
   1226 	 * requests so that inter-timeline dependencies (i.e. other timelines)
   1227 	 * remain correctly ordered. And we defer to __i915_request_submit()
   1228 	 * so that all asynchronous waits are correctly handled.
   1229 	 */
   1230 	ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
   1231 		     rq->fence.context, rq->fence.seqno);
   1232 
   1233 	/* On resubmission of the active request, payload will be scrubbed */
   1234 	if (i915_request_completed(rq))
   1235 		head = rq->tail;
   1236 	else
   1237 		head = active_request(ce->timeline, rq)->head;
   1238 	head = intel_ring_wrap(ce->ring, head);
   1239 
   1240 	/* Scrub the context image to prevent replaying the previous batch */
   1241 	restore_default_state(ce, engine);
   1242 	__execlists_update_reg_state(ce, engine, head);
   1243 
   1244 	/* We've switched away, so this should be a no-op, but intent matters */
   1245 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   1246 }
   1247 
   1248 static inline struct intel_engine_cs *
   1249 __execlists_schedule_in(struct i915_request *rq)
   1250 {
   1251 	struct intel_engine_cs * const engine = rq->engine;
   1252 	struct intel_context * const ce = rq->context;
   1253 
   1254 	intel_context_get(ce);
   1255 
   1256 	if (unlikely(intel_context_is_banned(ce)))
   1257 		reset_active(rq, engine);
   1258 
   1259 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   1260 		execlists_check_context(ce, engine);
   1261 
   1262 	if (ce->tag) {
   1263 		/* Use a fixed tag for OA and friends */
   1264 		ce->lrc_desc |= (u64)ce->tag << 32;
   1265 	} else {
   1266 		/* We don't need a strict matching tag, just different values */
   1267 		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
   1268 		ce->lrc_desc |=
   1269 			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
   1270 			GEN11_SW_CTX_ID_SHIFT;
   1271 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
   1272 	}
   1273 
   1274 	__intel_gt_pm_get(engine->gt);
   1275 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
   1276 	intel_engine_context_in(engine);
   1277 
   1278 	return engine;
   1279 }
   1280 
   1281 static inline struct i915_request *
   1282 execlists_schedule_in(struct i915_request *rq, int idx)
   1283 {
   1284 	struct intel_context * const ce = rq->context;
   1285 	struct intel_engine_cs *old;
   1286 
   1287 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
   1288 	trace_i915_request_in(rq, idx);
   1289 
   1290 	old = READ_ONCE(ce->inflight);
   1291 	do {
   1292 		if (!old) {
   1293 			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
   1294 			break;
   1295 		}
   1296 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
   1297 
   1298 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
   1299 	return i915_request_get(rq);
   1300 }
   1301 
   1302 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
   1303 {
   1304 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   1305 	struct i915_request *next = READ_ONCE(ve->request);
   1306 
   1307 	if (next && next->execution_mask & ~rq->execution_mask)
   1308 		tasklet_schedule(&ve->base.execlists.tasklet);
   1309 }
   1310 
   1311 static inline void
   1312 __execlists_schedule_out(struct i915_request *rq,
   1313 			 struct intel_engine_cs * const engine)
   1314 {
   1315 	struct intel_context * const ce = rq->context;
   1316 
   1317 	/*
   1318 	 * NB process_csb() is not under the engine->active.lock and hence
   1319 	 * schedule_out can race with schedule_in meaning that we should
   1320 	 * refrain from doing non-trivial work here.
   1321 	 */
   1322 
   1323 	/*
   1324 	 * If we have just completed this context, the engine may now be
   1325 	 * idle and we want to re-enter powersaving.
   1326 	 */
   1327 	if (list_is_last(&rq->link, &ce->timeline->requests) &&
   1328 	    i915_request_completed(rq))
   1329 		intel_engine_add_retire(engine, ce->timeline);
   1330 
   1331 	intel_engine_context_out(engine);
   1332 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
   1333 	intel_gt_pm_put_async(engine->gt);
   1334 
   1335 	/*
   1336 	 * If this is part of a virtual engine, its next request may
   1337 	 * have been blocked waiting for access to the active context.
   1338 	 * We have to kick all the siblings again in case we need to
   1339 	 * switch (e.g. the next request is not runnable on this
   1340 	 * engine). Hopefully, we will already have submitted the next
   1341 	 * request before the tasklet runs and do not need to rebuild
   1342 	 * each virtual tree and kick everyone again.
   1343 	 */
   1344 	if (ce->engine != engine)
   1345 		kick_siblings(rq, ce);
   1346 
   1347 	intel_context_put(ce);
   1348 }
   1349 
   1350 static inline void
   1351 execlists_schedule_out(struct i915_request *rq)
   1352 {
   1353 	struct intel_context * const ce = rq->context;
   1354 	struct intel_engine_cs *cur, *old;
   1355 
   1356 	trace_i915_request_out(rq);
   1357 
   1358 	old = READ_ONCE(ce->inflight);
   1359 	do
   1360 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
   1361 	while (!try_cmpxchg(&ce->inflight, &old, cur));
   1362 	if (!cur)
   1363 		__execlists_schedule_out(rq, old);
   1364 
   1365 	i915_request_put(rq);
   1366 }
   1367 
   1368 static u64 execlists_update_context(struct i915_request *rq)
   1369 {
   1370 	struct intel_context *ce = rq->context;
   1371 	u64 desc = ce->lrc_desc;
   1372 	u32 tail, prev;
   1373 
   1374 	/*
   1375 	 * WaIdleLiteRestore:bdw,skl
   1376 	 *
   1377 	 * We should never submit the context with the same RING_TAIL twice
   1378 	 * just in case we submit an empty ring, which confuses the HW.
   1379 	 *
   1380 	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
   1381 	 * the normal request to be able to always advance the RING_TAIL on
   1382 	 * subsequent resubmissions (for lite restore). Should that fail us,
   1383 	 * and we try and submit the same tail again, force the context
   1384 	 * reload.
   1385 	 *
   1386 	 * If we need to return to a preempted context, we need to skip the
   1387 	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
   1388 	 * HW has a tendency to ignore us rewinding the TAIL to the end of
   1389 	 * an earlier request.
   1390 	 */
   1391 	tail = intel_ring_set_tail(rq->ring, rq->tail);
   1392 	prev = ce->lrc_reg_state[CTX_RING_TAIL];
   1393 	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
   1394 		desc |= CTX_DESC_FORCE_RESTORE;
   1395 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
   1396 	rq->tail = rq->wa_tail;
   1397 
   1398 	/*
   1399 	 * Make sure the context image is complete before we submit it to HW.
   1400 	 *
   1401 	 * Ostensibly, writes (including the WCB) should be flushed prior to
   1402 	 * an uncached write such as our mmio register access, but the empirical
   1403 	 * evidence (esp. on Braswell) suggests that the WC write into memory
   1404 	 * may not be visible to the HW prior to the completion of the UC
   1405 	 * register write and that we may begin execution from the context
   1406 	 * before its image is complete, leading to invalid PD chasing.
   1407 	 */
   1408 	wmb();
   1409 
   1410 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
   1411 	return desc;
   1412 }
   1413 
   1414 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
   1415 {
   1416 #ifdef __NetBSD__
   1417 	if (execlists->ctrl_reg) {
   1418 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2, lower_32_bits(desc));
   1419 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg + port * 2 + 1, upper_32_bits(desc));
   1420 	} else {
   1421 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, upper_32_bits(desc));
   1422 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->submit_reg, lower_32_bits(desc));
   1423 	}
   1424 #else
   1425 	if (execlists->ctrl_reg) {
   1426 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
   1427 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
   1428 	} else {
   1429 		writel(upper_32_bits(desc), execlists->submit_reg);
   1430 		writel(lower_32_bits(desc), execlists->submit_reg);
   1431 	}
   1432 #endif
   1433 }
   1434 
   1435 static __maybe_unused void
   1436 trace_ports(const struct intel_engine_execlists *execlists,
   1437 	    const char *msg,
   1438 	    struct i915_request * const *ports)
   1439 {
   1440 	const struct intel_engine_cs *engine =
   1441 		const_container_of(execlists, typeof(*engine), execlists);
   1442 
   1443 	if (!ports[0])
   1444 		return;
   1445 
   1446 	ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
   1447 		     ports[0]->fence.context,
   1448 		     ports[0]->fence.seqno,
   1449 		     i915_request_completed(ports[0]) ? "!" :
   1450 		     i915_request_started(ports[0]) ? "*" :
   1451 		     "",
   1452 		     ports[1] ? ports[1]->fence.context : 0,
   1453 		     ports[1] ? ports[1]->fence.seqno : 0);
   1454 }
   1455 
   1456 static __maybe_unused bool
   1457 assert_pending_valid(const struct intel_engine_execlists *execlists,
   1458 		     const char *msg)
   1459 {
   1460 	struct i915_request * const *port, *rq;
   1461 	struct intel_context *ce = NULL;
   1462 
   1463 	trace_ports(execlists, msg, execlists->pending);
   1464 
   1465 	if (!execlists->pending[0]) {
   1466 		GEM_TRACE_ERR("Nothing pending for promotion!\n");
   1467 		return false;
   1468 	}
   1469 
   1470 	if (execlists->pending[execlists_num_ports(execlists)]) {
   1471 		GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
   1472 			      execlists_num_ports(execlists));
   1473 		return false;
   1474 	}
   1475 
   1476 	for (port = execlists->pending; (rq = *port); port++) {
   1477 		unsigned long flags;
   1478 		bool ok = true;
   1479 
   1480 		GEM_BUG_ON(!kref_read(&rq->fence.refcount));
   1481 		GEM_BUG_ON(!i915_request_is_active(rq));
   1482 
   1483 		if (ce == rq->context) {
   1484 			GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
   1485 				      ce->timeline->fence_context,
   1486 				      port - execlists->pending);
   1487 			return false;
   1488 		}
   1489 		ce = rq->context;
   1490 
   1491 		/* Hold tightly onto the lock to prevent concurrent retires! */
   1492 		if (!spin_trylock_irqsave(&rq->lock, flags))
   1493 			continue;
   1494 
   1495 		if (i915_request_completed(rq))
   1496 			goto unlock;
   1497 
   1498 		if (i915_active_is_idle(&ce->active) &&
   1499 		    !intel_context_is_barrier(ce)) {
   1500 			GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
   1501 				      ce->timeline->fence_context,
   1502 				      port - execlists->pending);
   1503 			ok = false;
   1504 			goto unlock;
   1505 		}
   1506 
   1507 		if (!i915_vma_is_pinned(ce->state)) {
   1508 			GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
   1509 				      ce->timeline->fence_context,
   1510 				      port - execlists->pending);
   1511 			ok = false;
   1512 			goto unlock;
   1513 		}
   1514 
   1515 		if (!i915_vma_is_pinned(ce->ring->vma)) {
   1516 			GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
   1517 				      ce->timeline->fence_context,
   1518 				      port - execlists->pending);
   1519 			ok = false;
   1520 			goto unlock;
   1521 		}
   1522 
   1523 unlock:
   1524 		spin_unlock_irqrestore(&rq->lock, flags);
   1525 		if (!ok)
   1526 			return false;
   1527 	}
   1528 
   1529 	return ce;
   1530 }
   1531 
   1532 static void execlists_submit_ports(struct intel_engine_cs *engine)
   1533 {
   1534 	struct intel_engine_execlists *execlists = &engine->execlists;
   1535 	unsigned int n;
   1536 
   1537 	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
   1538 
   1539 	/*
   1540 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
   1541 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
   1542 	 * not be relinquished until the device is idle (see
   1543 	 * i915_gem_idle_work_handler()). As a precaution, we make sure
   1544 	 * that all ELSP are drained i.e. we have processed the CSB,
   1545 	 * that all ELSP are drained, i.e. we have processed the CSB,
   1546 	 */
   1547 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   1548 
   1549 	/*
   1550 	 * ELSQ note: the submit queue is not cleared after being submitted
   1551 	 * to the HW so we need to make sure we always clean it up. This is
   1552 	 * currently ensured by the fact that we always write the same number
   1553 	 * of elsq entries, keep this in mind before changing the loop below.
   1554 	 */
   1555 	for (n = execlists_num_ports(execlists); n--; ) {
   1556 		struct i915_request *rq = execlists->pending[n];
   1557 
   1558 		write_desc(execlists,
   1559 			   rq ? execlists_update_context(rq) : 0,
   1560 			   n);
   1561 	}
   1562 
   1563 	/* we need to manually load the submit queue */
   1564 	if (execlists->ctrl_reg)
   1565 #ifdef __NetBSD__
   1566 		bus_space_write_4(execlists->bst, execlists->bsh, execlists->ctrl_reg, EL_CTRL_LOAD);
   1567 #else
   1568 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
   1569 #endif
   1570 }
   1571 
   1572 static bool ctx_single_port_submission(const struct intel_context *ce)
   1573 {
   1574 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
   1575 		intel_context_force_single_submission(ce));
   1576 }
   1577 
   1578 static bool can_merge_ctx(const struct intel_context *prev,
   1579 			  const struct intel_context *next)
   1580 {
   1581 	if (prev != next)
   1582 		return false;
   1583 
   1584 	if (ctx_single_port_submission(prev))
   1585 		return false;
   1586 
   1587 	return true;
   1588 }
   1589 
   1590 static bool can_merge_rq(const struct i915_request *prev,
   1591 			 const struct i915_request *next)
   1592 {
   1593 	GEM_BUG_ON(prev == next);
   1594 	GEM_BUG_ON(!assert_priority_queue(prev, next));
   1595 
   1596 	/*
   1597 	 * We do not submit known completed requests. Therefore if the next
   1598 	 * request is already completed, we can pretend to merge it in
   1599 	 * with the previous context (and we will skip updating the ELSP
   1600 	 * and tracking). This hopefully keeps the ELSP full of active
   1601 	 * contexts, despite the best efforts of preempt-to-busy to confuse
   1602 	 * us.
   1603 	 */
   1604 	if (i915_request_completed(next))
   1605 		return true;
   1606 
   1607 	if (unlikely((prev->fence.flags ^ next->fence.flags) &
   1608 		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
   1609 		      BIT(I915_FENCE_FLAG_SENTINEL))))
   1610 		return false;
   1611 
   1612 	if (!can_merge_ctx(prev->context, next->context))
   1613 		return false;
   1614 
   1615 	return true;
   1616 }
   1617 
   1618 static void virtual_update_register_offsets(u32 *regs,
   1619 					    struct intel_engine_cs *engine)
   1620 {
   1621 	set_offsets(regs, reg_offsets(engine), engine, false);
   1622 }
   1623 
   1624 static bool virtual_matches(const struct virtual_engine *ve,
   1625 			    const struct i915_request *rq,
   1626 			    const struct intel_engine_cs *engine)
   1627 {
   1628 	const struct intel_engine_cs *inflight;
   1629 
   1630 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
   1631 		return false;
   1632 
   1633 	/*
   1634 	 * We track when the HW has completed saving the context image
   1635 	 * (i.e. when we have seen the final CS event switching out of
   1636 	 * the context) and must not overwrite the context image before
   1637 	 * then. This restricts us to only using the active engine
   1638 	 * while the previous virtualized request is inflight (so
   1639 	 * we reuse the register offsets). This is a very small
    1640 	 * hysteresis on the greedy selection algorithm.
   1641 	 */
   1642 	inflight = intel_context_inflight(&ve->context);
   1643 	if (inflight && inflight != engine)
   1644 		return false;
   1645 
   1646 	return true;
   1647 }
   1648 
   1649 static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
   1650 				     struct intel_engine_cs *engine)
   1651 {
   1652 	struct intel_engine_cs *old = ve->siblings[0];
   1653 
   1654 	/* All unattached (rq->engine == old) must already be completed */
   1655 
   1656 	spin_lock(&old->breadcrumbs.irq_lock);
   1657 	if (!list_empty(&ve->context.signal_link)) {
   1658 		list_move_tail(&ve->context.signal_link,
   1659 			       &engine->breadcrumbs.signalers);
   1660 		intel_engine_signal_breadcrumbs(engine);
   1661 	}
   1662 	spin_unlock(&old->breadcrumbs.irq_lock);
   1663 }
   1664 
   1665 static struct i915_request *
   1666 last_active(const struct intel_engine_execlists *execlists)
   1667 {
   1668 	struct i915_request * const *last = READ_ONCE(execlists->active);
   1669 
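         	/* Skip over already-completed requests to find the last request still active on the HW */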
   1670 	while (*last && i915_request_completed(*last))
   1671 		last++;
   1672 
   1673 	return *last;
   1674 }
   1675 
   1676 #define for_each_waiter(p__, rq__) \
   1677 	list_for_each_entry_lockless(p__, \
   1678 				     &(rq__)->sched.waiters_list, \
   1679 				     wait_link)
   1680 
   1681 static void defer_request(struct i915_request *rq, struct list_head * const pl)
   1682 {
   1683 	LIST_HEAD(list);
   1684 
   1685 	/*
   1686 	 * We want to move the interrupted request to the back of
   1687 	 * the round-robin list (i.e. its priority level), but
   1688 	 * in doing so, we must then move all requests that were in
   1689 	 * flight and were waiting for the interrupted request to
   1690 	 * be run after it again.
   1691 	 */
   1692 	do {
   1693 		struct i915_dependency *p;
   1694 
   1695 		GEM_BUG_ON(i915_request_is_active(rq));
   1696 		list_move_tail(&rq->sched.link, pl);
   1697 
   1698 		for_each_waiter(p, rq) {
   1699 			struct i915_request *w =
   1700 				container_of(p->waiter, typeof(*w), sched);
   1701 
   1702 			/* Leave semaphores spinning on the other engines */
   1703 			if (w->engine != rq->engine)
   1704 				continue;
   1705 
   1706 			/* No waiter should start before its signaler */
   1707 			GEM_BUG_ON(i915_request_started(w) &&
   1708 				   !i915_request_completed(rq));
   1709 
   1710 			GEM_BUG_ON(i915_request_is_active(w));
   1711 			if (!i915_request_is_ready(w))
   1712 				continue;
   1713 
   1714 			if (rq_prio(w) < rq_prio(rq))
   1715 				continue;
   1716 
   1717 			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
   1718 			list_move_tail(&w->sched.link, &list);
   1719 		}
   1720 
   1721 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   1722 	} while (rq);
   1723 }
   1724 
   1725 static void defer_active(struct intel_engine_cs *engine)
   1726 {
   1727 	struct i915_request *rq;
   1728 
   1729 	rq = __unwind_incomplete_requests(engine);
   1730 	if (!rq)
   1731 		return;
   1732 
   1733 	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
   1734 }
   1735 
   1736 static bool
   1737 need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
   1738 {
   1739 	int hint;
   1740 
   1741 	if (!intel_engine_has_timeslices(engine))
   1742 		return false;
   1743 
   1744 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1745 		return false;
   1746 
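         	/* Timeslice if an equal or higher priority request is waiting behind the running one */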
   1747 	hint = max(rq_prio(list_next_entry(rq, sched.link)),
   1748 		   engine->execlists.queue_priority_hint);
   1749 
   1750 	return hint >= effective_prio(rq);
   1751 }
   1752 
   1753 static int
   1754 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
   1755 {
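         	/* Report the priority of the following request, or INT_MIN if rq is the last submitted */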
   1756 	if (list_is_last(&rq->sched.link, &engine->active.requests))
   1757 		return INT_MIN;
   1758 
   1759 	return rq_prio(list_next_entry(rq, sched.link));
   1760 }
   1761 
   1762 static inline unsigned long
   1763 timeslice(const struct intel_engine_cs *engine)
   1764 {
   1765 	return READ_ONCE(engine->props.timeslice_duration_ms);
   1766 }
   1767 
   1768 static unsigned long
   1769 active_timeslice(const struct intel_engine_cs *engine)
   1770 {
   1771 	const struct i915_request *rq = *engine->execlists.active;
   1772 
   1773 	if (!rq || i915_request_completed(rq))
   1774 		return 0;
   1775 
   1776 	if (engine->execlists.switch_priority_hint < effective_prio(rq))
   1777 		return 0;
   1778 
   1779 	return timeslice(engine);
   1780 }
   1781 
   1782 static void set_timeslice(struct intel_engine_cs *engine)
   1783 {
   1784 	if (!intel_engine_has_timeslices(engine))
   1785 		return;
   1786 
   1787 	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
   1788 }
   1789 
   1790 static void record_preemption(struct intel_engine_execlists *execlists)
   1791 {
   1792 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
   1793 }
   1794 
   1795 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
   1796 {
   1797 	struct i915_request *rq;
   1798 
   1799 	rq = last_active(&engine->execlists);
   1800 	if (!rq)
   1801 		return 0;
   1802 
   1803 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
   1804 	if (unlikely(intel_context_is_banned(rq->context)))
   1805 		return 1;
   1806 
   1807 	return READ_ONCE(engine->props.preempt_timeout_ms);
   1808 }
   1809 
   1810 static void set_preempt_timeout(struct intel_engine_cs *engine)
   1811 {
   1812 	if (!intel_engine_has_preempt_reset(engine))
   1813 		return;
   1814 
   1815 	set_timer_ms(&engine->execlists.preempt,
   1816 		     active_preempt_timeout(engine));
   1817 }
   1818 
   1819 static inline void clear_ports(struct i915_request **ports, int count)
   1820 {
   1821 	memset_p((void **)ports, NULL, count);
   1822 }
   1823 
   1824 static void execlists_dequeue(struct intel_engine_cs *engine)
   1825 {
   1826 	struct intel_engine_execlists * const execlists = &engine->execlists;
   1827 	struct i915_request **port = execlists->pending;
   1828 	struct i915_request ** const last_port = port + execlists->port_mask;
   1829 	struct i915_request *last;
   1830 	struct rb_node *rb;
   1831 	bool submit = false;
   1832 
   1833 	/*
   1834 	 * Hardware submission is through 2 ports. Conceptually each port
   1835 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
   1836 	 * static for a context, and unique to each, so we only execute
   1837 	 * requests belonging to a single context from each ring. RING_HEAD
   1838 	 * is maintained by the CS in the context image, it marks the place
   1839 	 * where it got up to last time, and through RING_TAIL we tell the CS
   1840 	 * where we want to execute up to this time.
   1841 	 *
   1842 	 * In this list the requests are in order of execution. Consecutive
   1843 	 * requests from the same context are adjacent in the ringbuffer. We
   1844 	 * can combine these requests into a single RING_TAIL update:
   1845 	 *
   1846 	 *              RING_HEAD...req1...req2
   1847 	 *                                    ^- RING_TAIL
   1848 	 * since to execute req2 the CS must first execute req1.
   1849 	 *
   1850 	 * Our goal then is to point each port to the end of a consecutive
    1851 	 * sequence of requests as the most optimal (fewest wakeups
   1852 	 * and context switches) submission.
   1853 	 */
   1854 
   1855 	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
   1856 		struct virtual_engine *ve =
   1857 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1858 		struct i915_request *rq = READ_ONCE(ve->request);
   1859 
   1860 		if (!rq) { /* lazily cleanup after another engine handled rq */
   1861 			rb_erase_cached(rb, &execlists->virtual);
   1862 			container_of(rb, struct ve_node, rb)->inserted =
   1863 			    false;
   1864 			rb = rb_first_cached(&execlists->virtual);
   1865 			continue;
   1866 		}
   1867 
   1868 		if (!virtual_matches(ve, rq, engine)) {
   1869 			rb = rb_next2(&execlists->virtual.rb_root, rb);
   1870 			continue;
   1871 		}
   1872 
   1873 		break;
   1874 	}
   1875 
   1876 	/*
   1877 	 * If the queue is higher priority than the last
   1878 	 * request in the currently active context, submit afresh.
   1879 	 * We will resubmit again afterwards in case we need to split
   1880 	 * the active context to interject the preemption request,
   1881 	 * i.e. we will retrigger preemption following the ack in case
   1882 	 * of trouble.
   1883 	 */
   1884 	last = last_active(execlists);
   1885 	if (last) {
   1886 		if (need_preempt(engine, last, rb)) {
   1887 			ENGINE_TRACE(engine,
   1888 				     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
   1889 				     last->fence.context,
   1890 				     last->fence.seqno,
   1891 				     last->sched.attr.priority,
   1892 				     execlists->queue_priority_hint);
   1893 			record_preemption(execlists);
   1894 
   1895 			/*
   1896 			 * Don't let the RING_HEAD advance past the breadcrumb
   1897 			 * as we unwind (and until we resubmit) so that we do
   1898 			 * not accidentally tell it to go backwards.
   1899 			 */
   1900 			ring_set_paused(engine, 1);
   1901 
   1902 			/*
   1903 			 * Note that we have not stopped the GPU at this point,
   1904 			 * so we are unwinding the incomplete requests as they
   1905 			 * remain inflight and so by the time we do complete
   1906 			 * the preemption, some of the unwound requests may
   1907 			 * complete!
   1908 			 */
   1909 			__unwind_incomplete_requests(engine);
   1910 
   1911 			last = NULL;
   1912 		} else if (need_timeslice(engine, last) &&
   1913 			   timer_expired(&engine->execlists.timer)) {
   1914 			ENGINE_TRACE(engine,
   1915 				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
   1916 				     last->fence.context,
   1917 				     last->fence.seqno,
   1918 				     last->sched.attr.priority,
   1919 				     execlists->queue_priority_hint);
   1920 
   1921 			ring_set_paused(engine, 1);
   1922 			defer_active(engine);
   1923 
   1924 			/*
   1925 			 * Unlike for preemption, if we rewind and continue
   1926 			 * executing the same context as previously active,
   1927 			 * the order of execution will remain the same and
   1928 			 * the tail will only advance. We do not need to
   1929 			 * force a full context restore, as a lite-restore
   1930 			 * is sufficient to resample the monotonic TAIL.
   1931 			 *
   1932 			 * If we switch to any other context, similarly we
   1933 			 * will not rewind TAIL of current context, and
   1934 			 * normal save/restore will preserve state and allow
   1935 			 * us to later continue executing the same request.
   1936 			 */
   1937 			last = NULL;
   1938 		} else {
   1939 			/*
   1940 			 * Otherwise if we already have a request pending
   1941 			 * for execution after the current one, we can
   1942 			 * just wait until the next CS event before
   1943 			 * queuing more. In either case we will force a
   1944 			 * lite-restore preemption event, but if we wait
   1945 			 * we hopefully coalesce several updates into a single
   1946 			 * submission.
   1947 			 */
   1948 			if (!list_is_last(&last->sched.link,
   1949 					  &engine->active.requests)) {
   1950 				/*
   1951 				 * Even if ELSP[1] is occupied and not worthy
   1952 				 * of timeslices, our queue might be.
   1953 				 */
   1954 				if (!timer_pending(&execlists->timer) &&
   1955 				    need_timeslice(engine, last))
   1956 					set_timer_ms(&execlists->timer,
   1957 						     timeslice(engine));
   1958 
   1959 				return;
   1960 			}
   1961 		}
   1962 	}
   1963 
   1964 	while (rb) { /* XXX virtual is always taking precedence */
   1965 		struct virtual_engine *ve =
   1966 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   1967 		struct i915_request *rq;
   1968 
   1969 		spin_lock(&ve->base.active.lock);
   1970 
   1971 		rq = ve->request;
   1972 		if (unlikely(!rq)) { /* lost the race to a sibling */
   1973 			spin_unlock(&ve->base.active.lock);
   1974 			rb_erase_cached(rb, &execlists->virtual);
   1975 			container_of(rb, struct ve_node, rb)->inserted =
   1976 			    false;
   1977 			rb = rb_first_cached(&execlists->virtual);
   1978 			continue;
   1979 		}
   1980 
   1981 		GEM_BUG_ON(rq != ve->request);
   1982 		GEM_BUG_ON(rq->engine != &ve->base);
   1983 		GEM_BUG_ON(rq->context != &ve->context);
   1984 
   1985 		if (rq_prio(rq) >= queue_prio(execlists)) {
   1986 			if (!virtual_matches(ve, rq, engine)) {
   1987 				spin_unlock(&ve->base.active.lock);
   1988 				rb = rb_next2(&execlists->virtual.rb_root,
   1989 				    rb);
   1990 				continue;
   1991 			}
   1992 
   1993 			if (last && !can_merge_rq(last, rq)) {
   1994 				spin_unlock(&ve->base.active.lock);
   1995 				return; /* leave this for another */
   1996 			}
   1997 
   1998 			ENGINE_TRACE(engine,
   1999 				     "virtual rq=%llx:%lld%s, new engine? %s\n",
   2000 				     rq->fence.context,
   2001 				     rq->fence.seqno,
   2002 				     i915_request_completed(rq) ? "!" :
   2003 				     i915_request_started(rq) ? "*" :
   2004 				     "",
   2005 				     yesno(engine != ve->siblings[0]));
   2006 
   2007 			ve->request = NULL;
   2008 			ve->base.execlists.queue_priority_hint = INT_MIN;
   2009 			rb_erase_cached(rb, &execlists->virtual);
   2010 			container_of(rb, struct ve_node, rb)->inserted =
   2011 			    false;
   2012 
   2013 			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
   2014 			rq->engine = engine;
   2015 
   2016 			if (engine != ve->siblings[0]) {
   2017 				u32 *regs = ve->context.lrc_reg_state;
   2018 				unsigned int n;
   2019 
   2020 				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
   2021 
   2022 				if (!intel_engine_has_relative_mmio(engine))
   2023 					virtual_update_register_offsets(regs,
   2024 									engine);
   2025 
   2026 				if (!list_empty(&ve->context.signals))
   2027 					virtual_xfer_breadcrumbs(ve, engine);
   2028 
   2029 				/*
   2030 				 * Move the bound engine to the top of the list
   2031 				 * for future execution. We then kick this
   2032 				 * tasklet first before checking others, so that
   2033 				 * we preferentially reuse this set of bound
   2034 				 * registers.
   2035 				 */
   2036 				for (n = 1; n < ve->num_siblings; n++) {
   2037 					if (ve->siblings[n] == engine) {
   2038 						swap(ve->siblings[n],
   2039 						     ve->siblings[0]);
   2040 						break;
   2041 					}
   2042 				}
   2043 
   2044 				GEM_BUG_ON(ve->siblings[0] != engine);
   2045 			}
   2046 
   2047 			if (__i915_request_submit(rq)) {
   2048 				submit = true;
   2049 				last = rq;
   2050 			}
   2051 			i915_request_put(rq);
   2052 
   2053 			/*
   2054 			 * Hmm, we have a bunch of virtual engine requests,
   2055 			 * but the first one was already completed (thanks
   2056 			 * preempt-to-busy!). Keep looking at the veng queue
   2057 			 * until we have no more relevant requests (i.e.
   2058 			 * the normal submit queue has higher priority).
   2059 			 */
   2060 			if (!submit) {
   2061 				spin_unlock(&ve->base.active.lock);
   2062 				rb = rb_first_cached(&execlists->virtual);
   2063 				continue;
   2064 			}
   2065 		}
   2066 
   2067 		spin_unlock(&ve->base.active.lock);
   2068 		break;
   2069 	}
   2070 
   2071 	while ((rb = rb_first_cached(&execlists->queue))) {
   2072 		struct i915_priolist *p = to_priolist(rb);
   2073 		struct i915_request *rq, *rn;
   2074 		int i;
   2075 
   2076 		priolist_for_each_request_consume(rq, rn, p, i) {
   2077 			bool merge = true;
   2078 
   2079 			/*
   2080 			 * Can we combine this request with the current port?
   2081 			 * It has to be the same context/ringbuffer and not
   2082 			 * have any exceptions (e.g. GVT saying never to
   2083 			 * combine contexts).
   2084 			 *
   2085 			 * If we can combine the requests, we can execute both
   2086 			 * by updating the RING_TAIL to point to the end of the
   2087 			 * second request, and so we never need to tell the
   2088 			 * hardware about the first.
   2089 			 */
   2090 			if (last && !can_merge_rq(last, rq)) {
   2091 				/*
   2092 				 * If we are on the second port and cannot
   2093 				 * combine this request with the last, then we
   2094 				 * are done.
   2095 				 */
   2096 				if (port == last_port)
   2097 					goto done;
   2098 
   2099 				/*
   2100 				 * We must not populate both ELSP[] with the
   2101 				 * same LRCA, i.e. we must submit 2 different
   2102 				 * contexts if we submit 2 ELSP.
   2103 				 */
   2104 				if (last->context == rq->context)
   2105 					goto done;
   2106 
   2107 				if (i915_request_has_sentinel(last))
   2108 					goto done;
   2109 
   2110 				/*
   2111 				 * If GVT overrides us we only ever submit
   2112 				 * port[0], leaving port[1] empty. Note that we
   2113 				 * also have to be careful that we don't queue
   2114 				 * the same context (even though a different
   2115 				 * request) to the second port.
   2116 				 */
   2117 				if (ctx_single_port_submission(last->context) ||
   2118 				    ctx_single_port_submission(rq->context))
   2119 					goto done;
   2120 
   2121 				merge = false;
   2122 			}
   2123 
   2124 			if (__i915_request_submit(rq)) {
   2125 				if (!merge) {
   2126 					*port = execlists_schedule_in(last, port - execlists->pending);
   2127 					port++;
   2128 					last = NULL;
   2129 				}
   2130 
   2131 				GEM_BUG_ON(last &&
   2132 					   !can_merge_ctx(last->context,
   2133 							  rq->context));
   2134 
   2135 				submit = true;
   2136 				last = rq;
   2137 			}
   2138 		}
   2139 
   2140 		rb_erase_cached(&p->node, &execlists->queue);
   2141 		i915_priolist_free(p);
   2142 	}
   2143 
   2144 done:
   2145 	/*
   2146 	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
   2147 	 *
   2148 	 * We choose the priority hint such that if we add a request of greater
   2149 	 * priority than this, we kick the submission tasklet to decide on
   2150 	 * the right order of submitting the requests to hardware. We must
   2151 	 * also be prepared to reorder requests as they are in-flight on the
    2152 	 * HW. We therefore derive the priority hint from the first "hole"
    2153 	 * in the HW submission ports or, if there are no available slots,
    2154 	 * from the priority of the lowest executing request, i.e. last.
   2155 	 *
   2156 	 * When we do receive a higher priority request ready to run from the
   2157 	 * user, see queue_request(), the priority hint is bumped to that
   2158 	 * request triggering preemption on the next dequeue (or subsequent
   2159 	 * interrupt for secondary ports).
   2160 	 */
   2161 	execlists->queue_priority_hint = queue_prio(execlists);
   2162 
   2163 	if (submit) {
   2164 		*port = execlists_schedule_in(last, port - execlists->pending);
   2165 		execlists->switch_priority_hint =
   2166 			switch_prio(engine, *execlists->pending);
   2167 
   2168 		/*
   2169 		 * Skip if we ended up with exactly the same set of requests,
   2170 		 * e.g. trying to timeslice a pair of ordered contexts
   2171 		 */
   2172 		if (!memcmp(execlists->active, execlists->pending,
   2173 			    (port - execlists->pending + 1) * sizeof(*port))) {
   2174 			do
   2175 				execlists_schedule_out(fetch_and_zero(port));
   2176 			while (port-- != execlists->pending);
   2177 
   2178 			goto skip_submit;
   2179 		}
   2180 		clear_ports(port + 1, last_port - port);
   2181 
   2182 		execlists_submit_ports(engine);
   2183 		set_preempt_timeout(engine);
   2184 	} else {
   2185 skip_submit:
   2186 		ring_set_paused(engine, 0);
   2187 	}
   2188 }
   2189 
   2190 static void
   2191 cancel_port_requests(struct intel_engine_execlists * const execlists)
   2192 {
   2193 	struct i915_request * const *port;
   2194 
   2195 	for (port = execlists->pending; *port; port++)
   2196 		execlists_schedule_out(*port);
   2197 	clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
   2198 
   2199 	/* Mark the end of active before we overwrite *active */
   2200 	for (port = xchg(&execlists->active, execlists->pending); *port; port++)
   2201 		execlists_schedule_out(*port);
   2202 	clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
   2203 
   2204 	WRITE_ONCE(execlists->active, execlists->inflight);
   2205 }
   2206 
   2207 static inline void
   2208 invalidate_csb_entries(const u32 *first, const u32 *last)
   2209 {
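         	/* Flush the cachelines backing the CSB so we do not re-read stale entries */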
   2210 	clflush(__UNCONST(first));
   2211 	clflush(__UNCONST(last));
   2212 }
   2213 
   2214 static inline bool
   2215 reset_in_progress(const struct intel_engine_execlists *execlists)
   2216 {
   2217 	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
   2218 }
   2219 
   2220 /*
   2221  * Starting with Gen12, the status has a new format:
   2222  *
   2223  *     bit  0:     switched to new queue
   2224  *     bit  1:     reserved
   2225  *     bit  2:     semaphore wait mode (poll or signal), only valid when
   2226  *                 switch detail is set to "wait on semaphore"
   2227  *     bits 3-5:   engine class
   2228  *     bits 6-11:  engine instance
   2229  *     bits 12-14: reserved
   2230  *     bits 15-25: sw context id of the lrc the GT switched to
   2231  *     bits 26-31: sw counter of the lrc the GT switched to
   2232  *     bits 32-35: context switch detail
   2233  *                  - 0: ctx complete
   2234  *                  - 1: wait on sync flip
   2235  *                  - 2: wait on vblank
   2236  *                  - 3: wait on scanline
   2237  *                  - 4: wait on semaphore
   2238  *                  - 5: context preempted (not on SEMAPHORE_WAIT or
   2239  *                       WAIT_FOR_EVENT)
   2240  *     bit  36:    reserved
   2241  *     bits 37-43: wait detail (for switch detail 1 to 4)
   2242  *     bits 44-46: reserved
   2243  *     bits 47-57: sw context id of the lrc the GT switched away from
   2244  *     bits 58-63: sw counter of the lrc the GT switched away from
   2245  */
   2246 static inline bool
   2247 gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2248 {
   2249 	u32 lower_dw = csb[0];
   2250 	u32 upper_dw = csb[1];
   2251 	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
   2252 	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
   2253 	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
   2254 
   2255 	/*
   2256 	 * The context switch detail is not guaranteed to be 5 when a preemption
   2257 	 * occurs, so we can't just check for that. The check below works for
   2258 	 * all the cases we care about, including preemptions of WAIT
   2259 	 * instructions and lite-restore. Preempt-to-idle via the CTRL register
   2260 	 * would require some extra handling, but we don't support that.
   2261 	 */
   2262 	if (!ctx_away_valid || new_queue) {
   2263 		GEM_BUG_ON(!ctx_to_valid);
   2264 		return true;
   2265 	}
   2266 
   2267 	/*
   2268 	 * switch detail = 5 is covered by the case above and we do not expect a
   2269 	 * context switch on an unsuccessful wait instruction since we always
   2270 	 * use polling mode.
   2271 	 */
   2272 	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
   2273 	return false;
   2274 }
   2275 
   2276 static inline bool
   2277 gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
   2278 {
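         	/* Promote pending to active on an idle-to-active transition or a preemption event */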
   2279 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
   2280 }
   2281 
   2282 static void process_csb(struct intel_engine_cs *engine)
   2283 {
   2284 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2285 	const u32 * const buf = execlists->csb_status;
   2286 	const u8 num_entries = execlists->csb_size;
   2287 	u8 head, tail;
   2288 
   2289 	/*
   2290 	 * As we modify our execlists state tracking we require exclusive
   2291 	 * access. Either we are inside the tasklet, or the tasklet is disabled
   2292 	 * and we assume that is only inside the reset paths and so serialised.
   2293 	 */
   2294 	GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
   2295 		   !reset_in_progress(execlists));
   2296 	GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
   2297 
   2298 	/*
   2299 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
   2300 	 * When reading from the csb_write mmio register, we have to be
   2301 	 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
    2302 	 * the low 4 bits. As it happens we know the next 4 bits are always
    2303 	 * zero and so we can simply mask off the low u8 of the register
   2304 	 * and treat it identically to reading from the HWSP (without having
   2305 	 * to use explicit shifting and masking, and probably bifurcating
   2306 	 * the code to handle the legacy mmio read).
   2307 	 */
   2308 	head = execlists->csb_head;
   2309 	tail = READ_ONCE(*execlists->csb_write);
   2310 	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
   2311 	if (unlikely(head == tail))
   2312 		return;
   2313 
   2314 	/*
   2315 	 * Hopefully paired with a wmb() in HW!
   2316 	 *
   2317 	 * We must complete the read of the write pointer before any reads
   2318 	 * from the CSB, so that we do not see stale values. Without an rmb
   2319 	 * (lfence) the HW may speculatively perform the CSB[] reads *before*
   2320 	 * we perform the READ_ONCE(*csb_write).
   2321 	 */
   2322 	rmb();
   2323 
   2324 	do {
   2325 		bool promote;
   2326 
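         		/* The CSB is a ring buffer; wrap the read index at the end */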
   2327 		if (++head == num_entries)
   2328 			head = 0;
   2329 
   2330 		/*
   2331 		 * We are flying near dragons again.
   2332 		 *
   2333 		 * We hold a reference to the request in execlist_port[]
   2334 		 * but no more than that. We are operating in softirq
   2335 		 * context and so cannot hold any mutex or sleep. That
    2336 		 * context and so cannot hold any mutex or sleep. That
    2337 		 * means we cannot stop the requests we are processing
   2338 		 * breadcrumb will be complete before we see the
   2339 		 * context-switch). As we only hold the reference to the
   2340 		 * request, any pointer chasing underneath the request
   2341 		 * is subject to a potential use-after-free. Thus we
   2342 		 * store all of the bookkeeping within port[] as
   2343 		 * required, and avoid using unguarded pointers beneath
   2344 		 * request itself. The same applies to the atomic
   2345 		 * status notifier.
   2346 		 */
   2347 
   2348 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
   2349 			     head, buf[2 * head + 0], buf[2 * head + 1]);
   2350 
   2351 		if (INTEL_GEN(engine->i915) >= 12)
   2352 			promote = gen12_csb_parse(execlists, buf + 2 * head);
   2353 		else
   2354 			promote = gen8_csb_parse(execlists, buf + 2 * head);
   2355 		if (promote) {
   2356 			struct i915_request * const *old = execlists->active;
   2357 
   2358 			/* Point active to the new ELSP; prevent overwriting */
   2359 			WRITE_ONCE(execlists->active, execlists->pending);
   2360 
   2361 			if (!inject_preempt_hang(execlists))
   2362 				ring_set_paused(engine, 0);
   2363 
   2364 			/* cancel old inflight, prepare for switch */
   2365 			trace_ports(execlists, "preempted", old);
   2366 			while (*old)
   2367 				execlists_schedule_out(*old++);
   2368 
   2369 			/* switch pending to inflight */
   2370 			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
   2371 			WRITE_ONCE(execlists->active,
   2372 				   memcpy(execlists->inflight,
   2373 					  execlists->pending,
   2374 					  execlists_num_ports(execlists) *
   2375 					  sizeof(*execlists->pending)));
   2376 
   2377 			WRITE_ONCE(execlists->pending[0], NULL);
   2378 		} else {
   2379 			GEM_BUG_ON(!*execlists->active);
   2380 
   2381 			/* port0 completed, advanced to port1 */
   2382 			trace_ports(execlists, "completed", execlists->active);
   2383 
   2384 			/*
   2385 			 * We rely on the hardware being strongly
   2386 			 * ordered, that the breadcrumb write is
   2387 			 * coherent (visible from the CPU) before the
   2388 			 * user interrupt and CSB is processed.
   2389 			 */
   2390 			GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
   2391 				   !reset_in_progress(execlists));
   2392 			execlists_schedule_out(*execlists->active++);
   2393 
   2394 			GEM_BUG_ON(execlists->active - execlists->inflight >
   2395 				   execlists_num_ports(execlists));
   2396 		}
   2397 	} while (head != tail);
   2398 
   2399 	execlists->csb_head = head;
   2400 	set_timeslice(engine);
   2401 
   2402 	/*
   2403 	 * Gen11 has proven to fail wrt global observation point between
   2404 	 * entry and tail update, failing on the ordering and thus
   2405 	 * we see an old entry in the context status buffer.
   2406 	 *
    2407 	 * Forcibly evict the entries before the next gpu csb update,
    2408 	 * to increase the odds that we get fresh entries even with
    2409 	 * non-working hardware. The cost of doing so comes out mostly in
    2410 	 * the wash as hardware, working or not, will need to do the
    2411 	 * invalidation beforehand.
   2412 	 */
   2413 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
   2414 }
   2415 
   2416 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
   2417 {
   2418 	lockdep_assert_held(&engine->active.lock);
   2419 	if (!engine->execlists.pending[0]) {
   2420 		rcu_read_lock(); /* protect peeking at execlists->active */
   2421 		execlists_dequeue(engine);
   2422 		rcu_read_unlock();
   2423 	}
   2424 }
   2425 
   2426 static void __execlists_hold(struct i915_request *rq)
   2427 {
   2428 	LIST_HEAD(list);
   2429 
   2430 	do {
   2431 		struct i915_dependency *p;
   2432 
   2433 		if (i915_request_is_active(rq))
   2434 			__i915_request_unsubmit(rq);
   2435 
   2436 		RQ_TRACE(rq, "on hold\n");
   2437 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2438 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
   2439 		i915_request_set_hold(rq);
   2440 
   2441 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2442 			struct i915_request *w =
   2443 				container_of(p->waiter, typeof(*w), sched);
   2444 
   2445 			/* Leave semaphores spinning on the other engines */
   2446 			if (w->engine != rq->engine)
   2447 				continue;
   2448 
   2449 			if (!i915_request_is_ready(w))
   2450 				continue;
   2451 
   2452 			if (i915_request_completed(w))
   2453 				continue;
   2454 
    2455 			if (i915_request_on_hold(w))
   2456 				continue;
   2457 
   2458 			list_move_tail(&w->sched.link, &list);
   2459 		}
   2460 
   2461 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2462 	} while (rq);
   2463 }
   2464 
   2465 static bool execlists_hold(struct intel_engine_cs *engine,
   2466 			   struct i915_request *rq)
   2467 {
   2468 	spin_lock_irq(&engine->active.lock);
   2469 
   2470 	if (i915_request_completed(rq)) { /* too late! */
   2471 		rq = NULL;
   2472 		goto unlock;
   2473 	}
   2474 
   2475 	if (rq->engine != engine) { /* preempted virtual engine */
   2476 		struct virtual_engine *ve = to_virtual_engine(rq->engine);
   2477 
   2478 		/*
   2479 		 * intel_context_inflight() is only protected by virtue
   2480 		 * of process_csb() being called only by the tasklet (or
   2481 		 * directly from inside reset while the tasklet is suspended).
   2482 		 * Assert that neither of those are allowed to run while we
   2483 		 * poke at the request queues.
   2484 		 */
   2485 		GEM_BUG_ON(!reset_in_progress(&engine->execlists));
   2486 
   2487 		/*
   2488 		 * An unsubmitted request along a virtual engine will
   2489 		 * remain on the active (this) engine until we are able
   2490 		 * to process the context switch away (and so mark the
   2491 		 * context as no longer in flight). That cannot have happened
   2492 		 * yet, otherwise we would not be hanging!
   2493 		 */
   2494 		spin_lock(&ve->base.active.lock);
   2495 		GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
   2496 		GEM_BUG_ON(ve->request != rq);
   2497 		ve->request = NULL;
   2498 		spin_unlock(&ve->base.active.lock);
   2499 		i915_request_put(rq);
   2500 
   2501 		rq->engine = engine;
   2502 	}
   2503 
   2504 	/*
   2505 	 * Transfer this request onto the hold queue to prevent it
    2506 	 * being resubmitted to HW (and potentially completed) before we have
   2507 	 * released it. Since we may have already submitted following
   2508 	 * requests, we need to remove those as well.
   2509 	 */
   2510 	GEM_BUG_ON(i915_request_on_hold(rq));
   2511 	GEM_BUG_ON(rq->engine != engine);
   2512 	__execlists_hold(rq);
   2513 
   2514 unlock:
   2515 	spin_unlock_irq(&engine->active.lock);
   2516 	return rq;
   2517 }
   2518 
   2519 static bool hold_request(const struct i915_request *rq)
   2520 {
   2521 	struct i915_dependency *p;
   2522 
   2523 	/*
   2524 	 * If one of our ancestors is on hold, we must also be on hold,
   2525 	 * otherwise we will bypass it and execute before it.
   2526 	 */
   2527 	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
   2528 		const struct i915_request *s =
   2529 			container_of(p->signaler, typeof(*s), sched);
   2530 
   2531 		if (s->engine != rq->engine)
   2532 			continue;
   2533 
   2534 		if (i915_request_on_hold(s))
   2535 			return true;
   2536 	}
   2537 
   2538 	return false;
   2539 }
   2540 
   2541 static void __execlists_unhold(struct i915_request *rq)
   2542 {
   2543 	LIST_HEAD(list);
   2544 
   2545 	do {
   2546 		struct i915_dependency *p;
   2547 
   2548 		GEM_BUG_ON(!i915_request_on_hold(rq));
   2549 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
   2550 
   2551 		i915_request_clear_hold(rq);
   2552 		list_move_tail(&rq->sched.link,
   2553 			       i915_sched_lookup_priolist(rq->engine,
   2554 							  rq_prio(rq)));
   2555 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2556 		RQ_TRACE(rq, "hold release\n");
   2557 
   2558 		/* Also release any children on this engine that are ready */
   2559 		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
   2560 			struct i915_request *w =
   2561 				container_of(p->waiter, typeof(*w), sched);
   2562 
   2563 			if (w->engine != rq->engine)
   2564 				continue;
   2565 
    2566 			if (!i915_request_on_hold(w))
   2567 				continue;
   2568 
   2569 			/* Check that no other parents are also on hold */
    2570 			if (hold_request(w))
   2571 				continue;
   2572 
   2573 			list_move_tail(&w->sched.link, &list);
   2574 		}
   2575 
   2576 		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
   2577 	} while (rq);
   2578 }
   2579 
   2580 static void execlists_unhold(struct intel_engine_cs *engine,
   2581 			     struct i915_request *rq)
   2582 {
   2583 	spin_lock_irq(&engine->active.lock);
   2584 
   2585 	/*
   2586 	 * Move this request back to the priority queue, and all of its
   2587 	 * children and grandchildren that were suspended along with it.
   2588 	 */
   2589 	__execlists_unhold(rq);
   2590 
   2591 	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
   2592 		engine->execlists.queue_priority_hint = rq_prio(rq);
   2593 		tasklet_hi_schedule(&engine->execlists.tasklet);
   2594 	}
   2595 
   2596 	spin_unlock_irq(&engine->active.lock);
   2597 }
   2598 
   2599 struct execlists_capture {
   2600 	struct work_struct work;
   2601 	struct i915_request *rq;
   2602 	struct i915_gpu_coredump *error;
   2603 };
   2604 
   2605 static void execlists_capture_work(struct work_struct *work)
   2606 {
   2607 	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
   2608 	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
   2609 	struct intel_engine_cs *engine = cap->rq->engine;
   2610 	struct intel_gt_coredump *gt = cap->error->gt;
   2611 	struct intel_engine_capture_vma *vma;
   2612 
   2613 	/* Compress all the objects attached to the request, slow! */
   2614 	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
   2615 	if (vma) {
   2616 		struct i915_vma_compress *compress =
   2617 			i915_vma_capture_prepare(gt);
   2618 
   2619 		intel_engine_coredump_add_vma(gt->engine, vma, compress);
   2620 		i915_vma_capture_finish(gt, compress);
   2621 	}
   2622 
   2623 	gt->simulated = gt->engine->simulated;
   2624 	cap->error->simulated = gt->simulated;
   2625 
   2626 	/* Publish the error state, and announce it to the world */
   2627 	i915_error_state_store(cap->error);
   2628 	i915_gpu_coredump_put(cap->error);
   2629 
   2630 	/* Return this request and all that depend upon it for signaling */
   2631 	execlists_unhold(engine, cap->rq);
   2632 	i915_request_put(cap->rq);
   2633 
   2634 	kfree(cap);
   2635 }
   2636 
   2637 static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
   2638 {
   2639 	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
   2640 	struct execlists_capture *cap;
   2641 
   2642 	cap = kmalloc(sizeof(*cap), gfp);
   2643 	if (!cap)
   2644 		return NULL;
   2645 
   2646 	cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
   2647 	if (!cap->error)
   2648 		goto err_cap;
   2649 
   2650 	cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
   2651 	if (!cap->error->gt)
   2652 		goto err_gpu;
   2653 
   2654 	cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
   2655 	if (!cap->error->gt->engine)
   2656 		goto err_gt;
   2657 
   2658 	return cap;
   2659 
   2660 err_gt:
   2661 	kfree(cap->error->gt);
   2662 err_gpu:
   2663 	kfree(cap->error);
   2664 err_cap:
   2665 	kfree(cap);
   2666 	return NULL;
   2667 }
   2668 
   2669 static bool execlists_capture(struct intel_engine_cs *engine)
   2670 {
   2671 	struct execlists_capture *cap;
   2672 
   2673 	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
   2674 		return true;
   2675 
   2676 	/*
   2677 	 * We need to _quickly_ capture the engine state before we reset.
   2678 	 * We are inside an atomic section (softirq) here and we are delaying
   2679 	 * the forced preemption event.
   2680 	 */
   2681 	cap = capture_regs(engine);
   2682 	if (!cap)
   2683 		return true;
   2684 
   2685 	cap->rq = execlists_active(&engine->execlists);
   2686 	GEM_BUG_ON(!cap->rq);
   2687 
   2688 	rcu_read_lock();
   2689 	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
   2690 	cap->rq = i915_request_get_rcu(cap->rq);
   2691 	rcu_read_unlock();
   2692 	if (!cap->rq)
   2693 		goto err_free;
   2694 
   2695 	/*
   2696 	 * Remove the request from the execlists queue, and take ownership
   2697 	 * of the request. We pass it to our worker who will _slowly_ compress
   2698 	 * all the pages the _user_ requested for debugging their batch, after
   2699 	 * which we return it to the queue for signaling.
   2700 	 *
   2701 	 * By removing them from the execlists queue, we also remove the
   2702 	 * requests from being processed by __unwind_incomplete_requests()
   2703 	 * during the intel_engine_reset(), and so they will *not* be replayed
   2704 	 * afterwards.
   2705 	 *
   2706 	 * Note that because we have not yet reset the engine at this point,
    2707 	 * it is possible that the request we have identified as being
    2708 	 * guilty did in fact complete and we will then hit an arbitration
   2709 	 * point allowing the outstanding preemption to succeed. The likelihood
   2710 	 * of that is very low (as capturing of the engine registers should be
   2711 	 * fast enough to run inside an irq-off atomic section!), so we will
   2712 	 * simply hold that request accountable for being non-preemptible
   2713 	 * long enough to force the reset.
   2714 	 */
   2715 	if (!execlists_hold(engine, cap->rq))
   2716 		goto err_rq;
   2717 
   2718 	INIT_WORK(&cap->work, execlists_capture_work);
   2719 	schedule_work(&cap->work);
   2720 	return true;
   2721 
   2722 err_rq:
   2723 	i915_request_put(cap->rq);
   2724 err_free:
   2725 	i915_gpu_coredump_put(cap->error);
   2726 	kfree(cap);
   2727 	return false;
   2728 }
   2729 
   2730 static noinline void preempt_reset(struct intel_engine_cs *engine)
   2731 {
   2732 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
   2733 	unsigned long *lock = &engine->gt->reset.flags;
   2734 
   2735 	if (i915_modparams.reset < 3)
   2736 		return;
   2737 
   2738 	if (test_and_set_bit(bit, lock))
   2739 		return;
   2740 
   2741 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
   2742 	tasklet_disable_nosync(&engine->execlists.tasklet);
   2743 
   2744 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
   2745 		     READ_ONCE(engine->props.preempt_timeout_ms),
   2746 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
   2747 
   2748 	ring_set_paused(engine, 1); /* Freeze the current request in place */
   2749 	if (execlists_capture(engine))
   2750 		intel_engine_reset(engine, "preemption time out");
   2751 	else
   2752 		ring_set_paused(engine, 0);
   2753 
   2754 	tasklet_enable(&engine->execlists.tasklet);
   2755 	clear_and_wake_up_bit(bit, lock);
   2756 }
   2757 
   2758 static bool preempt_timeout(const struct intel_engine_cs *const engine)
   2759 {
   2760 	const struct timer_list *t = &engine->execlists.preempt;
   2761 
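         	/* A timeout only counts if the HW has not yet acked the pending preemption */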
   2762 	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
   2763 		return false;
   2764 
   2765 	if (!timer_expired(t))
   2766 		return false;
   2767 
   2768 	return READ_ONCE(engine->execlists.pending[0]);
   2769 }
   2770 
   2771 /*
   2772  * Check the unread Context Status Buffers and manage the submission of new
   2773  * contexts to the ELSP accordingly.
   2774  */
   2775 static void execlists_submission_tasklet(unsigned long data)
   2776 {
   2777 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
   2778 	bool timeout = preempt_timeout(engine);
   2779 
   2780 	process_csb(engine);
   2781 	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
   2782 		unsigned long flags;
   2783 
   2784 		spin_lock_irqsave(&engine->active.lock, flags);
   2785 		__execlists_submission_tasklet(engine);
   2786 		spin_unlock_irqrestore(&engine->active.lock, flags);
   2787 
   2788 		/* Recheck after serialising with direct-submission */
   2789 		if (timeout && preempt_timeout(engine))
   2790 			preempt_reset(engine);
   2791 	}
   2792 }
   2793 
   2794 static void __execlists_kick(struct intel_engine_execlists *execlists)
   2795 {
   2796 	/* Kick the tasklet for some interrupt coalescing and reset handling */
   2797 	tasklet_hi_schedule(&execlists->tasklet);
   2798 }
   2799 
   2800 #define execlists_kick(t, member) \
   2801 	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
   2802 
   2803 static void execlists_timeslice(struct timer_list *timer)
   2804 {
   2805 	execlists_kick(timer, timer);
   2806 }
   2807 
   2808 static void execlists_preempt(struct timer_list *timer)
   2809 {
   2810 	execlists_kick(timer, preempt);
   2811 }
   2812 
   2813 static void queue_request(struct intel_engine_cs *engine,
   2814 			  struct i915_request *rq)
   2815 {
   2816 	GEM_BUG_ON(!list_empty(&rq->sched.link));
   2817 	list_add_tail(&rq->sched.link,
   2818 		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
   2819 	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
   2820 }
   2821 
   2822 static void __submit_queue_imm(struct intel_engine_cs *engine)
   2823 {
   2824 	struct intel_engine_execlists * const execlists = &engine->execlists;
   2825 
   2826 	if (reset_in_progress(execlists))
   2827 		return; /* defer until we restart the engine following reset */
   2828 
   2829 	if (execlists->tasklet.func == execlists_submission_tasklet)
   2830 		__execlists_submission_tasklet(engine);
   2831 	else
   2832 		tasklet_hi_schedule(&execlists->tasklet);
   2833 }
   2834 
   2835 static void submit_queue(struct intel_engine_cs *engine,
   2836 			 const struct i915_request *rq)
   2837 {
   2838 	struct intel_engine_execlists *execlists = &engine->execlists;
   2839 
   2840 	if (rq_prio(rq) <= execlists->queue_priority_hint)
   2841 		return;
   2842 
   2843 	execlists->queue_priority_hint = rq_prio(rq);
   2844 	__submit_queue_imm(engine);
   2845 }
   2846 
   2847 static bool ancestor_on_hold(const struct intel_engine_cs *engine,
   2848 			     const struct i915_request *rq)
   2849 {
   2850 	GEM_BUG_ON(i915_request_on_hold(rq));
   2851 	return !list_empty(&engine->active.hold) && hold_request(rq);
   2852 }
   2853 
   2854 static void execlists_submit_request(struct i915_request *request)
   2855 {
   2856 	struct intel_engine_cs *engine = request->engine;
   2857 	unsigned long flags;
   2858 
   2859 	/* Will be called from irq-context when using foreign fences. */
   2860 	spin_lock_irqsave(&engine->active.lock, flags);
   2861 
   2862 	if (unlikely(ancestor_on_hold(engine, request))) {
   2863 		list_add_tail(&request->sched.link, &engine->active.hold);
   2864 		i915_request_set_hold(request);
   2865 	} else {
   2866 		queue_request(engine, request);
   2867 
   2868 		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
   2869 		GEM_BUG_ON(list_empty(&request->sched.link));
   2870 
   2871 		submit_queue(engine, request);
   2872 	}
   2873 
   2874 	spin_unlock_irqrestore(&engine->active.lock, flags);
   2875 }
   2876 
   2877 static void __execlists_context_fini(struct intel_context *ce)
   2878 {
   2879 	intel_ring_put(ce->ring);
   2880 	i915_vma_put(ce->state);
   2881 }
   2882 
   2883 static void execlists_context_destroy(struct kref *kref)
   2884 {
   2885 	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
   2886 
   2887 	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
   2888 	GEM_BUG_ON(intel_context_is_pinned(ce));
   2889 
   2890 	if (ce->state)
   2891 		__execlists_context_fini(ce);
   2892 
   2893 	intel_context_fini(ce);
   2894 	intel_context_free(ce);
   2895 }
   2896 
   2897 static void
   2898 set_redzone(void *vaddr, const struct intel_engine_cs *engine)
   2899 {
   2900 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2901 		return;
   2902 
   2903 	vaddr += engine->context_size;
   2904 
   2905 	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
   2906 }
   2907 
   2908 static void
   2909 check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
   2910 {
   2911 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   2912 		return;
   2913 
   2914 	vaddr += engine->context_size;
   2915 
   2916 	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
   2917 		dev_err_once(engine->i915->drm.dev,
   2918 			     "%s context redzone overwritten!\n",
   2919 			     engine->name);
   2920 }
   2921 
   2922 static void execlists_context_unpin(struct intel_context *ce)
   2923 {
   2924 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
   2925 		      ce->engine);
   2926 
   2927 	i915_gem_object_unpin_map(ce->state->obj);
   2928 }
   2929 
   2930 static void
   2931 __execlists_update_reg_state(const struct intel_context *ce,
   2932 			     const struct intel_engine_cs *engine,
   2933 			     u32 head)
   2934 {
   2935 	struct intel_ring *ring = ce->ring;
   2936 	u32 *regs = ce->lrc_reg_state;
   2937 
   2938 	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
   2939 	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
   2940 
   2941 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
   2942 	regs[CTX_RING_HEAD] = head;
   2943 	regs[CTX_RING_TAIL] = ring->tail;
   2944 
   2945 	/* RPCS */
   2946 	if (engine->class == RENDER_CLASS) {
   2947 		regs[CTX_R_PWR_CLK_STATE] =
   2948 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
   2949 
   2950 		i915_oa_init_reg_state(ce, engine);
   2951 	}
   2952 }
   2953 
   2954 static int
   2955 __execlists_context_pin(struct intel_context *ce,
   2956 			struct intel_engine_cs *engine)
   2957 {
   2958 	void *vaddr;
   2959 
   2960 	GEM_BUG_ON(!ce->state);
   2961 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   2962 
   2963 	vaddr = i915_gem_object_pin_map(ce->state->obj,
   2964 					i915_coherent_map_type(engine->i915) |
   2965 					I915_MAP_OVERRIDE);
   2966 	if (IS_ERR(vaddr))
   2967 		return PTR_ERR(vaddr);
   2968 
   2969 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
   2970 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
   2971 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
   2972 
   2973 	return 0;
   2974 }
   2975 
   2976 static int execlists_context_pin(struct intel_context *ce)
   2977 {
   2978 	return __execlists_context_pin(ce, ce->engine);
   2979 }
   2980 
   2981 static int execlists_context_alloc(struct intel_context *ce)
   2982 {
   2983 	return __execlists_context_alloc(ce, ce->engine);
   2984 }
   2985 
   2986 static void execlists_context_reset(struct intel_context *ce)
   2987 {
   2988 	CE_TRACE(ce, "reset\n");
   2989 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   2990 
   2991 	/*
   2992 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
   2993 	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
   2994 	 * that stored in context. As we only write new commands from
   2995 	 * ce->ring->tail onwards, everything before that is junk. If the GPU
   2996 	 * starts reading from its RING_HEAD from the context, it may try to
   2997 	 * execute that junk and die.
   2998 	 *
   2999 	 * The contexts that are stilled pinned on resume belong to the
    3000 	 * The contexts that are still pinned on resume belong to the
   3001 	 * have their head/tail sanitized upon pinning before use, so they
   3002 	 * will never see garbage,
    3003 	 * will never see garbage.
   3004 	 * So to avoid that we reset the context images upon resume. For
   3005 	 * simplicity, we just zero everything out.
   3006 	 */
   3007 	intel_ring_reset(ce->ring, ce->ring->emit);
   3008 
   3009 	/* Scrub away the garbage */
   3010 	execlists_init_reg_state(ce->lrc_reg_state,
   3011 				 ce, ce->engine, ce->ring, true);
   3012 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
   3013 
   3014 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
   3015 }
   3016 
   3017 static const struct intel_context_ops execlists_context_ops = {
   3018 	.alloc = execlists_context_alloc,
   3019 
   3020 	.pin = execlists_context_pin,
   3021 	.unpin = execlists_context_unpin,
   3022 
   3023 	.enter = intel_context_enter_engine,
   3024 	.exit = intel_context_exit_engine,
   3025 
   3026 	.reset = execlists_context_reset,
   3027 	.destroy = execlists_context_destroy,
   3028 };
   3029 
   3030 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
   3031 {
   3032 	u32 *cs;
   3033 
   3034 	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
   3035 
   3036 	cs = intel_ring_begin(rq, 6);
   3037 	if (IS_ERR(cs))
   3038 		return PTR_ERR(cs);
   3039 
   3040 	/*
   3041 	 * Check if we have been preempted before we even get started.
   3042 	 *
   3043 	 * After this point i915_request_started() reports true, even if
   3044 	 * we get preempted and so are no longer running.
   3045 	 */
   3046 	*cs++ = MI_ARB_CHECK;
   3047 	*cs++ = MI_NOOP;
   3048 
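         	/* Record that the request has started by advancing the timeline HWSP to seqno-1 */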
   3049 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
   3050 	*cs++ = i915_request_timeline(rq)->hwsp_offset;
   3051 	*cs++ = 0;
   3052 	*cs++ = rq->fence.seqno - 1;
   3053 
   3054 	intel_ring_advance(rq, cs);
   3055 
   3056 	/* Record the updated position of the request's payload */
   3057 	rq->infix = intel_ring_offset(rq, cs);
   3058 
   3059 	return 0;
   3060 }
   3061 
   3062 static int execlists_request_alloc(struct i915_request *request)
   3063 {
   3064 	int ret;
   3065 
   3066 	GEM_BUG_ON(!intel_context_is_pinned(request->context));
   3067 
   3068 	/*
   3069 	 * Flush enough space to reduce the likelihood of waiting after
   3070 	 * we start building the request - in which case we will just
   3071 	 * have to repeat work.
   3072 	 */
   3073 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
   3074 
   3075 	/*
   3076 	 * Note that after this point, we have committed to using
   3077 	 * this request as it is being used to both track the
   3078 	 * state of engine initialisation and liveness of the
   3079 	 * golden renderstate above. Think twice before you try
   3080 	 * to cancel/unwind this request now.
   3081 	 */
   3082 
   3083 	/* Unconditionally invalidate GPU caches and TLBs. */
   3084 	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
   3085 	if (ret)
   3086 		return ret;
   3087 
   3088 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
   3089 	return 0;
   3090 }
   3091 
   3092 /*
   3093  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
   3094  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
   3095  * but there is a slight complication as this is applied in WA batch where the
   3096  * values are only initialized once so we cannot take register value at the
   3097  * beginning and reuse it further; hence we save its value to memory, upload a
   3098  * constant value with bit21 set and then we restore it back with the saved value.
   3099  * To simplify the WA, a constant value is formed by using the default value
   3100  * of this register. This shouldn't be a problem because we are only modifying
    3101 	 * it for a short period and this batch is non-preemptible. We can of course
   3102  * use additional instructions that read the actual value of the register
   3103  * at that time and set our bit of interest but it makes the WA complicated.
   3104  *
   3105  * This WA is also required for Gen9 so extracting as a function avoids
   3106  * code duplication.
   3107  */
   3108 static u32 *
   3109 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
   3110 {
   3111 	/* NB no one else is allowed to scribble over scratch + 256! */
   3112 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3113 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3114 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3115 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3116 	*batch++ = 0;
   3117 
   3118 	*batch++ = MI_LOAD_REGISTER_IMM(1);
   3119 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3120 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
   3121 
   3122 	batch = gen8_emit_pipe_control(batch,
   3123 				       PIPE_CONTROL_CS_STALL |
   3124 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
   3125 				       0);
   3126 
   3127 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
   3128 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
   3129 	*batch++ = intel_gt_scratch_offset(engine->gt,
   3130 					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
   3131 	*batch++ = 0;
   3132 
   3133 	return batch;
   3134 }
   3135 
   3136 /*
   3137  * Typically we only have one indirect_ctx and per_ctx batch buffer which are
   3138  * initialized at the beginning and shared across all contexts but this field
   3139  * helps us to have multiple batches at different offsets and select them based
    3140 	 * on some criteria. At the moment this batch always starts at the beginning of the page
   3141  * and at this point we don't have multiple wa_ctx batch buffers.
   3142  *
   3143  * The number of WA applied are not known at the beginning; we use this field
   3144  * to return the no of DWORDS written.
   3145  *
   3146  * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
   3147  * so it adds NOOPs as padding to make it cacheline aligned.
   3148  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
   3149  * makes a complete batch buffer.
   3150  */
   3151 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3152 {
   3153 	/* WaDisableCtxRestoreArbitration:bdw,chv */
   3154 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3155 
   3156 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
   3157 	if (IS_BROADWELL(engine->i915))
   3158 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3159 
   3160 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
   3161 	/* Actual scratch location is at 128 bytes offset */
   3162 	batch = gen8_emit_pipe_control(batch,
   3163 				       PIPE_CONTROL_FLUSH_L3 |
   3164 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3165 				       PIPE_CONTROL_CS_STALL |
   3166 				       PIPE_CONTROL_QW_WRITE,
   3167 				       LRC_PPHWSP_SCRATCH_ADDR);
   3168 
   3169 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3170 
   3171 	/* Pad to end of cacheline */
   3172 	while ((unsigned long)batch % CACHELINE_BYTES)
   3173 		*batch++ = MI_NOOP;
   3174 
   3175 	/*
   3176 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
   3177 	 * execution depends on the length specified in terms of cache lines
   3178 	 * in the register CTX_RCS_INDIRECT_CTX
   3179 	 */
   3180 
   3181 	return batch;
   3182 }
   3183 
   3184 struct lri {
   3185 	i915_reg_t reg;
   3186 	u32 value;
   3187 };
   3188 
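/*
 * Emit a single MI_LOAD_REGISTER_IMM carrying count (register, value)
 * pairs (at most 63, see the GEM_BUG_ON below), plus a trailing MI_NOOP
 * which keeps the emitted length an even number of dwords.
 */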
   3189 static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
   3190 {
   3191 	GEM_BUG_ON(!count || count > 63);
   3192 
   3193 	*batch++ = MI_LOAD_REGISTER_IMM(count);
   3194 	do {
   3195 		*batch++ = i915_mmio_reg_offset(lri->reg);
   3196 		*batch++ = lri->value;
   3197 	} while (lri++, --count);
   3198 	*batch++ = MI_NOOP;
   3199 
   3200 	return batch;
   3201 }
   3202 
   3203 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3204 {
   3205 	static const struct lri lri[] = {
   3206 		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
   3207 		{
   3208 			COMMON_SLICE_CHICKEN2,
   3209 			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
   3210 				       0),
   3211 		},
   3212 
   3213 		/* BSpec: 11391 */
   3214 		{
   3215 			FF_SLICE_CHICKEN,
   3216 			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
   3217 				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
   3218 		},
   3219 
   3220 		/* BSpec: 11299 */
   3221 		{
   3222 			_3D_CHICKEN3,
   3223 			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
   3224 				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
   3225 		}
   3226 	};
   3227 
   3228 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3229 
   3230 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
   3231 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
   3232 
   3233 	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
   3234 	batch = gen8_emit_pipe_control(batch,
   3235 				       PIPE_CONTROL_FLUSH_L3 |
   3236 				       PIPE_CONTROL_STORE_DATA_INDEX |
   3237 				       PIPE_CONTROL_CS_STALL |
   3238 				       PIPE_CONTROL_QW_WRITE,
   3239 				       LRC_PPHWSP_SCRATCH_ADDR);
   3240 
   3241 	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
   3242 
   3243 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
   3244 	if (HAS_POOLED_EU(engine->i915)) {
   3245 		/*
    3246 		 * The EU pool configuration is set up along with the golden
    3247 		 * context during context initialization. This value depends
    3248 		 * on the device type (2x6 or 3x6) and needs to be updated
    3249 		 * based on which subslices are disabled, especially for 2x6
    3250 		 * devices. However, it is safe to load the default 3x6
    3251 		 * configuration instead of masking off the corresponding
    3252 		 * bits, because the HW ignores bits of a disabled subslice
    3253 		 * and drops down to the appropriate config. Please see
    3254 		 * render_state_setup() in i915_gem_render_state.c for the
    3255 		 * possible configurations; to avoid duplication they are
    3256 		 * not shown here again.
   3257 		 */
   3258 		*batch++ = GEN9_MEDIA_POOL_STATE;
   3259 		*batch++ = GEN9_MEDIA_POOL_ENABLE;
   3260 		*batch++ = 0x00777000;
   3261 		*batch++ = 0;
   3262 		*batch++ = 0;
   3263 		*batch++ = 0;
   3264 	}
   3265 
   3266 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3267 
   3268 	/* Pad to end of cacheline */
   3269 	while ((unsigned long)batch % CACHELINE_BYTES)
   3270 		*batch++ = MI_NOOP;
   3271 
   3272 	return batch;
   3273 }
   3274 
   3275 static u32 *
   3276 gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   3277 {
   3278 	int i;
   3279 
   3280 	/*
   3281 	 * WaPipeControlBefore3DStateSamplePattern: cnl
   3282 	 *
   3283 	 * Ensure the engine is idle prior to programming a
   3284 	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
   3285 	 */
   3286 	batch = gen8_emit_pipe_control(batch,
   3287 				       PIPE_CONTROL_CS_STALL,
   3288 				       0);
   3289 	/*
   3290 	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
   3291 	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
   3292 	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
   3293 	 * confusing. Since gen8_emit_pipe_control() already advances the
   3294 	 * batch by 6 dwords, we advance the other 10 here, completing a
   3295 	 * cacheline. It's not clear if the workaround requires this padding
   3296 	 * before other commands, or if it's just the regular padding we would
   3297 	 * already have for the workaround bb, so leave it here for now.
   3298 	 */
   3299 	for (i = 0; i < 10; i++)
   3300 		*batch++ = MI_NOOP;
   3301 
   3302 	/* Pad to end of cacheline */
   3303 	while ((unsigned long)batch % CACHELINE_BYTES)
   3304 		*batch++ = MI_NOOP;
   3305 
   3306 	return batch;
   3307 }
   3308 
   3309 #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
   3310 
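/*
 * Allocate a single page for the per-engine workaround batch buffers and
 * pin it high in the global GTT; both the indirect_ctx and per_ctx
 * batches are written into this one object.
 */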
   3311 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
   3312 {
   3313 	struct drm_i915_gem_object *obj;
   3314 	struct i915_vma *vma;
   3315 	int err;
   3316 
   3317 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
   3318 	if (IS_ERR(obj))
   3319 		return PTR_ERR(obj);
   3320 
   3321 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
   3322 	if (IS_ERR(vma)) {
   3323 		err = PTR_ERR(vma);
   3324 		goto err;
   3325 	}
   3326 
   3327 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
   3328 	if (err)
   3329 		goto err;
   3330 
   3331 	engine->wa_ctx.vma = vma;
   3332 	return 0;
   3333 
   3334 err:
   3335 	i915_gem_object_put(obj);
   3336 	return err;
   3337 }
   3338 
   3339 static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
   3340 {
   3341 	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
   3342 }
   3343 
   3344 typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
   3345 
   3346 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
   3347 {
   3348 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
   3349 	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
   3350 					    &wa_ctx->per_ctx };
   3351 	wa_bb_func_t wa_bb_fn[2];
   3352 	struct page *page;
   3353 	void *batch, *batch_ptr;
   3354 	unsigned int i;
   3355 	int ret;
   3356 
   3357 	if (engine->class != RENDER_CLASS)
   3358 		return 0;
   3359 
   3360 	switch (INTEL_GEN(engine->i915)) {
   3361 	case 12:
   3362 	case 11:
   3363 		return 0;
   3364 	case 10:
   3365 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
   3366 		wa_bb_fn[1] = NULL;
   3367 		break;
   3368 	case 9:
   3369 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
   3370 		wa_bb_fn[1] = NULL;
   3371 		break;
   3372 	case 8:
   3373 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
   3374 		wa_bb_fn[1] = NULL;
   3375 		break;
   3376 	default:
   3377 		MISSING_CASE(INTEL_GEN(engine->i915));
   3378 		return 0;
   3379 	}
   3380 
   3381 	ret = lrc_setup_wa_ctx(engine);
   3382 	if (ret) {
   3383 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
   3384 		return ret;
   3385 	}
   3386 
   3387 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
   3388 	batch = batch_ptr = kmap_atomic(page);
   3389 
   3390 	/*
   3391 	 * Emit the two workaround batch buffers, recording the offset from the
   3392 	 * start of the workaround batch buffer object for each and their
   3393 	 * respective sizes.
   3394 	 */
   3395 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
   3396 		wa_bb[i]->offset = batch_ptr - batch;
   3397 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
   3398 						  CACHELINE_BYTES))) {
   3399 			ret = -EINVAL;
   3400 			break;
   3401 		}
   3402 		if (wa_bb_fn[i])
   3403 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
   3404 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
   3405 	}
   3406 
   3407 	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
   3408 
   3409 	kunmap_atomic(batch);
   3410 	if (ret)
   3411 		lrc_destroy_wa_ctx(engine);
   3412 
   3413 	return ret;
   3414 }
   3415 
   3416 static void enable_execlists(struct intel_engine_cs *engine)
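/*
 * Program the engine for execlists submission: set the HWSP write mask
 * (HWSTAM), pick run-list enable vs. legacy-mode disable for this gen,
 * clear STOP_RING and point RING_HWS_PGA at our status page.
 */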
   3417 {
   3418 	u32 mode;
   3419 
   3420 	assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
   3421 
   3422 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
   3423 
   3424 	if (INTEL_GEN(engine->i915) >= 11)
   3425 		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
   3426 	else
   3427 		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
   3428 	ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
   3429 
   3430 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
   3431 
   3432 	ENGINE_WRITE_FW(engine,
   3433 			RING_HWS_PGA,
   3434 			i915_ggtt_offset(engine->status_page.vma));
   3435 	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
   3436 
   3437 	engine->context_tag = 0;
   3438 }
   3439 
   3440 static bool unexpected_starting_state(struct intel_engine_cs *engine)
   3441 {
   3442 	bool unexpected = false;
   3443 
   3444 	if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
   3445 		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
   3446 		unexpected = true;
   3447 	}
   3448 
   3449 	return unexpected;
   3450 }
   3451 
   3452 static int execlists_resume(struct intel_engine_cs *engine)
   3453 {
   3454 	intel_engine_apply_workarounds(engine);
   3455 	intel_engine_apply_whitelist(engine);
   3456 
   3457 	intel_mocs_init_engine(engine);
   3458 
   3459 	intel_engine_reset_breadcrumbs(engine);
   3460 
   3461 	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
   3462 		struct drm_printer p = drm_debug_printer(__func__);
   3463 
   3464 		intel_engine_dump(engine, &p, NULL);
   3465 	}
   3466 
   3467 	enable_execlists(engine);
   3468 
   3469 	return 0;
   3470 }
   3471 
   3472 static void execlists_reset_prepare(struct intel_engine_cs *engine)
   3473 {
   3474 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3475 	unsigned long flags;
   3476 
   3477 	ENGINE_TRACE(engine, "depth<-%d\n",
   3478 		     atomic_read(&execlists->tasklet.count));
   3479 
   3480 	/*
   3481 	 * Prevent request submission to the hardware until we have
   3482 	 * completed the reset in i915_gem_reset_finish(). If a request
   3483 	 * is completed by one engine, it may then queue a request
   3484 	 * to a second via its execlists->tasklet *just* as we are
   3485 	 * calling engine->resume() and also writing the ELSP.
   3486 	 * Turning off the execlists->tasklet until the reset is over
   3487 	 * prevents the race.
   3488 	 */
   3489 	__tasklet_disable_sync_once(&execlists->tasklet);
   3490 	GEM_BUG_ON(!reset_in_progress(execlists));
   3491 
   3492 	/* And flush any current direct submission. */
   3493 	spin_lock_irqsave(&engine->active.lock, flags);
   3494 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3495 
   3496 	/*
    3497 	 * We stop the engines, otherwise we might get a failed reset and
    3498 	 * a dead gpu (on elk). Even a gpu as modern as kbl can suffer a
    3499 	 * system hang if a batchbuffer is still progressing when
    3500 	 * the reset is issued, regardless of the READY_TO_RESET ack.
    3501 	 * Thus assume it is best to stop the engines on all gens
    3502 	 * where we have a gpu reset.
   3503 	 *
   3504 	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
   3505 	 *
   3506 	 * FIXME: Wa for more modern gens needs to be validated
   3507 	 */
   3508 	intel_engine_stop_cs(engine);
   3509 }
   3510 
   3511 static void reset_csb_pointers(struct intel_engine_cs *engine)
   3512 {
   3513 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3514 	const unsigned int reset_value = execlists->csb_size - 1;
   3515 
   3516 	ring_set_paused(engine, 0);
   3517 
   3518 	/*
   3519 	 * After a reset, the HW starts writing into CSB entry [0]. We
   3520 	 * therefore have to set our HEAD pointer back one entry so that
   3521 	 * the *first* entry we check is entry 0. To complicate this further,
   3522 	 * as we don't wait for the first interrupt after reset, we have to
   3523 	 * fake the HW write to point back to the last entry so that our
   3524 	 * inline comparison of our cached head position against the last HW
   3525 	 * write works even before the first interrupt.
   3526 	 */
   3527 	execlists->csb_head = reset_value;
   3528 	WRITE_ONCE(*execlists->csb_write, reset_value);
   3529 	wmb(); /* Make sure this is visible to HW (paranoia?) */
   3530 
   3531 	/*
   3532 	 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
   3533 	 * Bludgeon them with a mmio update to be sure.
   3534 	 */
   3535 	ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
   3536 		     reset_value << 8 | reset_value);
   3537 	ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
   3538 
   3539 	invalidate_csb_entries(&execlists->csb_status[0],
   3540 			       &execlists->csb_status[reset_value]);
   3541 }
   3542 
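/*
 * Clear STOP_RING in the context image's RING_MI_MODE. The register is
 * masked: the upper 16 bits select which bits the write affects, so we
 * clear the value bit and set the corresponding mask bit.
 */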
   3543 static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
   3544 {
   3545 	int x;
   3546 
   3547 	x = lrc_ring_mi_mode(engine);
   3548 	if (x != -1) {
   3549 		regs[x + 1] &= ~STOP_RING;
   3550 		regs[x + 1] |= STOP_RING << 16;
   3551 	}
   3552 }
   3553 
   3554 static void __execlists_reset_reg_state(const struct intel_context *ce,
   3555 					const struct intel_engine_cs *engine)
   3556 {
   3557 	u32 *regs = ce->lrc_reg_state;
   3558 
   3559 	__reset_stop_ring(regs, engine);
   3560 }
   3561 
   3562 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
   3563 {
   3564 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3565 	struct intel_context *ce;
   3566 	struct i915_request *rq;
   3567 	u32 head;
   3568 
   3569 	mb(); /* paranoia: read the CSB pointers from after the reset */
   3570 	clflush(execlists->csb_write);
   3571 	mb();
   3572 
   3573 	process_csb(engine); /* drain preemption events */
   3574 
   3575 	/* Following the reset, we need to reload the CSB read/write pointers */
   3576 	reset_csb_pointers(engine);
   3577 
   3578 	/*
    3579 	 * Save the currently executing context: even if we completed
    3580 	 * its request, it was still running at the time of the
    3581 	 * reset and will have been clobbered.
   3582 	 */
   3583 	rq = execlists_active(execlists);
   3584 	if (!rq)
   3585 		goto unwind;
   3586 
   3587 	/* We still have requests in-flight; the engine should be active */
   3588 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
   3589 
   3590 	ce = rq->context;
   3591 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
   3592 
   3593 	if (i915_request_completed(rq)) {
   3594 		/* Idle context; tidy up the ring so we can restart afresh */
   3595 		head = intel_ring_wrap(ce->ring, rq->tail);
   3596 		goto out_replay;
   3597 	}
   3598 
   3599 	/* Context has requests still in-flight; it should not be idle! */
   3600 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
   3601 	rq = active_request(ce->timeline, rq);
   3602 	head = intel_ring_wrap(ce->ring, rq->head);
   3603 	GEM_BUG_ON(head == ce->ring->tail);
   3604 
   3605 	/*
   3606 	 * If this request hasn't started yet, e.g. it is waiting on a
   3607 	 * semaphore, we need to avoid skipping the request or else we
   3608 	 * break the signaling chain. However, if the context is corrupt
   3609 	 * the request will not restart and we will be stuck with a wedged
    3610 	 * device. It is quite often the case that if we issue a reset
    3611 	 * while the GPU is loading the context image, the context
    3612 	 * image becomes corrupt.
   3613 	 *
   3614 	 * Otherwise, if we have not started yet, the request should replay
   3615 	 * perfectly and we do not need to flag the result as being erroneous.
   3616 	 */
   3617 	if (!i915_request_started(rq))
   3618 		goto out_replay;
   3619 
   3620 	/*
   3621 	 * If the request was innocent, we leave the request in the ELSP
   3622 	 * and will try to replay it on restarting. The context image may
   3623 	 * have been corrupted by the reset, in which case we may have
   3624 	 * to service a new GPU hang, but more likely we can continue on
   3625 	 * without impact.
   3626 	 *
   3627 	 * If the request was guilty, we presume the context is corrupt
   3628 	 * and have to at least restore the RING register in the context
   3629 	 * image back to the expected values to skip over the guilty request.
   3630 	 */
   3631 	__i915_request_reset(rq, stalled);
   3632 	if (!stalled)
   3633 		goto out_replay;
   3634 
   3635 	/*
   3636 	 * We want a simple context + ring to execute the breadcrumb update.
   3637 	 * We cannot rely on the context being intact across the GPU hang,
   3638 	 * so clear it and rebuild just what we need for the breadcrumb.
   3639 	 * All pending requests for this context will be zapped, and any
   3640 	 * future request will be after userspace has had the opportunity
   3641 	 * to recreate its own state.
   3642 	 */
   3643 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   3644 	restore_default_state(ce, engine);
   3645 
   3646 out_replay:
   3647 	ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
   3648 		     head, ce->ring->tail);
   3649 	__execlists_reset_reg_state(ce, engine);
   3650 	__execlists_update_reg_state(ce, engine, head);
   3651 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
   3652 
   3653 unwind:
   3654 	/* Push back any incomplete requests for replay after the reset. */
   3655 	cancel_port_requests(execlists);
   3656 	__unwind_incomplete_requests(engine);
   3657 }
   3658 
   3659 static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
   3660 {
   3661 	unsigned long flags;
   3662 
   3663 	ENGINE_TRACE(engine, "\n");
   3664 
   3665 	spin_lock_irqsave(&engine->active.lock, flags);
   3666 
   3667 	__execlists_reset(engine, stalled);
   3668 
   3669 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3670 }
   3671 
   3672 static void nop_submission_tasklet(unsigned long data)
   3673 {
   3674 	/* The driver is wedged; don't process any more events. */
   3675 }
   3676 
   3677 static void execlists_reset_cancel(struct intel_engine_cs *engine)
   3678 {
   3679 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3680 	struct i915_request *rq, *rn;
   3681 	struct rb_node *rb;
   3682 	unsigned long flags;
   3683 
   3684 	ENGINE_TRACE(engine, "\n");
   3685 
   3686 	/*
   3687 	 * Before we call engine->cancel_requests(), we should have exclusive
   3688 	 * access to the submission state. This is arranged for us by the
   3689 	 * caller disabling the interrupt generation, the tasklet and other
   3690 	 * threads that may then access the same state, giving us a free hand
   3691 	 * to reset state. However, we still need to let lockdep be aware that
   3692 	 * we know this state may be accessed in hardirq context, so we
   3693 	 * disable the irq around this manipulation and we want to keep
   3694 	 * the spinlock focused on its duties and not accidentally conflate
   3695 	 * coverage to the submission's irq state. (Similarly, although we
   3696 	 * shouldn't need to disable irq around the manipulation of the
   3697 	 * submission's irq state, we also wish to remind ourselves that
   3698 	 * it is irq state.)
   3699 	 */
   3700 	spin_lock_irqsave(&engine->active.lock, flags);
   3701 
   3702 	__execlists_reset(engine, true);
   3703 
   3704 	/* Mark all executing requests as skipped. */
   3705 	list_for_each_entry(rq, &engine->active.requests, sched.link)
   3706 		mark_eio(rq);
   3707 
   3708 	/* Flush the queued requests to the timeline list (for retiring). */
   3709 	while ((rb = rb_first_cached(&execlists->queue))) {
   3710 		struct i915_priolist *p = to_priolist(rb);
   3711 		int i;
   3712 
   3713 		priolist_for_each_request_consume(rq, rn, p, i) {
   3714 			mark_eio(rq);
   3715 			__i915_request_submit(rq);
   3716 		}
   3717 
   3718 		rb_erase_cached(&p->node, &execlists->queue);
   3719 		i915_priolist_free(p);
   3720 	}
   3721 
   3722 	/* On-hold requests will be flushed to timeline upon their release */
   3723 	list_for_each_entry(rq, &engine->active.hold, sched.link)
   3724 		mark_eio(rq);
   3725 
   3726 	/* Cancel all attached virtual engines */
   3727 	while ((rb = rb_first_cached(&execlists->virtual))) {
   3728 		struct virtual_engine *ve =
   3729 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   3730 
   3731 		rb_erase_cached(rb, &execlists->virtual);
   3732 		container_of(rb, struct ve_node, rb)->inserted = false;
   3733 
   3734 		spin_lock(&ve->base.active.lock);
   3735 		rq = fetch_and_zero(&ve->request);
   3736 		if (rq) {
   3737 			mark_eio(rq);
   3738 
   3739 			rq->engine = engine;
   3740 			__i915_request_submit(rq);
   3741 			i915_request_put(rq);
   3742 
   3743 			ve->base.execlists.queue_priority_hint = INT_MIN;
   3744 		}
   3745 		spin_unlock(&ve->base.active.lock);
   3746 	}
   3747 
   3748 	/* Remaining _unready_ requests will be nop'ed when submitted */
   3749 
   3750 	execlists->queue_priority_hint = INT_MIN;
   3751 #ifdef __NetBSD__
   3752 	i915_sched_init(execlists);
   3753 	rb_tree_init(&execlists->virtual.rb_root.rbr_tree, &ve_tree_ops);
   3754 #else
   3755 	execlists->queue = RB_ROOT_CACHED;
   3756 #endif
   3757 
   3758 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
   3759 	execlists->tasklet.func = nop_submission_tasklet;
   3760 
   3761 	spin_unlock_irqrestore(&engine->active.lock, flags);
   3762 }
   3763 
   3764 static void execlists_reset_finish(struct intel_engine_cs *engine)
   3765 {
   3766 	struct intel_engine_execlists * const execlists = &engine->execlists;
   3767 
   3768 	/*
   3769 	 * After a GPU reset, we may have requests to replay. Do so now while
   3770 	 * we still have the forcewake to be sure that the GPU is not allowed
   3771 	 * to sleep before we restart and reload a context.
   3772 	 */
   3773 	GEM_BUG_ON(!reset_in_progress(execlists));
   3774 	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
   3775 		execlists->tasklet.func(execlists->tasklet.data);
   3776 
   3777 	if (__tasklet_enable(&execlists->tasklet))
   3778 		/* And kick in case we missed a new request submission. */
   3779 		tasklet_hi_schedule(&execlists->tasklet);
   3780 	ENGINE_TRACE(engine, "depth->%d\n",
   3781 		     atomic_read(&execlists->tasklet.count));
   3782 }
   3783 
   3784 static int gen8_emit_bb_start_noarb(struct i915_request *rq,
   3785 				    u64 offset, u32 len,
   3786 				    const unsigned int flags)
   3787 {
   3788 	u32 *cs;
   3789 
   3790 	cs = intel_ring_begin(rq, 4);
   3791 	if (IS_ERR(cs))
   3792 		return PTR_ERR(cs);
   3793 
   3794 	/*
   3795 	 * WaDisableCtxRestoreArbitration:bdw,chv
   3796 	 *
    3797 	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
    3798 	 * particular on all the gens that do not need the w/a at all!); if we
    3799 	 * took care to make sure that on every switch into this context
    3800 	 * (both ordinary and for preemption) arbitration was enabled,
    3801 	 * we would be fine.  However, for gen8 there is another w/a that
   3802 	 * requires us to not preempt inside GPGPU execution, so we keep
   3803 	 * arbitration disabled for gen8 batches. Arbitration will be
   3804 	 * re-enabled before we close the request
   3805 	 * (engine->emit_fini_breadcrumb).
   3806 	 */
   3807 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3808 
   3809 	/* FIXME(BDW+): Address space and security selectors. */
   3810 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3811 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3812 	*cs++ = lower_32_bits(offset);
   3813 	*cs++ = upper_32_bits(offset);
   3814 
   3815 	intel_ring_advance(rq, cs);
   3816 
   3817 	return 0;
   3818 }
   3819 
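/*
 * Preemptible variant of the above: arbitration is enabled just before
 * the batch so it may be preempted while running, and disabled again
 * afterwards until the fini breadcrumb re-enables it at a known point.
 */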
   3820 static int gen8_emit_bb_start(struct i915_request *rq,
   3821 			      u64 offset, u32 len,
   3822 			      const unsigned int flags)
   3823 {
   3824 	u32 *cs;
   3825 
   3826 	cs = intel_ring_begin(rq, 6);
   3827 	if (IS_ERR(cs))
   3828 		return PTR_ERR(cs);
   3829 
   3830 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   3831 
   3832 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
   3833 		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
   3834 	*cs++ = lower_32_bits(offset);
   3835 	*cs++ = upper_32_bits(offset);
   3836 
   3837 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
   3838 	*cs++ = MI_NOOP;
   3839 
   3840 	intel_ring_advance(rq, cs);
   3841 
   3842 	return 0;
   3843 }
   3844 
   3845 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
   3846 {
   3847 	ENGINE_WRITE(engine, RING_IMR,
   3848 		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
   3849 	ENGINE_POSTING_READ(engine, RING_IMR);
   3850 }
   3851 
   3852 static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
   3853 {
   3854 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
   3855 }
   3856 
   3857 static int gen8_emit_flush(struct i915_request *request, u32 mode)
   3858 {
   3859 	u32 cmd, *cs;
   3860 
   3861 	cs = intel_ring_begin(request, 4);
   3862 	if (IS_ERR(cs))
   3863 		return PTR_ERR(cs);
   3864 
   3865 	cmd = MI_FLUSH_DW + 1;
   3866 
   3867 	/* We always require a command barrier so that subsequent
   3868 	 * commands, such as breadcrumb interrupts, are strictly ordered
   3869 	 * wrt the contents of the write cache being flushed to memory
   3870 	 * (and thus being coherent from the CPU).
   3871 	 */
   3872 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
   3873 
   3874 	if (mode & EMIT_INVALIDATE) {
   3875 		cmd |= MI_INVALIDATE_TLB;
   3876 		if (request->engine->class == VIDEO_DECODE_CLASS)
   3877 			cmd |= MI_INVALIDATE_BSD;
   3878 	}
   3879 
   3880 	*cs++ = cmd;
   3881 	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
   3882 	*cs++ = 0; /* upper addr */
   3883 	*cs++ = 0; /* value */
   3884 	intel_ring_advance(request, cs);
   3885 
   3886 	return 0;
   3887 }
   3888 
   3889 static int gen8_emit_flush_render(struct i915_request *request,
   3890 				  u32 mode)
   3891 {
   3892 	bool vf_flush_wa = false, dc_flush_wa = false;
   3893 	u32 *cs, flags = 0;
   3894 	int len;
   3895 
   3896 	flags |= PIPE_CONTROL_CS_STALL;
   3897 
   3898 	if (mode & EMIT_FLUSH) {
   3899 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3900 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3901 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3902 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3903 	}
   3904 
   3905 	if (mode & EMIT_INVALIDATE) {
   3906 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3907 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3908 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3909 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3910 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3911 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3912 		flags |= PIPE_CONTROL_QW_WRITE;
   3913 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3914 
   3915 		/*
   3916 		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
   3917 		 * pipe control.
   3918 		 */
   3919 		if (IS_GEN(request->i915, 9))
   3920 			vf_flush_wa = true;
   3921 
   3922 		/* WaForGAMHang:kbl */
   3923 		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
   3924 			dc_flush_wa = true;
   3925 	}
   3926 
   3927 	len = 6;
   3928 
   3929 	if (vf_flush_wa)
   3930 		len += 6;
   3931 
   3932 	if (dc_flush_wa)
   3933 		len += 12;
   3934 
   3935 	cs = intel_ring_begin(request, len);
   3936 	if (IS_ERR(cs))
   3937 		return PTR_ERR(cs);
   3938 
   3939 	if (vf_flush_wa)
   3940 		cs = gen8_emit_pipe_control(cs, 0, 0);
   3941 
   3942 	if (dc_flush_wa)
   3943 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
   3944 					    0);
   3945 
   3946 	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3947 
   3948 	if (dc_flush_wa)
   3949 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
   3950 
   3951 	intel_ring_advance(request, cs);
   3952 
   3953 	return 0;
   3954 }
   3955 
   3956 static int gen11_emit_flush_render(struct i915_request *request,
   3957 				   u32 mode)
   3958 {
   3959 	if (mode & EMIT_FLUSH) {
   3960 		u32 *cs;
   3961 		u32 flags = 0;
   3962 
   3963 		flags |= PIPE_CONTROL_CS_STALL;
   3964 
   3965 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   3966 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   3967 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   3968 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   3969 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   3970 		flags |= PIPE_CONTROL_QW_WRITE;
   3971 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3972 
   3973 		cs = intel_ring_begin(request, 6);
   3974 		if (IS_ERR(cs))
   3975 			return PTR_ERR(cs);
   3976 
   3977 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   3978 		intel_ring_advance(request, cs);
   3979 	}
   3980 
   3981 	if (mode & EMIT_INVALIDATE) {
   3982 		u32 *cs;
   3983 		u32 flags = 0;
   3984 
   3985 		flags |= PIPE_CONTROL_CS_STALL;
   3986 
   3987 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   3988 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   3989 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   3990 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   3991 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   3992 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   3993 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   3994 		flags |= PIPE_CONTROL_QW_WRITE;
   3995 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   3996 
   3997 		cs = intel_ring_begin(request, 6);
   3998 		if (IS_ERR(cs))
   3999 			return PTR_ERR(cs);
   4000 
   4001 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   4002 		intel_ring_advance(request, cs);
   4003 	}
   4004 
   4005 	return 0;
   4006 }
   4007 
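/*
 * Gen12 overloads MI_ARB_CHECK to toggle the CS pre-parser: bit 8 flags
 * the pre-fetch disable field as being updated and bit 0 carries the
 * requested state. Used around the TLB invalidate in
 * gen12_emit_flush_render() below.
 */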
   4008 static u32 preparser_disable(bool state)
   4009 {
   4010 	return MI_ARB_CHECK | 1 << 8 | state;
   4011 }
   4012 
   4013 static int gen12_emit_flush_render(struct i915_request *request,
   4014 				   u32 mode)
   4015 {
   4016 	if (mode & EMIT_FLUSH) {
   4017 		u32 flags = 0;
   4018 		u32 *cs;
   4019 
   4020 		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
   4021 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
   4022 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
   4023 		/* Wa_1409600907:tgl */
   4024 		flags |= PIPE_CONTROL_DEPTH_STALL;
   4025 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
   4026 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
   4027 		flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   4028 
   4029 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4030 		flags |= PIPE_CONTROL_QW_WRITE;
   4031 
   4032 		flags |= PIPE_CONTROL_CS_STALL;
   4033 
   4034 		cs = intel_ring_begin(request, 6);
   4035 		if (IS_ERR(cs))
   4036 			return PTR_ERR(cs);
   4037 
   4038 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   4039 		intel_ring_advance(request, cs);
   4040 	}
   4041 
   4042 	if (mode & EMIT_INVALIDATE) {
   4043 		u32 flags = 0;
   4044 		u32 *cs;
   4045 
   4046 		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
   4047 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
   4048 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
   4049 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
   4050 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
   4051 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
   4052 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
   4053 		flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
   4054 
   4055 		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4056 		flags |= PIPE_CONTROL_QW_WRITE;
   4057 
   4058 		flags |= PIPE_CONTROL_CS_STALL;
   4059 
   4060 		cs = intel_ring_begin(request, 8);
   4061 		if (IS_ERR(cs))
   4062 			return PTR_ERR(cs);
   4063 
   4064 		/*
   4065 		 * Prevent the pre-parser from skipping past the TLB
   4066 		 * invalidate and loading a stale page for the batch
   4067 		 * buffer / request payload.
   4068 		 */
   4069 		*cs++ = preparser_disable(true);
   4070 
   4071 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
   4072 
   4073 		*cs++ = preparser_disable(false);
   4074 		intel_ring_advance(request, cs);
   4075 
   4076 		/*
   4077 		 * Wa_1604544889:tgl
   4078 		 */
   4079 		if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
   4080 			flags = 0;
   4081 			flags |= PIPE_CONTROL_CS_STALL;
   4082 			flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
   4083 
   4084 			flags |= PIPE_CONTROL_STORE_DATA_INDEX;
   4085 			flags |= PIPE_CONTROL_QW_WRITE;
   4086 
   4087 			cs = intel_ring_begin(request, 6);
   4088 			if (IS_ERR(cs))
   4089 				return PTR_ERR(cs);
   4090 
   4091 			cs = gen8_emit_pipe_control(cs, flags,
   4092 						    LRC_PPHWSP_SCRATCH_ADDR);
   4093 			intel_ring_advance(request, cs);
   4094 		}
   4095 	}
   4096 
   4097 	return 0;
   4098 }
   4099 
   4100 /*
   4101  * Reserve space for 2 NOOPs at the end of each request to be
   4102  * used as a workaround for not being allowed to do lite
   4103  * restore with HEAD==TAIL (WaIdleLiteRestore).
   4104  */
   4105 static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
   4106 {
   4107 	/* Ensure there's always at least one preemption point per-request. */
   4108 	*cs++ = MI_ARB_CHECK;
   4109 	*cs++ = MI_NOOP;
   4110 	request->wa_tail = intel_ring_offset(request, cs);
   4111 
   4112 	return cs;
   4113 }
   4114 
   4115 static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
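/*
 * Busy-wait on the per-engine preemption semaphore in the HWSP: the CS
 * polls (SAD_EQ_SDD against 0) until ring_set_paused() clears the slot,
 * stalling the tail of the request so the scheduler can preempt at a
 * well-defined point.
 */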
   4116 {
   4117 	*cs++ = MI_SEMAPHORE_WAIT |
   4118 		MI_SEMAPHORE_GLOBAL_GTT |
   4119 		MI_SEMAPHORE_POLL |
   4120 		MI_SEMAPHORE_SAD_EQ_SDD;
   4121 	*cs++ = 0;
   4122 	*cs++ = intel_hws_preempt_address(request->engine);
   4123 	*cs++ = 0;
   4124 
   4125 	return cs;
   4126 }
   4127 
   4128 static __always_inline u32*
   4129 gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
   4130 				 u32 *cs)
   4131 {
   4132 	*cs++ = MI_USER_INTERRUPT;
   4133 
   4134 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4135 	if (intel_engine_has_semaphores(request->engine))
   4136 		cs = emit_preempt_busywait(request, cs);
   4137 
   4138 	request->tail = intel_ring_offset(request, cs);
   4139 	assert_ring_tail_valid(request->ring, request->tail);
   4140 
   4141 	return gen8_emit_wa_tail(request, cs);
   4142 }
   4143 
   4144 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4145 {
   4146 	cs = gen8_emit_ggtt_write(cs,
   4147 				  request->fence.seqno,
   4148 				  i915_request_active_timeline(request)->hwsp_offset,
   4149 				  0);
   4150 
   4151 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4152 }
   4153 
   4154 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4155 {
   4156 	cs = gen8_emit_pipe_control(cs,
   4157 				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4158 				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4159 				    PIPE_CONTROL_DC_FLUSH_ENABLE,
   4160 				    0);
   4161 
   4162 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
   4163 	cs = gen8_emit_ggtt_write_rcs(cs,
   4164 				      request->fence.seqno,
   4165 				      i915_request_active_timeline(request)->hwsp_offset,
   4166 				      PIPE_CONTROL_FLUSH_ENABLE |
   4167 				      PIPE_CONTROL_CS_STALL);
   4168 
   4169 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4170 }
   4171 
   4172 static u32 *
   4173 gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4174 {
   4175 	cs = gen8_emit_ggtt_write_rcs(cs,
   4176 				      request->fence.seqno,
   4177 				      i915_request_active_timeline(request)->hwsp_offset,
   4178 				      PIPE_CONTROL_CS_STALL |
   4179 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4180 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4181 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4182 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4183 				      PIPE_CONTROL_FLUSH_ENABLE);
   4184 
   4185 	return gen8_emit_fini_breadcrumb_footer(request, cs);
   4186 }
   4187 
   4188 /*
   4189  * Note that the CS instruction pre-parser will not stall on the breadcrumb
   4190  * flush and will continue pre-fetching the instructions after it before the
   4191  * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
   4192  * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
   4193  * of the next request before the memory has been flushed, we're guaranteed that
   4194  * we won't access the batch itself too early.
   4195  * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
   4196  * so, if the current request is modifying an instruction in the next request on
   4197  * the same intel_context, we might pre-fetch and then execute the pre-update
   4198  * instruction. To avoid this, the users of self-modifying code should either
   4199  * disable the parser around the code emitting the memory writes, via a new flag
   4200  * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
   4201  * the in-kernel use-cases we've opted to use a separate context, see
   4202  * reloc_gpu() as an example.
   4203  * All the above applies only to the instructions themselves. Non-inline data
   4204  * used by the instructions is not pre-fetched.
   4205  */
   4206 
   4207 static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
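/* Gen12 variant of emit_preempt_busywait(), using the token form of MI_SEMAPHORE_WAIT. */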
   4208 {
   4209 	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
   4210 		MI_SEMAPHORE_GLOBAL_GTT |
   4211 		MI_SEMAPHORE_POLL |
   4212 		MI_SEMAPHORE_SAD_EQ_SDD;
   4213 	*cs++ = 0;
   4214 	*cs++ = intel_hws_preempt_address(request->engine);
   4215 	*cs++ = 0;
   4216 	*cs++ = 0;
   4217 	*cs++ = MI_NOOP;
   4218 
   4219 	return cs;
   4220 }
   4221 
   4222 static __always_inline u32*
   4223 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
   4224 {
   4225 	*cs++ = MI_USER_INTERRUPT;
   4226 
   4227 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
   4228 	if (intel_engine_has_semaphores(request->engine))
   4229 		cs = gen12_emit_preempt_busywait(request, cs);
   4230 
   4231 	request->tail = intel_ring_offset(request, cs);
   4232 	assert_ring_tail_valid(request->ring, request->tail);
   4233 
   4234 	return gen8_emit_wa_tail(request, cs);
   4235 }
   4236 
   4237 static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
   4238 {
   4239 	cs = gen8_emit_ggtt_write(cs,
   4240 				  request->fence.seqno,
   4241 				  i915_request_active_timeline(request)->hwsp_offset,
   4242 				  0);
   4243 
   4244 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4245 }
   4246 
   4247 static u32 *
   4248 gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   4249 {
   4250 	cs = gen8_emit_ggtt_write_rcs(cs,
   4251 				      request->fence.seqno,
   4252 				      i915_request_active_timeline(request)->hwsp_offset,
   4253 				      PIPE_CONTROL_CS_STALL |
   4254 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
   4255 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
   4256 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   4257 				      /* Wa_1409600907:tgl */
   4258 				      PIPE_CONTROL_DEPTH_STALL |
   4259 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
   4260 				      PIPE_CONTROL_FLUSH_ENABLE |
   4261 				      PIPE_CONTROL_HDC_PIPELINE_FLUSH);
   4262 
   4263 	return gen12_emit_fini_breadcrumb_footer(request, cs);
   4264 }
   4265 
   4266 static void execlists_park(struct intel_engine_cs *engine)
   4267 {
   4268 	cancel_timer(&engine->execlists.timer);
   4269 	cancel_timer(&engine->execlists.preempt);
   4270 }
   4271 
   4272 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
   4273 {
   4274 	engine->submit_request = execlists_submit_request;
   4275 	engine->schedule = i915_schedule;
   4276 	engine->execlists.tasklet.func = execlists_submission_tasklet;
   4277 
   4278 	engine->reset.prepare = execlists_reset_prepare;
   4279 	engine->reset.rewind = execlists_reset_rewind;
   4280 	engine->reset.cancel = execlists_reset_cancel;
   4281 	engine->reset.finish = execlists_reset_finish;
   4282 
   4283 	engine->park = execlists_park;
   4284 	engine->unpark = NULL;
   4285 
   4286 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
   4287 	if (!intel_vgpu_active(engine->i915)) {
   4288 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
   4289 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
   4290 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
   4291 	}
   4292 
   4293 	if (INTEL_GEN(engine->i915) >= 12)
   4294 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
   4295 
   4296 	if (intel_engine_has_preemption(engine))
   4297 		engine->emit_bb_start = gen8_emit_bb_start;
   4298 	else
   4299 		engine->emit_bb_start = gen8_emit_bb_start_noarb;
   4300 }
   4301 
   4302 static void execlists_shutdown(struct intel_engine_cs *engine)
   4303 {
   4304 	/* Synchronise with residual timers and any softirq they raise */
   4305 	del_timer_sync(&engine->execlists.timer);
   4306 	del_timer_sync(&engine->execlists.preempt);
   4307 	tasklet_kill(&engine->execlists.tasklet);
   4308 }
   4309 
   4310 static void execlists_release(struct intel_engine_cs *engine)
   4311 {
   4312 	execlists_shutdown(engine);
   4313 
   4314 	intel_engine_cleanup_common(engine);
   4315 	lrc_destroy_wa_ctx(engine);
   4316 }
   4317 
   4318 static void
   4319 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
   4320 {
    4321 	/* Default vfuncs which can be overridden by each engine. */
   4322 
   4323 	engine->resume = execlists_resume;
   4324 
   4325 	engine->cops = &execlists_context_ops;
   4326 	engine->request_alloc = execlists_request_alloc;
   4327 
   4328 	engine->emit_flush = gen8_emit_flush;
   4329 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
   4330 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
   4331 	if (INTEL_GEN(engine->i915) >= 12)
   4332 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
   4333 
   4334 	engine->set_default_submission = intel_execlists_set_default_submission;
   4335 
   4336 	if (INTEL_GEN(engine->i915) < 11) {
   4337 		engine->irq_enable = gen8_logical_ring_enable_irq;
   4338 		engine->irq_disable = gen8_logical_ring_disable_irq;
   4339 	} else {
   4340 		/*
    4341 		 * TODO: On Gen11 the interrupt masks need to be clear
    4342 		 * to allow C6 entry. Keep interrupts enabled
    4343 		 * and take the hit of generating extra interrupts
    4344 		 * until a more refined solution exists.
   4345 		 */
   4346 	}
   4347 }
   4348 
   4349 static inline void
   4350 logical_ring_default_irqs(struct intel_engine_cs *engine)
   4351 {
   4352 	unsigned int shift = 0;
   4353 
   4354 	if (INTEL_GEN(engine->i915) < 11) {
   4355 		const u8 irq_shifts[] = {
   4356 			[RCS0]  = GEN8_RCS_IRQ_SHIFT,
   4357 			[BCS0]  = GEN8_BCS_IRQ_SHIFT,
   4358 			[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
   4359 			[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
   4360 			[VECS0] = GEN8_VECS_IRQ_SHIFT,
   4361 		};
   4362 
   4363 		shift = irq_shifts[engine->id];
   4364 	}
   4365 
   4366 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
   4367 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
   4368 }
   4369 
   4370 static void rcs_submission_override(struct intel_engine_cs *engine)
   4371 {
   4372 	switch (INTEL_GEN(engine->i915)) {
   4373 	case 12:
   4374 		engine->emit_flush = gen12_emit_flush_render;
   4375 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
   4376 		break;
   4377 	case 11:
   4378 		engine->emit_flush = gen11_emit_flush_render;
   4379 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
   4380 		break;
   4381 	default:
   4382 		engine->emit_flush = gen8_emit_flush_render;
   4383 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
   4384 		break;
   4385 	}
   4386 }
   4387 
   4388 int intel_execlists_submission_setup(struct intel_engine_cs *engine)
   4389 {
   4390 	struct intel_engine_execlists * const execlists = &engine->execlists;
   4391 	struct drm_i915_private *i915 = engine->i915;
   4392 	struct intel_uncore *uncore = engine->uncore;
   4393 	u32 base = engine->mmio_base;
   4394 
   4395 	i915_sched_init(&engine->execlists);
   4396 
   4397 	tasklet_init(&engine->execlists.tasklet,
   4398 		     execlists_submission_tasklet, (unsigned long)engine);
   4399 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
   4400 	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
   4401 
   4402 	logical_ring_default_vfuncs(engine);
   4403 	logical_ring_default_irqs(engine);
   4404 
   4405 	if (engine->class == RENDER_CLASS)
   4406 		rcs_submission_override(engine);
   4407 
   4408 	if (intel_init_workaround_bb(engine))
   4409 		/*
   4410 		 * We continue even if we fail to initialize WA batch
    4411 		 * We continue even if we fail to initialize the WA batch,
    4412 		 * because we only expect rare glitches, nothing critical
    4413 		 * enough to prevent us from using the GPU.
   4414 		DRM_ERROR("WA batch buffer initialization failed\n");
   4415 
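	/*
	 * Record how submissions reach the hardware: the ELSQ contents and
	 * control registers when the HW has a submit queue, otherwise the
	 * legacy ELSP. On NetBSD we keep register offsets plus the uncore
	 * bus-space handle/tag instead of a pre-mapped pointer.
	 */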
   4416 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
   4417 #ifdef __NetBSD__
   4418 		execlists->submit_reg = i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4419 		execlists->ctrl_reg = i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4420 		execlists->bsh = uncore->regs_bsh;
   4421 		execlists->bst = uncore->regs_bst;
   4422 #else
   4423 		execlists->submit_reg = uncore->regs +
   4424 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
   4425 		execlists->ctrl_reg = uncore->regs +
   4426 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
   4427 #endif
   4428 	} else {
   4429 #ifdef __NetBSD__
   4430 		execlists->submit_reg = i915_mmio_reg_offset(RING_ELSP(base));
   4431 		execlists->bsh = uncore->regs_bsh;
   4432 		execlists->bst = uncore->regs_bst;
   4433 #else
   4434 		execlists->submit_reg = uncore->regs +
   4435 			i915_mmio_reg_offset(RING_ELSP(base));
   4436 #endif
   4437 	}
   4438 
   4439 	execlists->csb_status =
   4440 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
   4441 
   4442 	execlists->csb_write =
   4443 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
   4444 
   4445 	if (INTEL_GEN(i915) < 11)
   4446 		execlists->csb_size = GEN8_CSB_ENTRIES;
   4447 	else
   4448 		execlists->csb_size = GEN11_CSB_ENTRIES;
   4449 
   4450 	reset_csb_pointers(engine);
   4451 
   4452 	/* Finally, take ownership and responsibility for cleanup! */
   4453 	engine->release = execlists_release;
   4454 
   4455 	return 0;
   4456 }
   4457 
   4458 static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
   4459 {
   4460 	u32 indirect_ctx_offset;
   4461 
   4462 	switch (INTEL_GEN(engine->i915)) {
   4463 	default:
   4464 		MISSING_CASE(INTEL_GEN(engine->i915));
   4465 		/* fall through */
   4466 	case 12:
   4467 		indirect_ctx_offset =
   4468 			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4469 		break;
   4470 	case 11:
   4471 		indirect_ctx_offset =
   4472 			GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4473 		break;
   4474 	case 10:
   4475 		indirect_ctx_offset =
   4476 			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4477 		break;
   4478 	case 9:
   4479 		indirect_ctx_offset =
   4480 			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4481 		break;
   4482 	case 8:
   4483 		indirect_ctx_offset =
   4484 			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
   4485 		break;
   4486 	}
   4487 
   4488 	return indirect_ctx_offset;
   4489 }
   4490 
   4491 
   4492 static void init_common_reg_state(u32 * const regs,
   4493 				  const struct intel_engine_cs *engine,
   4494 				  const struct intel_ring *ring,
   4495 				  bool inhibit)
   4496 {
   4497 	u32 ctl;
   4498 
   4499 	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
   4500 	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
   4501 	if (inhibit)
   4502 		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
   4503 	if (INTEL_GEN(engine->i915) < 11)
   4504 		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
   4505 					   CTX_CTRL_RS_CTX_ENABLE);
   4506 	regs[CTX_CONTEXT_CONTROL] = ctl;
   4507 
   4508 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
   4509 }
   4510 
   4511 static void init_wa_bb_reg_state(u32 * const regs,
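/*
 * Point the context image at the workaround batch buffers: the per-ctx
 * batch address (with bit 0 set) at pos_bb_per_ctx, the indirect-ctx
 * address plus its size in cachelines two dwords later, and the
 * indirect-ctx offset (<< 6) two dwords after that.
 */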
   4512 				 const struct intel_engine_cs *engine,
   4513 				 u32 pos_bb_per_ctx)
   4514 {
   4515 	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
   4516 
   4517 	if (wa_ctx->per_ctx.size) {
   4518 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4519 
   4520 		regs[pos_bb_per_ctx] =
   4521 			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
   4522 	}
   4523 
   4524 	if (wa_ctx->indirect_ctx.size) {
   4525 		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
   4526 
   4527 		regs[pos_bb_per_ctx + 2] =
   4528 			(ggtt_offset + wa_ctx->indirect_ctx.offset) |
   4529 			(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
   4530 
   4531 		regs[pos_bb_per_ctx + 4] =
   4532 			intel_lr_indirect_ctx_offset(engine) << 6;
   4533 	}
   4534 }
   4535 
   4536 static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
   4537 {
   4538 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
   4539 		/* 64b PPGTT (48bit canonical)
   4540 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
   4541 		 * other PDP Descriptors are ignored.
   4542 		 */
   4543 		ASSIGN_CTX_PML4(ppgtt, regs);
   4544 	} else {
   4545 		ASSIGN_CTX_PDP(ppgtt, regs, 3);
   4546 		ASSIGN_CTX_PDP(ppgtt, regs, 2);
   4547 		ASSIGN_CTX_PDP(ppgtt, regs, 1);
   4548 		ASSIGN_CTX_PDP(ppgtt, regs, 0);
   4549 	}
   4550 }
   4551 
   4552 static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
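/*
 * A context using the GGTT directly is backed by its aliasing ppgtt
 * (if any); otherwise use the context's own full ppgtt for the PDP/PML4
 * registers.
 */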
   4553 {
   4554 	if (i915_is_ggtt(vm))
   4555 		return i915_vm_to_ggtt(vm)->alias;
   4556 	else
   4557 		return i915_vm_to_ppgtt(vm);
   4558 }
   4559 
   4560 static void execlists_init_reg_state(u32 *regs,
   4561 				     const struct intel_context *ce,
   4562 				     const struct intel_engine_cs *engine,
   4563 				     const struct intel_ring *ring,
   4564 				     bool inhibit)
   4565 {
   4566 	/*
   4567 	 * A context is actually a big batch buffer with several
   4568 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
   4569 	 * values we are setting here are only for the first context restore:
   4570 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
   4571 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
   4572 	 * we are not initializing here).
   4573 	 *
   4574 	 * Must keep consistent with virtual_update_register_offsets().
   4575 	 */
   4576 	set_offsets(regs, reg_offsets(engine), engine, inhibit);
   4577 
   4578 	init_common_reg_state(regs, engine, ring, inhibit);
   4579 	init_ppgtt_reg_state(regs, vm_alias(ce->vm));
   4580 
   4581 	init_wa_bb_reg_state(regs, engine,
   4582 			     INTEL_GEN(engine->i915) >= 12 ?
   4583 			     GEN12_CTX_BB_PER_CTX_PTR :
   4584 			     CTX_BB_PER_CTX_PTR);
   4585 
   4586 	__reset_stop_ring(regs, engine);
   4587 }
   4588 
   4589 static int
   4590 populate_lr_context(struct intel_context *ce,
   4591 		    struct drm_i915_gem_object *ctx_obj,
   4592 		    struct intel_engine_cs *engine,
   4593 		    struct intel_ring *ring)
   4594 {
   4595 	bool inhibit = true;
   4596 	void *vaddr;
   4597 	int ret;
   4598 
   4599 	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
   4600 	if (IS_ERR(vaddr)) {
   4601 		ret = PTR_ERR(vaddr);
   4602 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
   4603 		return ret;
   4604 	}
   4605 
   4606 	set_redzone(vaddr, engine);
   4607 
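	/*
	 * If we have a golden context image, start from a copy of it and
	 * mark the context valid; otherwise leave the image zeroed and keep
	 * inhibit set so that CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT suppresses
	 * the first restore.
	 */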
   4608 	if (engine->default_state) {
   4609 		void *defaults;
   4610 
   4611 		defaults = i915_gem_object_pin_map(engine->default_state,
   4612 						   I915_MAP_WB);
   4613 		if (IS_ERR(defaults)) {
   4614 			ret = PTR_ERR(defaults);
   4615 			goto err_unpin_ctx;
   4616 		}
   4617 
   4618 		memcpy(vaddr, defaults, engine->context_size);
   4619 		i915_gem_object_unpin_map(engine->default_state);
   4620 		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
   4621 		inhibit = false;
   4622 	}
   4623 
   4624 	/* The second page of the context object contains some fields which must
   4625 	 * be set up prior to the first execution. */
   4626 	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
   4627 				 ce, engine, ring, inhibit);
   4628 
   4629 	ret = 0;
   4630 err_unpin_ctx:
   4631 	__i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
   4632 	i915_gem_object_unpin_map(ctx_obj);
   4633 	return ret;
   4634 }
   4635 
   4636 static int __execlists_context_alloc(struct intel_context *ce,
   4637 				     struct intel_engine_cs *engine)
   4638 {
   4639 	struct drm_i915_gem_object *ctx_obj;
   4640 	struct intel_ring *ring;
   4641 	struct i915_vma *vma;
   4642 	u32 context_size;
   4643 	int ret;
   4644 
   4645 	GEM_BUG_ON(ce->state);
   4646 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
   4647 
   4648 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
   4649 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
   4650 
   4651 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
   4652 	if (IS_ERR(ctx_obj))
   4653 		return PTR_ERR(ctx_obj);
   4654 
   4655 	vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
   4656 	if (IS_ERR(vma)) {
   4657 		ret = PTR_ERR(vma);
   4658 		goto error_deref_obj;
   4659 	}
   4660 
   4661 	if (!ce->timeline) {
   4662 		struct intel_timeline *tl;
   4663 
   4664 		tl = intel_timeline_create(engine->gt, NULL);
   4665 		if (IS_ERR(tl)) {
   4666 			ret = PTR_ERR(tl);
   4667 			goto error_deref_obj;
   4668 		}
   4669 
   4670 		ce->timeline = tl;
   4671 	}
   4672 
   4673 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
   4674 	if (IS_ERR(ring)) {
   4675 		ret = PTR_ERR(ring);
   4676 		goto error_deref_obj;
   4677 	}
   4678 
   4679 	ret = populate_lr_context(ce, ctx_obj, engine, ring);
   4680 	if (ret) {
   4681 		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
   4682 		goto error_ring_free;
   4683 	}
   4684 
   4685 	ce->ring = ring;
   4686 	ce->state = vma;
   4687 
   4688 	return 0;
   4689 
   4690 error_ring_free:
   4691 	intel_ring_put(ring);
   4692 error_deref_obj:
   4693 	i915_gem_object_put(ctx_obj);
   4694 	return ret;
   4695 }
   4696 
   4697 static struct list_head *virtual_queue(struct virtual_engine *ve)
   4698 {
   4699 	return &ve->base.execlists.default_priolist.requests[0];
   4700 }
   4701 
   4702 static void virtual_context_destroy(struct kref *kref)
   4703 {
   4704 	struct virtual_engine *ve =
   4705 		container_of(kref, typeof(*ve), context.ref);
   4706 	unsigned int n;
   4707 
   4708 	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4709 	GEM_BUG_ON(ve->request);
   4710 	GEM_BUG_ON(ve->context.inflight);
   4711 
   4712 	for (n = 0; n < ve->num_siblings; n++) {
   4713 		struct intel_engine_cs *sibling = ve->siblings[n];
   4714 		struct rb_node *node = &ve->nodes[sibling->id].rb;
   4715 		unsigned long flags;
   4716 
   4717 		if (!ve->nodes[sibling->id].inserted)
   4718 			continue;
   4719 
   4720 		spin_lock_irqsave(&sibling->active.lock, flags);
   4721 
   4722 		/* Detachment is lazily performed in the execlists tasklet */
   4723 		if (ve->nodes[sibling->id].inserted) {
   4724 			rb_erase_cached(node, &sibling->execlists.virtual);
   4725 			ve->nodes[sibling->id].inserted = false;
   4726 		}
   4727 
   4728 		spin_unlock_irqrestore(&sibling->active.lock, flags);
   4729 	}
   4730 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
   4731 
   4732 	if (ve->context.state)
   4733 		__execlists_context_fini(&ve->context);
   4734 	intel_context_fini(&ve->context);
   4735 
   4736 	intel_engine_fini_breadcrumbs(&ve->base);
   4737 	spin_lock_destroy(&ve->base.active.lock);
   4738 
   4739 	kfree(ve->bonds);
   4740 	kfree(ve);
   4741 }
   4742 
   4743 static void virtual_engine_initial_hint(struct virtual_engine *ve)
   4744 {
   4745 	int swp;
   4746 
   4747 	/*
   4748 	 * Pick a random sibling on starting to help spread the load around.
   4749 	 *
   4750 	 * New contexts are typically created with exactly the same order
   4751 	 * of siblings, and often started in batches. Due to the way we iterate
    4752 	 * the array of siblings when submitting requests, sibling[0] is
    4753 	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
    4754 	 * randomised across the system, the first engine we inspect differs
    4755 	 * from one context to the next, which also helps spread the load.
   4756 	 *
    4757 	 * NB: This does not force us to execute on this engine; it will just
   4758 	 * typically be the first we inspect for submission.
   4759 	 */
   4760 	swp = prandom_u32_max(ve->num_siblings);
   4761 	if (!swp)
   4762 		return;
   4763 
   4764 	swap(ve->siblings[swp], ve->siblings[0]);
   4765 	if (!intel_engine_has_relative_mmio(ve->siblings[0]))
   4766 		virtual_update_register_offsets(ve->context.lrc_reg_state,
   4767 						ve->siblings[0]);
   4768 }
   4769 
   4770 static int virtual_context_alloc(struct intel_context *ce)
   4771 {
   4772 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4773 
   4774 	return __execlists_context_alloc(ce, ve->siblings[0]);
   4775 }
   4776 
   4777 static int virtual_context_pin(struct intel_context *ce)
   4778 {
   4779 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4780 	int err;
   4781 
   4782 	/* Note: we must use a real engine class for setting up reg state */
   4783 	err = __execlists_context_pin(ce, ve->siblings[0]);
   4784 	if (err)
   4785 		return err;
   4786 
   4787 	virtual_engine_initial_hint(ve);
   4788 	return 0;
   4789 }
   4790 
   4791 static void virtual_context_enter(struct intel_context *ce)
   4792 {
   4793 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4794 	unsigned int n;
   4795 
   4796 	for (n = 0; n < ve->num_siblings; n++)
   4797 		intel_engine_pm_get(ve->siblings[n]);
   4798 
   4799 	intel_timeline_enter(ce->timeline);
   4800 }
   4801 
   4802 static void virtual_context_exit(struct intel_context *ce)
   4803 {
   4804 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
   4805 	unsigned int n;
   4806 
   4807 	intel_timeline_exit(ce->timeline);
   4808 
   4809 	for (n = 0; n < ve->num_siblings; n++)
   4810 		intel_engine_pm_put(ve->siblings[n]);
   4811 }
   4812 
   4813 static const struct intel_context_ops virtual_context_ops = {
   4814 	.alloc = virtual_context_alloc,
   4815 
   4816 	.pin = virtual_context_pin,
   4817 	.unpin = execlists_context_unpin,
   4818 
   4819 	.enter = virtual_context_enter,
   4820 	.exit = virtual_context_exit,
   4821 
   4822 	.destroy = virtual_context_destroy,
   4823 };
   4824 
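/*
 * Return the mask of physical engines on which the pending virtual request
 * may execute, or 0 if there is no pending request.  An empty
 * execution_mask (e.g. after bond restrictions) is treated as an error:
 * the request is skipped with -ENODEV and handed to siblings[0] so it can
 * still be retired.
 */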
   4825 static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
   4826 {
   4827 	struct i915_request *rq;
   4828 	intel_engine_mask_t mask;
   4829 
   4830 	rq = READ_ONCE(ve->request);
   4831 	if (!rq)
   4832 		return 0;
   4833 
   4834 	/* The rq is ready for submission; rq->execution_mask is now stable. */
   4835 	mask = rq->execution_mask;
   4836 	if (unlikely(!mask)) {
    4837 		/* Invalid selection: skip the request and fall back to siblings[0] */
   4838 		i915_request_skip(rq, -ENODEV);
   4839 		mask = ve->siblings[0]->mask;
   4840 	}
   4841 
   4842 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
   4843 		     rq->fence.context, rq->fence.seqno,
   4844 		     mask, ve->base.execlists.queue_priority_hint);
   4845 
   4846 	return mask;
   4847 }
   4848 
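/*
 * Offer the pending virtual request to the physical siblings: for each
 * sibling permitted by the execution mask, (re)insert this virtual
 * engine's node into the sibling's priority-sorted tree of virtual
 * engines, and kick the sibling's execlists tasklet when the node becomes
 * the first in that tree and raises the sibling's queue_priority_hint.
 */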
   4849 static void virtual_submission_tasklet(unsigned long data)
   4850 {
   4851 	struct virtual_engine * const ve = (struct virtual_engine *)data;
   4852 	const int prio = ve->base.execlists.queue_priority_hint;
   4853 	intel_engine_mask_t mask;
   4854 	unsigned int n;
   4855 
   4856 	rcu_read_lock();
   4857 	mask = virtual_submission_mask(ve);
   4858 	rcu_read_unlock();
   4859 	if (unlikely(!mask))
   4860 		return;
   4861 
   4862 #ifdef __NetBSD__
   4863 	int s = splsoftserial(); /* block tasklets=softints */
   4864 #else
   4865 	local_irq_disable();
   4866 #endif
   4867 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
   4868 		struct intel_engine_cs *sibling = ve->siblings[n];
   4869 		struct ve_node * const node = &ve->nodes[sibling->id];
   4870 		struct rb_node **parent, *rb;
   4871 		bool first;
   4872 
   4873 		if (unlikely(!(mask & sibling->mask))) {
   4874 			if (node->inserted) {
   4875 				spin_lock(&sibling->active.lock);
   4876 				rb_erase_cached(&node->rb,
   4877 						&sibling->execlists.virtual);
   4878 				node->inserted = false;
   4879 				spin_unlock(&sibling->active.lock);
   4880 			}
   4881 			continue;
   4882 		}
   4883 
   4884 		spin_lock(&sibling->active.lock);
   4885 
   4886 		if (node->inserted) {
   4887 			/*
   4888 			 * Cheat and avoid rebalancing the tree if we can
   4889 			 * reuse this node in situ.
   4890 			 */
   4891 			first = rb_first_cached(&sibling->execlists.virtual) ==
   4892 				&node->rb;
   4893 			if (prio == node->prio || (prio > node->prio && first))
   4894 				goto submit_engine;
   4895 
   4896 			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
   4897 			node->inserted = false;
   4898 		}
   4899 
   4900 #ifdef __NetBSD__
   4901 		__USE(parent);
   4902 		__USE(rb);
   4903 		struct ve_node *collision __diagused;
   4904 		/* XXX kludge to get insertion order */
   4905 		node->order = ve->order++;
   4906 		collision = rb_tree_insert_node(
   4907 			&sibling->execlists.virtual.rb_root.rbr_tree,
   4908 			node);
   4909 		KASSERT(collision == node);
   4910 		node->inserted = true;
   4911 		first = rb_tree_find_node_geq(
   4912 			&sibling->execlists.virtual.rb_root.rbr_tree,
   4913 			&node->prio) == node;
   4914 #else
   4915 		rb = NULL;
   4916 		first = true;
   4917 		parent = &sibling->execlists.virtual.rb_root.rb_node;
   4918 		while (*parent) {
   4919 			struct ve_node *other;
   4920 
   4921 			rb = *parent;
   4922 			other = rb_entry(rb, typeof(*other), rb);
   4923 			if (prio > other->prio) {
   4924 				parent = &rb->rb_left;
   4925 			} else {
   4926 				parent = &rb->rb_right;
   4927 				first = false;
   4928 			}
   4929 		}
   4930 
   4931 		rb_link_node(&node->rb, rb, parent);
   4932 		rb_insert_color_cached(&node->rb,
   4933 				       &sibling->execlists.virtual,
   4934 				       first);
   4935 #endif
   4936 
   4937 submit_engine:
   4938 		GEM_BUG_ON(!node->inserted);
   4939 		node->prio = prio;
   4940 		if (first && prio > sibling->execlists.queue_priority_hint) {
   4941 			sibling->execlists.queue_priority_hint = prio;
   4942 			tasklet_hi_schedule(&sibling->execlists.tasklet);
   4943 		}
   4944 
   4945 		spin_unlock(&sibling->active.lock);
   4946 	}
   4947 #ifdef __NetBSD__
   4948 	splx(s);
   4949 #else
   4950 	local_irq_enable();
   4951 #endif
   4952 }
   4953 
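/*
 * Submission hook for a virtual engine.  Under the virtual engine's lock,
 * retire any stale request left behind by preempt-to-busy, then either
 * submit the new request directly if it has already completed, or stash
 * it in ve->request and schedule the virtual tasklet to offer it to the
 * physical siblings.
 */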
   4954 static void virtual_submit_request(struct i915_request *rq)
   4955 {
   4956 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   4957 	struct i915_request *old;
   4958 	unsigned long flags;
   4959 
   4960 	ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
   4961 		     rq->fence.context,
   4962 		     rq->fence.seqno);
   4963 
   4964 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
   4965 
   4966 	spin_lock_irqsave(&ve->base.active.lock, flags);
   4967 
   4968 	old = ve->request;
   4969 	if (old) { /* background completion event from preempt-to-busy */
   4970 		GEM_BUG_ON(!i915_request_completed(old));
   4971 		__i915_request_submit(old);
   4972 		i915_request_put(old);
   4973 	}
   4974 
   4975 	if (i915_request_completed(rq)) {
   4976 		__i915_request_submit(rq);
   4977 
   4978 		ve->base.execlists.queue_priority_hint = INT_MIN;
   4979 		ve->request = NULL;
   4980 	} else {
   4981 		ve->base.execlists.queue_priority_hint = rq_prio(rq);
   4982 		ve->request = i915_request_get(rq);
   4983 
   4984 		GEM_BUG_ON(!list_empty(virtual_queue(ve)));
   4985 		list_move_tail(&rq->sched.link, virtual_queue(ve));
   4986 
   4987 		tasklet_schedule(&ve->base.execlists.tasklet);
   4988 	}
   4989 
   4990 	spin_unlock_irqrestore(&ve->base.active.lock, flags);
   4991 }
   4992 
   4993 static struct ve_bond *
   4994 virtual_find_bond(struct virtual_engine *ve,
   4995 		  const struct intel_engine_cs *master)
   4996 {
   4997 	int i;
   4998 
   4999 	for (i = 0; i < ve->num_bonds; i++) {
   5000 		if (ve->bonds[i].master == master)
   5001 			return &ve->bonds[i];
   5002 	}
   5003 
   5004 	return NULL;
   5005 }
   5006 
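/*
 * Called when the paired master request is submitted (rq is the bonded
 * request, signal is the master's fence): restrict the bonded request's
 * execution_mask to the siblings recorded for the master's engine,
 * excluding that engine itself, and remove those engines from the master's
 * own mask so a resubmitted master cannot collide with its bonded partner.
 */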
   5007 static void
   5008 virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
   5009 {
   5010 	struct virtual_engine *ve = to_virtual_engine(rq->engine);
   5011 	intel_engine_mask_t allowed, exec;
   5012 	struct ve_bond *bond;
   5013 
   5014 	allowed = ~to_request(signal)->engine->mask;
   5015 
   5016 	bond = virtual_find_bond(ve, to_request(signal)->engine);
   5017 	if (bond)
   5018 		allowed &= bond->sibling_mask;
   5019 
   5020 	/* Restrict the bonded request to run on only the available engines */
   5021 	exec = READ_ONCE(rq->execution_mask);
   5022 	while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
   5023 		;
   5024 
   5025 	/* Prevent the master from being re-run on the bonded engines */
   5026 	to_request(signal)->execution_mask &= ~allowed;
   5027 }
   5028 
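/*
 * Create a virtual engine that load-balances requests across the given
 * physical siblings.  A single sibling degenerates to an ordinary context
 * on that engine; otherwise every sibling must use the execlists
 * submission tasklet and share one engine class.  Returns the new
 * intel_context whose engine is the virtual engine, or an ERR_PTR.
 *
 * Minimal usage sketch (the engine pointers below are illustrative only,
 * not names defined in this file):
 *
 *	struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
 *	struct intel_context *ce;
 *
 *	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 */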
   5029 struct intel_context *
   5030 intel_execlists_create_virtual(struct intel_engine_cs **siblings,
   5031 			       unsigned int count)
   5032 {
   5033 	struct virtual_engine *ve;
   5034 	unsigned int n;
   5035 	int err;
   5036 
   5037 	if (count == 0)
   5038 		return ERR_PTR(-EINVAL);
   5039 
   5040 	if (count == 1)
   5041 		return intel_context_create(siblings[0]);
   5042 
   5043 	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
   5044 	if (!ve)
   5045 		return ERR_PTR(-ENOMEM);
   5046 
   5047 	ve->base.i915 = siblings[0]->i915;
   5048 	ve->base.gt = siblings[0]->gt;
   5049 	ve->base.uncore = siblings[0]->uncore;
   5050 	ve->base.id = -1;
   5051 
   5052 	ve->base.class = OTHER_CLASS;
   5053 	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
   5054 	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   5055 	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
   5056 
   5057 	/*
   5058 	 * The decision on whether to submit a request using semaphores
   5059 	 * depends on the saturated state of the engine. We only compute
    5060 	 * this during HW submission of the request, and we need this
   5061 	 * state to be globally applied to all requests being submitted
   5062 	 * to this engine. Virtual engines encompass more than one physical
    5063 	 * engine, so we cannot accurately tell in advance whether one of those
    5064 	 * engines is already saturated and thus cannot afford to use a semaphore
   5065 	 * and be pessimized in priority for doing so -- if we are the only
   5066 	 * context using semaphores after all other clients have stopped, we
   5067 	 * will be starved on the saturated system. Such a global switch for
   5068 	 * semaphores is less than ideal, but alas is the current compromise.
   5069 	 */
   5070 	ve->base.saturated = ALL_ENGINES;
   5071 
   5072 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
   5073 
   5074 	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
   5075 	intel_engine_init_breadcrumbs(&ve->base);
   5076 	intel_engine_init_execlists(&ve->base);
   5077 
   5078 	ve->base.cops = &virtual_context_ops;
   5079 	ve->base.request_alloc = execlists_request_alloc;
   5080 
   5081 	ve->base.schedule = i915_schedule;
   5082 	ve->base.submit_request = virtual_submit_request;
   5083 	ve->base.bond_execute = virtual_bond_execute;
   5084 
   5085 	INIT_LIST_HEAD(virtual_queue(ve));
   5086 	ve->base.execlists.queue_priority_hint = INT_MIN;
   5087 	tasklet_init(&ve->base.execlists.tasklet,
   5088 		     virtual_submission_tasklet,
   5089 		     (unsigned long)ve);
   5090 
   5091 	intel_context_init(&ve->context, &ve->base);
   5092 
   5093 	for (n = 0; n < count; n++) {
   5094 		struct intel_engine_cs *sibling = siblings[n];
   5095 
   5096 		GEM_BUG_ON(!is_power_of_2(sibling->mask));
   5097 		if (sibling->mask & ve->base.mask) {
   5098 			DRM_DEBUG("duplicate %s entry in load balancer\n",
   5099 				  sibling->name);
   5100 			err = -EINVAL;
   5101 			goto err_put;
   5102 		}
   5103 
   5104 		/*
   5105 		 * The virtual engine implementation is tightly coupled to
    5106 		 * the execlists backend -- we push requests directly
   5107 		 * into a tree inside each physical engine. We could support
   5108 		 * layering if we handle cloning of the requests and
   5109 		 * submitting a copy into each backend.
   5110 		 */
   5111 		if (sibling->execlists.tasklet.func !=
   5112 		    execlists_submission_tasklet) {
   5113 			err = -ENODEV;
   5114 			goto err_put;
   5115 		}
   5116 
    5117 		GEM_BUG_ON(ve->nodes[sibling->id].inserted);
   5118 		ve->nodes[sibling->id].inserted = false;
   5119 
   5120 		ve->siblings[ve->num_siblings++] = sibling;
   5121 		ve->base.mask |= sibling->mask;
   5122 
   5123 		/*
   5124 		 * All physical engines must be compatible for their emission
   5125 		 * functions (as we build the instructions during request
   5126 		 * construction and do not alter them before submission
   5127 		 * on the physical engine). We use the engine class as a guide
   5128 		 * here, although that could be refined.
   5129 		 */
   5130 		if (ve->base.class != OTHER_CLASS) {
   5131 			if (ve->base.class != sibling->class) {
   5132 				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
   5133 					  sibling->class, ve->base.class);
   5134 				err = -EINVAL;
   5135 				goto err_put;
   5136 			}
   5137 			continue;
   5138 		}
   5139 
   5140 		ve->base.class = sibling->class;
   5141 		ve->base.uabi_class = sibling->uabi_class;
   5142 		snprintf(ve->base.name, sizeof(ve->base.name),
   5143 			 "v%dx%d", ve->base.class, count);
   5144 		ve->base.context_size = sibling->context_size;
   5145 
   5146 		ve->base.emit_bb_start = sibling->emit_bb_start;
   5147 		ve->base.emit_flush = sibling->emit_flush;
   5148 		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
   5149 		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
   5150 		ve->base.emit_fini_breadcrumb_dw =
   5151 			sibling->emit_fini_breadcrumb_dw;
   5152 
   5153 		ve->base.flags = sibling->flags;
   5154 	}
   5155 
   5156 	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
   5157 
   5158 	return &ve->context;
   5159 
   5160 err_put:
   5161 	intel_context_put(&ve->context);
   5162 	return ERR_PTR(err);
   5163 }
   5164 
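/*
 * Duplicate an existing virtual engine: create a new virtual context over
 * the same siblings and copy across any bonding rules.
 */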
   5165 struct intel_context *
   5166 intel_execlists_clone_virtual(struct intel_engine_cs *src)
   5167 {
   5168 	struct virtual_engine *se = to_virtual_engine(src);
   5169 	struct intel_context *dst;
   5170 
   5171 	dst = intel_execlists_create_virtual(se->siblings,
   5172 					     se->num_siblings);
   5173 	if (IS_ERR(dst))
   5174 		return dst;
   5175 
   5176 	if (se->num_bonds) {
   5177 		struct virtual_engine *de = to_virtual_engine(dst->engine);
   5178 
   5179 		de->bonds = kmemdup(se->bonds,
   5180 				    sizeof(*se->bonds) * se->num_bonds,
   5181 				    GFP_KERNEL);
   5182 		if (!de->bonds) {
   5183 			intel_context_put(dst);
   5184 			return ERR_PTR(-ENOMEM);
   5185 		}
   5186 
   5187 		de->num_bonds = se->num_bonds;
   5188 	}
   5189 
   5190 	return dst;
   5191 }
   5192 
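/*
 * Record a bonding rule for this virtual engine: requests bonded to a
 * request executing on 'master' may only run on 'sibling'.  Repeated calls
 * for the same master accumulate siblings into the bond's mask.
 */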
   5193 int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
   5194 				     const struct intel_engine_cs *master,
   5195 				     const struct intel_engine_cs *sibling)
   5196 {
   5197 	struct virtual_engine *ve = to_virtual_engine(engine);
   5198 	struct ve_bond *bond;
   5199 	int n;
   5200 
   5201 	/* Sanity check the sibling is part of the virtual engine */
   5202 	for (n = 0; n < ve->num_siblings; n++)
   5203 		if (sibling == ve->siblings[n])
   5204 			break;
   5205 	if (n == ve->num_siblings)
   5206 		return -EINVAL;
   5207 
   5208 	bond = virtual_find_bond(ve, master);
   5209 	if (bond) {
   5210 		bond->sibling_mask |= sibling->mask;
   5211 		return 0;
   5212 	}
   5213 
   5214 	bond = krealloc(ve->bonds,
   5215 			sizeof(*bond) * (ve->num_bonds + 1),
   5216 			GFP_KERNEL);
   5217 	if (!bond)
   5218 		return -ENOMEM;
   5219 
   5220 	bond[ve->num_bonds].master = master;
   5221 	bond[ve->num_bonds].sibling_mask = sibling->mask;
   5222 
   5223 	ve->bonds = bond;
   5224 	ve->num_bonds++;
   5225 
   5226 	return 0;
   5227 }
   5228 
   5229 struct intel_engine_cs *
   5230 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
   5231 				 unsigned int sibling)
   5232 {
   5233 	struct virtual_engine *ve = to_virtual_engine(engine);
   5234 
   5235 	if (sibling >= ve->num_siblings)
   5236 		return NULL;
   5237 
   5238 	return ve->siblings[sibling];
   5239 }
   5240 
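/*
 * Debug pretty-printer: dump up to 'max' requests from each of the
 * engine's lists -- executing (E), queued (Q) and virtual (V) -- via the
 * caller-supplied show_request() callback, eliding the middle of overlong
 * lists.
 */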
   5241 void intel_execlists_show_requests(struct intel_engine_cs *engine,
   5242 				   struct drm_printer *m,
   5243 				   void (*show_request)(struct drm_printer *m,
   5244 							struct i915_request *rq,
   5245 							const char *prefix),
   5246 				   unsigned int max)
   5247 {
   5248 	const struct intel_engine_execlists *execlists = &engine->execlists;
   5249 	struct i915_request *rq, *last;
   5250 	unsigned long flags;
   5251 	unsigned int count;
   5252 	struct rb_node *rb;
   5253 
   5254 	spin_lock_irqsave(&engine->active.lock, flags);
   5255 
   5256 	last = NULL;
   5257 	count = 0;
   5258 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
   5259 		if (count++ < max - 1)
   5260 			show_request(m, rq, "\t\tE ");
   5261 		else
   5262 			last = rq;
   5263 	}
   5264 	if (last) {
   5265 		if (count > max) {
   5266 			drm_printf(m,
   5267 				   "\t\t...skipping %d executing requests...\n",
   5268 				   count - max);
   5269 		}
   5270 		show_request(m, last, "\t\tE ");
   5271 	}
   5272 
   5273 	last = NULL;
   5274 	count = 0;
   5275 	if (execlists->queue_priority_hint != INT_MIN)
   5276 		drm_printf(m, "\t\tQueue priority hint: %d\n",
   5277 			   execlists->queue_priority_hint);
   5278 	for (rb = rb_first_cached(&execlists->queue);
   5279 	     rb;
   5280 	     rb = rb_next2(&execlists->queue.rb_root, rb)) {
   5281 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
   5282 		int i;
   5283 
   5284 		priolist_for_each_request(rq, p, i) {
   5285 			if (count++ < max - 1)
   5286 				show_request(m, rq, "\t\tQ ");
   5287 			else
   5288 				last = rq;
   5289 		}
   5290 	}
   5291 	if (last) {
   5292 		if (count > max) {
   5293 			drm_printf(m,
   5294 				   "\t\t...skipping %d queued requests...\n",
   5295 				   count - max);
   5296 		}
   5297 		show_request(m, last, "\t\tQ ");
   5298 	}
   5299 
   5300 	last = NULL;
   5301 	count = 0;
   5302 	for (rb = rb_first_cached(&execlists->virtual);
   5303 	     rb;
   5304 	     rb = rb_next2(&execlists->virtual.rb_root, rb)) {
   5305 		struct virtual_engine *ve =
   5306 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   5307 		struct i915_request *rq = READ_ONCE(ve->request);
   5308 
   5309 		if (rq) {
   5310 			if (count++ < max - 1)
   5311 				show_request(m, rq, "\t\tV ");
   5312 			else
   5313 				last = rq;
   5314 		}
   5315 	}
   5316 	if (last) {
   5317 		if (count > max) {
   5318 			drm_printf(m,
   5319 				   "\t\t...skipping %d virtual requests...\n",
   5320 				   count - max);
   5321 		}
   5322 		show_request(m, last, "\t\tV ");
   5323 	}
   5324 
   5325 	spin_unlock_irqrestore(&engine->active.lock, flags);
   5326 }
   5327 
   5328 void intel_lr_context_reset(struct intel_engine_cs *engine,
   5329 			    struct intel_context *ce,
   5330 			    u32 head,
   5331 			    bool scrub)
   5332 {
   5333 	GEM_BUG_ON(!intel_context_is_pinned(ce));
   5334 
   5335 	/*
   5336 	 * We want a simple context + ring to execute the breadcrumb update.
   5337 	 * We cannot rely on the context being intact across the GPU hang,
   5338 	 * so clear it and rebuild just what we need for the breadcrumb.
   5339 	 * All pending requests for this context will be zapped, and any
   5340 	 * future request will be after userspace has had the opportunity
   5341 	 * to recreate its own state.
   5342 	 */
   5343 	if (scrub)
   5344 		restore_default_state(ce, engine);
   5345 
   5346 	/* Rerun the request; its payload has been neutered (if guilty). */
   5347 	__execlists_update_reg_state(ce, engine, head);
   5348 }
   5349 
   5350 bool
   5351 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
   5352 {
   5353 	return engine->set_default_submission ==
   5354 	       intel_execlists_set_default_submission;
   5355 }
   5356 
   5357 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
   5358 #include "selftest_lrc.c"
   5359 #endif
   5360